Example #1
  def compute_accuracy(x, l, mask):
    """Compute model accuracy."""
    preds = ch_model.get_probs(x)
    preds = tf.squeeze(preds)
    preds = tf.argmax(preds, -1, output_type=l.dtype)

    _, acc_update_op = tf.metrics.accuracy(l, preds, weights=mask)

    if FLAGS.surrogate_attack:
      preds = sur_ch_model.get_probs(x)
      preds = tf.squeeze(preds)
      preds = tf.argmax(preds, -1, output_type=l.dtype)
      acc_update_op = tf.tuple((acc_update_op,
                                tf.metrics.accuracy(l, preds, weights=mask)[1]))

    sess.run(tf.initialize_local_variables())
    for i in range(FLAGS.eval_steps):
      tf.logging.info(
          "\tEvaluating batch [%d / %d]" % (i + 1, FLAGS.eval_steps))
      acc = sess.run(acc_update_op)
    if FLAGS.surrogate_attack:
      tf.logging.info("\tFinal acc: (%.4f, %.4f)" % (acc[0], acc[1]))
    else:
      tf.logging.info("\tFinal acc: %.4f" % acc)
    return acc
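The streaming `tf.metrics.accuracy` op keeps its running `total` and `count` counters in the LOCAL_VARIABLES collection, which is why the snippet runs `tf.initialize_local_variables()` (the deprecated alias of `tf.local_variables_initializer()`) before touching the update op; skipping it raises a `FailedPreconditionError`. A minimal self-contained sketch of the same pattern, with made-up labels in place of the model from the example:

import tensorflow as tf

labels = tf.constant([1, 0, 1, 1])
preds = tf.constant([1, 0, 0, 1])

# tf.metrics.accuracy returns (value_tensor, update_op) and registers its
# internal counters as local variables.
acc, acc_update_op = tf.metrics.accuracy(labels, preds)

with tf.Session() as sess:
    # Same effect as the deprecated tf.initialize_local_variables().
    sess.run(tf.local_variables_initializer())
    sess.run(acc_update_op)  # accumulate one batch
    print(sess.run(acc))     # 0.75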
Example #2
    def testRecallAtK(self):
        labels = tf.convert_to_tensor([[1, 0, 0], [0, 1, 1], [0, 0, 0]],
                                      dtype=tf.float32)
        predictions = tf.convert_to_tensor([[0.5, 1.0, 0.0], [0.5, 0.0, 1.0],
                                            [0.25, 0.25, 0.4]])

        k_values = [0, 1, 2]
        expected_recall_values_for_each_k = [0, 0.25, 0.75]
        # For k = 0 we get 0% from both of first two (3rd example is always NA)
        # For k = 1 we get 0% from first example and 50% from second
        # For k = 2 we get 100% from first example and 50% from second

        values_and_updates = [
            protein_model._custom_recall_at_k(labels_as_multi_hot=labels,
                                              predictions=predictions,
                                              k=k) for k in k_values
        ]

        with tf.Session() as sess:
            for i, value_and_update in enumerate(values_and_updates):
                value, update_op = value_and_update
                sess.run(tf.initialize_local_variables())
                sess.run(update_op)
                actual_recall = sess.run(value)
                self.assertEqual(actual_recall,
                                 expected_recall_values_for_each_k[i])
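The expected values are easy to verify by hand; a small NumPy sketch, assuming recall@k averages, over examples with at least one positive label, the fraction of each example's labels found among its top-k predictions:

import numpy as np

labels = np.array([[1, 0, 0], [0, 1, 1], [0, 0, 0]])
predictions = np.array([[0.5, 1.0, 0.0], [0.5, 0.0, 1.0], [0.25, 0.25, 0.4]])

for k in [0, 1, 2]:
    recalls = []
    for y, p in zip(labels, predictions):
        if y.sum() == 0:
            continue  # the third example has no positives and is skipped
        top_k = np.argsort(-p)[:k]  # indices of the k highest predictions
        recalls.append(y[top_k].sum() / y.sum())
    print(k, np.mean(recalls))  # 0.0, 0.25, 0.75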
Example #3
    def run_on_files(self,
                     input_filenames,
                     batch_size,
                     max_size=None,
                     keys=None):
        """Run evaluation on the given files.

    Args:
      input_filenames: list of strings providing paths to input sstables.
      batch_size: integer mini-batch size.
      max_size: optional maximum number of examples to evalute.
      keys: Optional sequence of string tensor names to evaluate. By default,
        uses all known tensors.

    Returns:
      xarray.Dataset with data variables for each of the given keys.
    """
        with tf.Graph().as_default():
            examples = examples_from_sstable(input_filenames, batch_size)
            tensors = self.evaluation_tensors(examples, keys=keys)

            sess = tf.Session()
            # initialize the num_epochs counter
            sess.run(tf.initialize_local_variables())
            self._restore(sess)

            # TODO(shoyer): after cl/133322369 is merged, switch to use
            # tf.contrib.metrics.streaming_concat instead of fetch_across_batches.
            arrays_list = fetch_across_batches(sess, list(tensors.values()),
                                               max_size)
            arrays_dict = dict(list(zip(list(tensors.keys()), arrays_list)))
            return convert_labeled_tensor_to_xarray(arrays_dict, tensors)
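The `# initialize the num_epochs counter` comment is the key detail here: in TF 1.x, input producers created with `num_epochs=N` store their epoch counter as a local variable, so the local init must run before the queue is used. A minimal sketch of that behavior, with hypothetical filenames:

import tensorflow as tf

filenames = ['a.tfrecord', 'b.tfrecord']  # hypothetical paths
queue = tf.train.string_input_producer(filenames, num_epochs=1)
next_filename = queue.dequeue()

with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # initializes the epoch counter
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        while True:
            print(sess.run(next_filename))
    except tf.errors.OutOfRangeError:
        pass  # raised once the single epoch is exhausted
    finally:
        coord.request_stop()
        coord.join(threads)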
Example #4
def eval_model():
    preds = spec.predictions["predictions"]
    preds = tf.argmax(preds, -1, output_type=labels.dtype)
    _, acc_update_op = tf.metrics.accuracy(labels=labels,
                                           predictions=preds)
    sess.run(tf.initialize_local_variables())
    for _ in range(FLAGS.eval_steps):
        acc = sess.run(acc_update_op)
    return acc
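`tf.metrics.accuracy` returns a `(value, update_op)` pair; the loop above runs only the update op and keeps its return value, which already reflects the accuracy accumulated so far. An equivalent sketch, reusing the names from the example, that reads the value tensor once after the loop instead:

acc_value, acc_update_op = tf.metrics.accuracy(labels=labels,
                                               predictions=preds)
sess.run(tf.local_variables_initializer())
for _ in range(FLAGS.eval_steps):
    sess.run(acc_update_op)      # accumulate statistics for one batch
final_acc = sess.run(acc_value)  # read the aggregated accuracy once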
Example #5
    def load_preaggregated_data(self):
        # Values returned by this function
        X = None
        Y = None
        X_valid = None
        Y_valid = None

        # Load pre-aggregated training dataset
        tfrecord_file_list = os.listdir(self.preaggregated_data_path)
        tfrecord_file_list = [
            os.path.join(self.preaggregated_data_path, k)
            for k in tfrecord_file_list
        ]
        print('Pre-aggregated file list = ' + str(tfrecord_file_list))
        reader = tf.TFRecordReader()
        key, examples = reader.read(
            tf.train.string_input_producer(
                tfrecord_file_list,
                num_epochs=1))  # Read the data only once

        name_to_features = {
            "input_ids": tf.io.FixedLenFeature([self.max_seq_length],
                                               tf.int64),
            "input_mask": tf.io.FixedLenFeature([self.max_seq_length],
                                                tf.int64),
            "segment_ids": tf.io.FixedLenFeature([self.max_seq_length],
                                                 tf.int64),
        }

        parsed_example = tf.parse_single_example(examples, name_to_features)
        parsed_example_values = list(parsed_example.values())

        # Reuse Keras Session
        sess = K.get_session()

        # Just read all data into array for now.
        # TODO: Implement a generator to support datasets too large to fit in RAM
        all_data = []
        sess.run(tf.initialize_local_variables())
        tf.train.start_queue_runners(sess=sess)
        try:
            while True:
                data = sess.run(parsed_example_values)
                for i in range(len(data)):
                    if len(all_data) <= i:
                        all_data.append([])
                    all_data[i].append(data[i])
        except tf.errors.OutOfRangeError:
            pass
        all_data = [np.array(a) for a in all_data]
        X = all_data
        Y = all_data[0]  # Y is just the 'input_ids' tensor
        K.clear_session()  # sess object is not valid anymore after this

        # Load pre-aggregated validation dataset
        tfrecord_file_list = os.listdir(
            self.preaggregated_validation_data_path)
        tfrecord_file_list = [
            os.path.join(self.preaggregated_validation_data_path, k)
            for k in tfrecord_file_list
        ]
        print('Pre-aggregated file list = ' + str(tfrecord_file_list))
        reader = tf.TFRecordReader()
        key, examples = reader.read(
            tf.train.string_input_producer(
                tfrecord_file_list,
                num_epochs=1))  # Read the data only once

        name_to_features = {
            "input_ids": tf.io.FixedLenFeature([self.max_seq_length],
                                               tf.int64),
            "input_mask": tf.io.FixedLenFeature([self.max_seq_length],
                                                tf.int64),
            "segment_ids": tf.io.FixedLenFeature([self.max_seq_length],
                                                 tf.int64),
        }

        parsed_example = tf.parse_single_example(examples, name_to_features)
        parsed_example_values = list(parsed_example.values())

        # Reuse Keras Session
        sess = K.get_session()

        # Just read all data into array for now.
        # TODO: Implement a generator to support datasets too large to fit in RAM
        all_data = []
        sess.run(tf.initialize_local_variables())
        tf.train.start_queue_runners(sess=sess)
        try:
            while True:
                data = sess.run(parsed_example_values)
                for i in range(len(data)):
                    if len(all_data) <= i:
                        all_data.append([])
                    all_data[i].append(data[i])
        except tf.errors.OutOfRangeError:
            pass
        all_data = [np.array(a) for a in all_data]
        X_valid = all_data
        Y_valid = all_data[0]  # Y is just the 'input_ids' tensor
        K.clear_session()  # sess object is not valid anymore after this

        #print(len(X_valid))
        #print(len(Y_valid))

        return (X, Y, X_valid, Y_valid)
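`tf.TFRecordReader` and the queue-runner machinery were deprecated in favor of `tf.data`; a rough sketch of an equivalent single-pass loader, reusing the `tfrecord_file_list` and `name_to_features` built above:

dataset = tf.data.TFRecordDataset(tfrecord_file_list)
dataset = dataset.map(
    lambda record: tf.parse_single_example(record, name_to_features))
next_example = dataset.make_one_shot_iterator().get_next()

rows = []
with tf.Session() as sess:
    # No local-variable init or queue runners are needed with tf.data.
    try:
        while True:
            rows.append(sess.run(next_example))  # dict: feature name -> array
    except tf.errors.OutOfRangeError:
        pass  # one pass over the files, mirroring num_epochs=1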
Example #6
def run(config,
        target='',
        cluster_spec=None,
        is_chief=True,
        job_name=None,
        task_index=None,
        get_model_fn=get_model,
        get_dataset_fn=get_dataset,
        environment=None):
    model_class = get_model_fn(config.model.type)

    image_vis = config.train.get('image_vis')
    var_vis = config.train.get('var_vis')

    if config.train.get('seed') is not None:
        tf.set_random_seed(config.train.seed)

    log_prefix = '[{}-{}] - '.format(job_name, task_index) \
        if job_name is not None and task_index is not None else ''

    if config.train.debug or config.train.tf_debug:
        tf.logging.set_verbosity(tf.logging.DEBUG)
    else:
        tf.logging.set_verbosity(tf.logging.INFO)

    model = model_class(config)

    # Placement of ops on devices using replica device setter
    # which automatically places the parameters on the `ps` server
    # and the `ops` on the workers
    #
    # See:
    # https://www.tensorflow.org/api_docs/python/tf/train/replica_device_setter
    with tf.device(tf.train.replica_device_setter(cluster=cluster_spec)):
        try:
            config['dataset']['type']
        except KeyError:
            raise KeyError('dataset.type should be set on the custom config.')

        try:
            dataset_class = get_dataset_fn(config.dataset.type)
            dataset = dataset_class(config)
            train_dataset = dataset()
        except InvalidDataDirectory as exc:
            tf.logging.error("Error while reading dataset, {}".format(exc))
            sys.exit(1)

        train_image = train_dataset['image']
        train_filename = train_dataset['filename']
        train_bboxes = train_dataset['bboxes']

        prediction_dict = model(train_image, train_bboxes, is_training=True)
        total_loss = model.loss(prediction_dict)

        global_step = tf.train.get_or_create_global_step()

        optimizer = get_optimizer(config.train, global_step)

        # TODO: Is this necessary? Couldn't we just get them from the
        # trainable vars collection? We should probably improve our
        # usage of collections.
        trainable_vars = model.get_trainable_vars()

        # Compute, clip and apply gradients
        with tf.name_scope('gradients'):
            grads_and_vars = optimizer.compute_gradients(
                total_loss, trainable_vars)

            if config.train.clip_by_norm:
                grads_and_vars = clip_gradients_by_norm(grads_and_vars)

        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        with tf.control_dependencies(update_ops):
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)

        # Create custom init for slots in optimizer, as we don't save them to
        # our checkpoints. An example of slots in an optimizer are the Momentum
        # variables in MomentumOptimizer. We do this because slot variables can
        # effectively duplicate the size of your checkpoint!
        slot_variables = [
            optimizer.get_slot(var, name)
            for name in optimizer.get_slot_names() for var in trainable_vars
        ]
        slot_init = tf.variables_initializer(
            slot_variables, name='optimizer_slots_initializer')

        # Create saver for saving/restoring model
        model_saver = tf.train.Saver(
            set(tf.global_variables()) - set(slot_variables),
            name='model_saver',
            max_to_keep=config.train.get('checkpoints_max_keep', 1),
        )

        # Create saver for loading pretrained checkpoint into base network
        base_checkpoint_vars = model.get_base_network_checkpoint_vars()
        checkpoint_file = model.get_checkpoint_file()
        if base_checkpoint_vars and checkpoint_file:
            base_net_checkpoint_saver = tf.train.Saver(
                base_checkpoint_vars, name='base_net_checkpoint_saver')

            # We'll send this fn to Scaffold init_fn
            def load_base_net_checkpoint(_, session):
                base_net_checkpoint_saver.restore(session, checkpoint_file)
        else:
            load_base_net_checkpoint = None

    tf.logging.info('{}Starting training for {}'.format(log_prefix, model))

    run_options = None
    if config.train.full_trace:
        run_options = tf.RunOptions(trace_level=tf.RunOptions.FULL_TRACE)

    # Create custom Scaffold to make sure we run our own init_op when model
    # is not restored from checkpoint.
    summary_op = [model.summary]
    summaries = tf.summary.merge_all()
    if summaries is not None:
        summary_op.append(summaries)
    summary_op = tf.summary.merge(summary_op)

    # `ready_for_local_init_op` is hardcoded to "ready" because local init
    # doesn't depend on global init here; `local_init_op` only runs once the
    # model is reported ready for it, and an empty string tensor reports it
    # as ready.
    scaffold = tf.train.Scaffold(
        saver=model_saver,
        init_op=tf.global_variables_initializer() if is_chief else tf.no_op(),
        local_init_op=tf.group(tf.initialize_local_variables(), slot_init),
        ready_for_local_init_op=tf.constant([], dtype=tf.string),
        summary_op=summary_op,
        init_fn=load_base_net_checkpoint,
    )

    # Custom hooks for our session
    hooks = []
    chief_only_hooks = []

    if config.train.tf_debug:
        debug_hook = tf_debug.LocalCLIDebugHook()
        debug_hook.add_tensor_filter('has_inf_or_nan', tf_debug.has_inf_or_nan)
        hooks.extend([debug_hook])

    if not config.train.job_dir:
        tf.logging.warning(
            '`job_dir` is not defined. Checkpoints and logs will not be saved.'
        )
        checkpoint_dir = None
    elif config.train.run_name:
        # Use run_name when available
        checkpoint_dir = os.path.join(config.train.job_dir,
                                      config.train.run_name)
    else:
        checkpoint_dir = config.train.job_dir

    # Parenthesized so that hooks require both a display interval and a
    # directory to write their output to.
    should_add_hooks = ((config.train.display_every_steps
                         or config.train.display_every_secs)
                        and checkpoint_dir is not None)
    if should_add_hooks:
        if not config.train.debug and image_vis == 'debug':
            tf.logging.warning('ImageVisHook will not run without debug mode.')
        elif image_vis is not None:
            # ImageVis only runs on the chief.
            chief_only_hooks.append(
                ImageVisHook(prediction_dict,
                             image=train_dataset['image'],
                             gt_bboxes=train_dataset['bboxes'],
                             config=config.model,
                             output_dir=checkpoint_dir,
                             every_n_steps=config.train.display_every_steps,
                             every_n_secs=config.train.display_every_secs,
                             image_visualization_mode=image_vis))

        if var_vis is not None:
            # VarVis only runs on the chief.
            chief_only_hooks.append(
                VarVisHook(
                    every_n_steps=config.train.display_every_steps,
                    every_n_secs=config.train.display_every_secs,
                    mode=var_vis,
                    output_dir=checkpoint_dir,
                    vars_summary=model.vars_summary,
                ))

    step = -1
    with tf.train.MonitoredTrainingSession(
            master=target,
            is_chief=is_chief,
            checkpoint_dir=checkpoint_dir,
            scaffold=scaffold,
            hooks=hooks,
            chief_only_hooks=chief_only_hooks,
            save_checkpoint_secs=config.train.save_checkpoint_secs,
            save_summaries_steps=config.train.save_summaries_steps,
            save_summaries_secs=config.train.save_summaries_secs,
    ) as sess:

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            while not coord.should_stop():
                before = time.time()
                _, train_loss, step, filename = sess.run(
                    [train_op, total_loss, global_step, train_filename],
                    options=run_options)

                # TODO: Add image summary every once in a while.

                tf.logging.info(
                    '{}step: {}, file: {}, train_loss: {}, in {:.2f}s'.format(
                        log_prefix, step, filename, train_loss,
                        time.time() - before))

                if is_chief and step == 1:
                    # We save the run after first batch to make sure everything
                    # works properly.
                    save_run(config, environment=environment)

        except tf.errors.OutOfRangeError:
            tf.logging.info('{}finished training after {} epoch limit'.format(
                log_prefix, config.train.num_epochs))

            # TODO: Print summary
        finally:
            coord.request_stop()

        # Wait for all threads to stop.
        coord.join(threads)

        return step
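The `Scaffold` here is what replaces the manual `sess.run(tf.initialize_local_variables())` calls seen in the earlier examples: `MonitoredTrainingSession` runs the scaffold's `init_op` on the chief and `local_init_op` everywhere before the first training step. A stripped-down sketch of that wiring:

scaffold = tf.train.Scaffold(
    init_op=tf.global_variables_initializer(),
    # The deprecated tf.initialize_local_variables() would also work here.
    local_init_op=tf.local_variables_initializer(),
)
with tf.train.MonitoredTrainingSession(scaffold=scaffold) as sess:
    pass  # both init ops have already run by this point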
Example #7
def predict(model_root, datasets_dir, model_name, test_image_name):
    with tf.Graph().as_default():
        tf_global_step = slim.get_or_create_global_step()

        test_image = os.path.join(datasets_dir, test_image_name)

        # dataset = convert_data.get_datasets('test',dataset_dir=datasets_dir)

        network_fn = net_select.get_network_fn(model_name,
                                               num_classes=20,
                                               is_training=False)
        batch_size = 1
        eval_image_size = network_fn.default_image_size

        # images, images_raw, labels = load_batch(datasets_dir,
        #                                         height=eval_image_size,
        #                                         width=eval_image_size)

        image_preprocessing_fn = preprocessing_select.get_preprocessing(
            model_name, is_training=False)

        image_data = tf.io.read_file(test_image)
        image_data = tf.image.decode_jpeg(image_data, channels=3)
        image_data = image_preprocessing_fn(image_data, eval_image_size,
                                            eval_image_size)
        image_data = tf.expand_dims(image_data, 0)

        logits_1, end_points_1 = network_fn(image_data)
        attention_maps = tf.reduce_mean(end_points_1['attention_maps'],
                                        axis=-1,
                                        keepdims=True)
        attention_maps = tf.image.resize(attention_maps,
                                         [eval_image_size, eval_image_size],
                                         method=tf.image.ResizeMethod.BILINEAR)
        bboxes = tf_v1.py_func(mask2bbox, [attention_maps], [tf.float32])
        bboxes = tf.reshape(bboxes, [batch_size, 4])
        # print(bboxes)
        box_ind = tf.range(batch_size, dtype=tf.int32)

        images = tf.image.crop_and_resize(
            image_data,
            bboxes,
            box_ind,
            crop_size=[eval_image_size, eval_image_size])
        logits_2, end_points_2 = network_fn(images, reuse=True)

        logits = tf.math.log(
            tf.nn.softmax(logits_1) * 0.5 + tf.nn.softmax(logits_2) * 0.5)

        checkpoint_path = os.path.join(model_root, model_name)

        if tf.io.gfile.isdir(checkpoint_path):
            checkpoint_path = tf.train.latest_checkpoint(checkpoint_path)

        init_fn = slim.assign_from_checkpoint_fn(
            checkpoint_path, slim.get_variables_to_restore())

        # with tf_v1.Session() as sess:
        #     with slim.queues.QueueRunners(sess):
        #         sess.run(tf_v1.initialize_local_variables())
        #         init_fn(sess)
        #         np_probabilities, np_images_raw, np_labels = sess.run([logits, images_raw, labels])
        #
        #         for i in range(batch_size):
        #             image = np_images_raw[i, :, :, :]
        #             true_label = np_labels[i]
        #             predicted_label = np.argmax(np_probabilities[i, :])
        #             print('true is {}, predict is {}'.format(true_label, predicted_label))

        with tf_v1.Session() as sess:
            with slim.queues.QueueRunners(sess):
                sess.run(tf_v1.initialize_local_variables())
                init_fn(sess)
                np_images, np_probabilities = sess.run([image_data, logits])
                predicted_label = np.argmax(np_probabilities[0, :])
                print(predicted_label)
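`slim.assign_from_checkpoint_fn` returns a callable that restores the listed variables into a live session, which is why `init_fn(sess)` is invoked after session creation rather than being run as an op. A minimal sketch of the pattern, with a hypothetical checkpoint path:

import tensorflow.compat.v1 as tf_v1
import tensorflow.contrib.slim as slim

init_fn = slim.assign_from_checkpoint_fn(
    '/path/to/model.ckpt',            # hypothetical checkpoint
    slim.get_variables_to_restore())  # all restorable variables in the graph

with tf_v1.Session() as sess:
    sess.run(tf_v1.initialize_local_variables())
    init_fn(sess)  # loads the checkpointed weights into the session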