Example #1
def get_dataset(flags, mode):
    if mode == tf.estimator.ModeKeys.TRAIN:
        dataset_split = 'train'
    elif mode == tf.estimator.ModeKeys.EVAL:
        dataset_split = 'val'
    else:
        raise ValueError('unknown mode: %s' % mode)

    FLAGS = flags
    data_config = edict()
    data_config.edge_width = 20
    data_config.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
    data_config.edge_class_num = FLAGS.edge_class_num
    img_files, label_files = get_dataset_files(
        FLAGS.dataset, dataset_split)

    # Only enable training-time shuffling/augmentation for the TRAIN split.
    dataset_pp = dataset_pipeline(
        data_config, img_files, label_files,
        is_train=(mode == tf.estimator.ModeKeys.TRAIN))
    data_list = dataset_pp.iterator()

    if mode == tf.estimator.ModeKeys.TRAIN:
        samples = input_generator.get(
            (data_list, data_config.ignore_label),
            FLAGS.train_crop_size,
            FLAGS.train_batch_size//FLAGS.num_clones,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            min_scale_factor=FLAGS.min_scale_factor,
            max_scale_factor=FLAGS.max_scale_factor,
            scale_factor_step_size=FLAGS.scale_factor_step_size,
            dataset_split=FLAGS.train_split,
            is_training=True,
            model_variant=FLAGS.model_variant)
    elif mode == tf.estimator.ModeKeys.EVAL:
        samples = input_generator.get(
            (data_list, data_config.ignore_label),
            FLAGS.eval_crop_size,
            FLAGS.eval_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.eval_split,
            is_training=False,
            model_variant=FLAGS.model_variant)
    else:
        raise ValueError('unknown mode: %s' % mode)

    return samples[common.IMAGE], samples[common.LABEL]
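
Since get_dataset() keys off tf.estimator.ModeKeys, it can be wired straight into an Estimator input_fn. A minimal sketch, assuming `flags` carries the same fields used above (make_input_fn and my_model_fn are hypothetical names):

def make_input_fn(flags, mode):
    def input_fn():
        images, labels = get_dataset(flags, mode)
        return images, labels
    return input_fn

# Usage (hypothetical):
# estimator = tf.estimator.Estimator(model_fn=my_model_fn)
# estimator.train(make_input_fn(flags, tf.estimator.ModeKeys.TRAIN), steps=1000)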
Example #2
def do_prepare():
    dataset = segmentation_dataset.get_dataset(A_dataset,
                                               A_vis_split,
                                               dataset_dir=A_dataset_dir)
    # with tf.Graph().as_default():
    # with tf.get_default_graph():
    samples = input_generator.get(dataset,
                                  A_vis_crop_size,
                                  A_vis_batch_size,
                                  min_resize_value=A_min_resize_value,
                                  max_resize_value=A_max_resize_value,
                                  resize_factor=A_resize_factor,
                                  dataset_split=A_vis_split,
                                  is_training=False,
                                  model_variant=A_model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=A_vis_crop_size,
        atrous_rates=A_atrous_rates,
        output_stride=A_output_stride)
    print(samples[common.IMAGE])

    predictions = model.predict_labels(samples[common.IMAGE],
                                       model_options=model_options,
                                       image_pyramid=A_image_pyramid)

    predictions = predictions[common.OUTPUT_TYPE]

    return samples, predictions
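
A hedged sketch of consuming do_prepare(): the input_generator pipeline is queue-based, so queue runners must be started before `predictions` can be evaluated. In practice a trained checkpoint would be restored; plain variable initialization is used here only to keep the sketch self-contained:

samples, predictions = do_prepare()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())  # restore a trained checkpoint in practice
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    pred_batch = sess.run(predictions)  # one batch of predicted label maps
    coord.request_stop()
    coord.join(threads)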
Example #3
def vis_main(sess):
    dataset = segmentation_dataset.get_dataset(A_dataset,
                                               A_vis_split,
                                               dataset_dir=A_dataset_dir)

    train_id_to_eval_id = None

    save_dir = os.path.join(work_dir, _SEMANTIC_PREDICTION_SAVE_FOLDER)

    print('min_resize_value: %s' % str(A_min_resize_value))
    print('max_resize_value: %s' % str(A_max_resize_value))

    samples = input_generator.get(dataset,
                                  A_vis_crop_size,
                                  A_vis_batch_size,
                                  min_resize_value=A_min_resize_value,
                                  max_resize_value=A_max_resize_value,
                                  resize_factor=A_resize_factor,
                                  dataset_split=A_vis_split,
                                  is_training=False,
                                  model_variant=A_model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=A_vis_crop_size,
        atrous_rates=A_atrous_rates,
        output_stride=A_output_stride)

    print(samples[common.IMAGE])

    predictions = model.predict_labels(samples[common.IMAGE],
                                       model_options=model_options,
                                       image_pyramid=A_image_pyramid)

    predictions = predictions[common.OUTPUT_TYPE]
Example #4
def test():
    vis_graph = tf.Graph()
    with vis_graph.as_default():
        dataset = segmentation_dataset.get_dataset(A_dataset,
                                                   A_vis_split,
                                                   dataset_dir=A_dataset_dir)
        samples = input_generator.get(dataset,
                                      A_vis_crop_size,
                                      A_vis_batch_size,
                                      min_resize_value=A_min_resize_value,
                                      max_resize_value=A_max_resize_value,
                                      resize_factor=A_resize_factor,
                                      dataset_split=A_vis_split,
                                      is_training=False,
                                      model_variant=A_model_variant)
        model_options = mycommon.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=A_vis_crop_size,
            atrous_rates=A_atrous_rates,
            output_stride=A_output_stride)
        print(samples[common.IMAGE])

        predictions = model.predict_labels(samples[common.IMAGE],
                                           model_options=model_options,
                                           image_pyramid=A_image_pyramid)

        predictions = predictions[common.OUTPUT_TYPE]

        tf.train.get_or_create_global_step()
        #vis_session = tf.Session(graph=vis_graph)
        saver = tf.train.Saver(slim.get_variables_to_restore())
        sv = tf.train.Supervisor(graph=vis_graph,
                                 logdir=A_vis_logdir,
                                 init_op=tf.global_variables_initializer(),
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=saver)

        vis_one(
            vis_graph,
            '/DATA/ylxiong/homeplus/data_general/JPEGImages/building(11).jpg',
            samples, predictions, sv)
        vis_one(
            vis_graph,
            '/DATA/ylxiong/homeplus/data_general/JPEGImages/building(12).jpg',
            samples, predictions, sv)
        vis_one(
            vis_graph,
            '/DATA/ylxiong/homeplus/data_general/JPEGImages/building(13).jpg',
            samples, predictions, sv)
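
vis_one() is not shown in this example. A hypothetical sketch of its shape, using the Supervisor to restore state and drive the queue-fed prediction op (the image path is only used for bookkeeping here, since the pipeline above reads from the dataset queue):

def vis_one(graph, image_path, samples, predictions, sv):
    # Hypothetical: evaluate one batch of predictions under the Supervisor.
    with sv.managed_session(start_standard_services=False) as sess:
        sv.start_queue_runners(sess)
        pred = sess.run(predictions)  # the queue supplies the next image(s)
        print(image_path, pred.shape)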
Example #5
def main(unused_argv):
  print("DEEPLABv3+")
  print("SAVE TO "+FLAGS.train_logdir)
  tf.logging.set_verbosity(tf.logging.INFO)
  # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
  config = model_deploy.DeploymentConfig(
      num_clones=FLAGS.num_clones,
      clone_on_cpu=FLAGS.clone_on_cpu,
      replica_id=FLAGS.task,
      num_replicas=FLAGS.num_replicas,
      num_ps_tasks=FLAGS.num_ps_tasks)
  print("batch_norm: "+str(FLAGS.fine_tune_batch_norm))
  print("initialize_last_layer: "+str(FLAGS.initialize_last_layer))
  # Split the batch across GPUs.
  assert FLAGS.train_batch_size % config.num_clones == 0, (
      'Training batch size not divisible by number of clones (GPUs).')

  clone_batch_size = FLAGS.train_batch_size // config.num_clones

  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)
  dataset_val = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.val_split, dataset_dir=FLAGS.dataset_dir)

  tf.gfile.MakeDirs(FLAGS.train_logdir)
  tf.logging.info('Training on %s set', FLAGS.train_split)
  tf.logging.info('Validating on %s set', FLAGS.val_split)

  with tf.Graph().as_default() as graph:
    with tf.device(config.inputs_device()):
      samples = input_generator.get(
          dataset,
          FLAGS.train_crop_size,
          clone_batch_size,
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          dataset_split=FLAGS.train_split,
          is_training=True,
          model_variant=FLAGS.model_variant)
      inputs_queue = prefetch_queue.prefetch_queue(
          samples, capacity=128 * config.num_clones)

    # For validation.

    samples_val = input_generator.get(
        dataset_val,
        FLAGS.train_crop_size,
        FLAGS.train_batch_size,
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        dataset_split=FLAGS.val_split,
        is_training=False,
        model_variant=FLAGS.model_variant)

    # Create the global step on the device storing the variables.
    with tf.device(config.variables_device()):
      global_step = tf.train.get_or_create_global_step()

      # Define the model and create clones.
      model_fn = _build_deeplab
      model_args = (inputs_queue, {
          common.OUTPUT_TYPE: dataset.num_classes
      }, dataset.ignore_label)
      clones = model_deploy.create_clones(config, model_fn, args=model_args)

      # Gather update_ops from the first clone. These contain, for example,
      # the updates for the batch_norm variables created by model_fn.
      first_clone_scope = config.clone_scope(0)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Add summaries for model variables.
    for model_var in slim.get_model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # Add summaries for images, labels, semantic predictions
    if FLAGS.save_summaries_images:
      summary_image = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
      summaries.add(
          tf.summary.image('samples/%s' % common.IMAGE, summary_image))

      first_clone_label = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
      # Scale up summary image pixel values for better visualization.
      pixel_scaling = max(1, 255 // dataset.num_classes)
      summary_label = tf.cast(first_clone_label * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image('samples/%s' % common.LABEL, summary_label))

      first_clone_output = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
      predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)

      summary_predictions = tf.cast(predictions * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image(
              'samples/%s' % common.OUTPUT_TYPE, summary_predictions))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Build the optimizer based on the device specification.
    with tf.device(config.optimizer_device()):
      learning_rate = train_utils.get_model_learning_rate(
          FLAGS.learning_policy, FLAGS.base_learning_rate,
          FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
          FLAGS.training_number_of_steps, FLAGS.learning_power,
          FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
      optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    with tf.device(config.variables_device()):
      total_loss, grads_and_vars = model_deploy.optimize_clones(
          clones, optimizer)
      total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
      summaries.add(tf.summary.scalar('total_loss', total_loss))

      # Modify the gradients for biases and last layer variables.
      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      grad_mult = train_utils.get_model_gradient_multipliers(
          last_layers, FLAGS.last_layer_gradient_multiplier)
      if grad_mult:
        grads_and_vars = slim.learning.multiply_gradients(
            grads_and_vars, grad_mult)

      # Create gradient update op.
      grad_updates = optimizer.apply_gradients(
          grads_and_vars, global_step=global_step)
      update_ops.append(grad_updates)
      update_op = tf.group(*update_ops)
      with tf.control_dependencies([update_op]):
        train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries))

    # Soft placement allows placing on CPU ops without GPU implementation.
    session_config = tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False)

    # For validation.
    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)
    predictions_val = model.predict_labels(samples_val[common.IMAGE],
                                           model_options,
                                           image_pyramid=FLAGS.image_pyramid)
    predictions_val = predictions_val[common.OUTPUT_TYPE]
    predictions_val = tf.reshape(predictions_val, shape=[-1])
    labels_val = tf.reshape(samples_val[common.LABEL], shape=[-1])

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    #labels = tf.where(
    #    tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)
    accuracy_validation = slim.metrics.accuracy(tf.to_int32(predictions_val),
                                                tf.to_int32(labels_val))
    iou, conf_mat = tf.metrics.mean_iou(labels_val, predictions_val, num_classes=6)
    #sess.run(tf.local_variables_initializer())


    def train_step_fn(session, *args, **kwargs):
        total_loss, should_stop = train_step(session, *args, **kwargs)

        if train_step_fn.step % FLAGS.validation_check == 0:
            pass
            # Disabled: running the validation op here eventually throws an
            # OutOfRange error once the validation queue is exhausted.
            # accuracy = session.run(train_step_fn.accuracy_validation)
            # print('Step %s - Loss: %.2f Accuracy: %.2f%%' % (
            #     str(train_step_fn.step).rjust(6, '0'), total_loss,
            #     accuracy * 100))

        # if train_step_fn.step == (FLAGS.max_steps - 1):
        #     accuracy = session.run(accuracy_test)
        #     print('%s - Loss: %.2f Accuracy: %.2f%%' % (
        #         'FINAL TEST', total_loss, accuracy * 100))

        train_step_fn.step += 1
        return [total_loss, should_stop]

    train_step_fn.step = 0
    train_step_fn.accuracy_validation = accuracy_validation

    # Start the training.
    slim.learning.train(
        train_tensor,
        train_step_fn=train_step_fn,
        logdir=FLAGS.train_logdir,
        log_every_n_steps=FLAGS.log_steps,
        master=FLAGS.master,
        number_of_steps=FLAGS.training_number_of_steps,
        is_chief=(FLAGS.task == 0),
        session_config=session_config,
        startup_delay_steps=startup_delay_steps,
        init_fn=train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True),
        summary_op=summary_op,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs)
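
The commented-out tf.local_variables_initializer() above points at a real pitfall: tf.metrics.mean_iou returns a (value, update_op) pair backed by local variables, which must be initialized before the update op first runs. A minimal standalone sketch:

import tensorflow as tf

labels = tf.constant([0, 1, 2, 1])
preds = tf.constant([0, 1, 1, 1])
miou, update_op = tf.metrics.mean_iou(labels, preds, num_classes=3)
with tf.Session() as sess:
    sess.run(tf.local_variables_initializer())  # metric accumulators are local variables
    sess.run(update_op)    # accumulate the confusion matrix for this batch
    print(sess.run(miou))  # read the running mean IoU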
Example #6
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                               FLAGS.eval_split,
                                               dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    if FLAGS.eval_batch_size != 1:
        raise ValueError('Batch size {} is not allowed. '
                         'Inference graph can only be '
                         'evaluated image by image.'.format(
                             FLAGS.eval_batch_size))

    batch_size = 1

    g = tf.Graph()
    with g.as_default():

        samples = input_generator.get(dataset,
                                      FLAGS.eval_crop_size,
                                      batch_size,
                                      min_resize_value=FLAGS.min_resize_value,
                                      max_resize_value=FLAGS.max_resize_value,
                                      resize_factor=FLAGS.resize_factor,
                                      dataset_split=FLAGS.eval_split,
                                      is_training=False,
                                      model_variant=FLAGS.model_variant)

        graph_def = tf.GraphDef()
        with open(FLAGS.graph_path, "rb") as f:
            graph_def.ParseFromString(f.read())

        samples[common.IMAGE] = tf.cast(samples[common.IMAGE], tf.uint8)

        tf.import_graph_def(graph_def,
                            input_map={_INPUT_TENSOR: samples[common.IMAGE]})

        predictions = g.get_tensor_by_name('import/' + _OUTPUT_TENSOR)
        predictions = tf.reshape(predictions, shape=[-1])

        (_, summary_op, metrics_to_updates, confusion_matrix,
         category_iou) = eval.create_metrics(g, samples, dataset, predictions)

        tf.train.get_or_create_global_step()

        sv = tf.train.Supervisor(graph=g,
                                 logdir=FLAGS.eval_logdir,
                                 init_op=tf.global_variables_initializer(),
                                 summary_op=None,
                                 global_step=None,
                                 saver=None)

        # Guard against tiny datasets, where num_samples // 10 would be zero.
        log_steps = max(1, int(math.floor(dataset.num_samples / 10)))

        with sv.managed_session(start_standard_services=False) as sess:
            sv.start_queue_runners(sess)

            for image_number in range(dataset.num_samples):
                if ((image_number + 1) % log_steps == 0
                        or image_number == dataset.num_samples - 1):
                    tf.logging.info('Evaluation [%d/%d]', image_number + 1,
                                    dataset.num_samples)

                sess.run([samples[common.IMAGE],
                          list(metrics_to_updates.values())])

            sv.summary_computed(sess, sess.run(summary_op))
            sess.run([confusion_matrix, category_iou])
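
This example consumes a frozen GraphDef (FLAGS.graph_path). For reference, a hedged sketch of how such a file is typically produced in TF1; the checkpoint and output node names here are placeholders:

import tensorflow as tf

with tf.Session(graph=tf.Graph()) as sess:
    saver = tf.train.import_meta_graph('model.ckpt.meta')  # hypothetical checkpoint
    saver.restore(sess, 'model.ckpt')
    frozen = tf.graph_util.convert_variables_to_constants(
        sess, sess.graph_def, ['SemanticPredictions'])  # placeholder output node
    with tf.gfile.GFile('frozen_inference_graph.pb', 'wb') as f:
        f.write(frozen.SerializeToString())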
Example #7
def main(unused_argv):
    # syaru: sets the threshold for which messages will be logged; without this
    # line the training-progress logs are not printed.
    tf.logging.set_verbosity(tf.logging.INFO)
    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    # syaru: models/research/slim/deployment/model_deploy.DeploymentConfig(object)
    config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
                                           clone_on_cpu=FLAGS.clone_on_cpu,
                                           replica_id=FLAGS.task,
                                           num_replicas=FLAGS.num_replicas,
                                           num_ps_tasks=FLAGS.num_ps_tasks)

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisible by number of clones (GPUs).')

    clone_batch_size = int(FLAGS.train_batch_size / config.num_clones)

    # Get dataset-dependent information.
    """
  syaru: deeplab/datasets/segmentation_dataset.get_dataset()
  Gets an instance of slim Dataset.
  Args:
    dataset_name: Dataset name.
    split_name: A train/val Split name.
    dataset_dir: The directory of the dataset sources.
  """
    dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                               FLAGS.train_split,
                                               dataset_dir=FLAGS.dataset_dir)

    # syaru: FLAGS.train_logdir = "pascal_voc_seg/exp/train_on_trainval_set/train"
    tf.gfile.MakeDirs(FLAGS.train_logdir)
    # syaru: FLAGS.train_split = "trainval"
    tf.logging.info('Training on %s set', FLAGS.train_split)

    with tf.Graph().as_default() as graph:
        # syaru: deeplab/utils/input_generator.get() gets the dataset split for
        # semantic segmentation and returns a dictionary of batched Tensors.
        # Args:
        #   dataset: An instance of slim Dataset.
        #   train_crop_size: if crop_size is set, images larger than crop_size
        #     are randomly cropped during training.
        #   min_scale_factor: minimum scale factor for data augmentation.
        #   max_scale_factor: maximum scale factor for data augmentation.
        #   scale_factor_step_size: scale factor step size for data augmentation
        #     (from minimum to maximum).
        with tf.device(config.inputs_device()):
            samples = input_generator.get(
                dataset,
                FLAGS.train_crop_size,
                clone_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                dataset_split=FLAGS.train_split,
                is_training=True,
                model_variant=FLAGS.model_variant)
            # syaru: /tensorflow/contrib/slim/python/slim/data/prefetch_queue.py
            #   tensors: a list or dictionary of `Tensors` to enqueue in the buffer.
            #   capacity: an integer, the maximum number of elements in the queue.
            inputs_queue = prefetch_queue.prefetch_queue(
                samples, capacity=128 * config.num_clones)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            """
      syaru: 
      models/research/slim/deployment/model_deploy.create_clones():
      The `model_fn(*args, **kwargs)` function is called `config.num_clones` times to create the model clones.
      (and one or several clones are deployed on different GPUs and one or several replicas of such clones.)
      Then it return the scope and device in a namedtuple `Clone(outputs, scope, device)`.

      Args:
      config: A DeploymentConfig object.
      model_fn: A callable. Called as `model_fn(*args, **kwargs)`
      args: Optional list of arguments to pass to `model_fn`.
      kwargs: Optional list of keyword arguments to pass to `model_fn`..
      Returns:
      A list of namedtuples `Clone`.

      Note: it is assumed that any loss created by `model_fn` is collected at
      the tf.GraphKeys.LOSSES collection.

      To recover the losses, summaries or update_ops created by the clone use:
      ```python
      losses = tf.get_collection(tf.GraphKeys.LOSSES, clone.scope)
      summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, clone.scope)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, clone.scope)
      ```
      """
            model_fn = _build_deeplab
            model_args = (inputs_queue, {
                common.OUTPUT_TYPE: dataset.num_classes
            }, dataset.ignore_label)
            clones = model_deploy.create_clones(config,
                                                model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by model_fn.
            first_clone_scope = config.clone_scope(0)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for model variables.
        for model_var in slim.get_model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        # Add summaries for images, labels, semantic predictions
        if FLAGS.save_summaries_images:
            # syaru: get_tensor_by_name(name) returns the tensor with the given
            # name; str.strip('/') removes the specified characters at the
            # front/end of the string (the default is whitespace).
            summary_image = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
            summaries.add(
                tf.summary.image('samples/%s' % common.IMAGE, summary_image))

            summary_label = tf.cast(
                graph.get_tensor_by_name(
                    ('%s/%s:0' %
                     (first_clone_scope, common.LABEL)).strip('/')), tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.LABEL, summary_label))

            # syaru: tf.argmax(axis=3) over the logits yields the predicted class.
            predictions = tf.cast(
                tf.expand_dims(
                    tf.argmax(
                        graph.get_tensor_by_name(
                            ('%s/%s:0' % (first_clone_scope,
                                          common.OUTPUT_TYPE)).strip('/')),
                        3),
                    -1),
                tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                 predictions))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            # syaru: train_utils.get_model_learning_rate():
            #        Computes the model's learning rate for different learning policy("step" and "poly").
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy, FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps, FLAGS.learning_power,
                FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   FLAGS.momentum)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps
        for variable in slim.get_model_variables():
            summaries.add(tf.summary.histogram(variable.op.name, variable))

        with tf.device(config.variables_device()):
            # syaru: Compute clone losses and gradients for the given list of `Clones`.
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('total_loss', total_loss))

            # Modify the gradients for biases and last layer variables.
            """
      syaru: 
      For the task of semantic segmentation, the models are
      usually fine-tuned from the models trained on the task of image
      classification. To fine-tune the models, we usually set larger (e.g.,
      10 times larger) learning rate for the parameters of last layer.
      
      deeplab/model/model.get_extra_layer_scopes():
      Returns: A list of scopes for extra layers. 

      deeplab/utils/train_utils.get_model_gradient_multipliers():
      Returns: The gradient multiplier map with variables as key, and multipliers as value.
      """
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op.
            grad_updates = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)
            update_ops.append(grad_updates)
            # syaru: tf.identity() and tf.group() both turn statements into ops.
            #        (We need `optimizer.apply_gradients` to run before computing
            #        `total_loss` (as 'train_op'), and tf.control_dependencies()
            #        applies to tf.Operations.)
            #        And `update_ops = tf.get_collection(..)` only returns a list of variables.
            update_op = tf.group(*update_ops)
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows placing on CPU ops without GPU implementation.
        # syaru: set gpu_options
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.9)
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False,
                                        gpu_options=gpu_options)

        # Start the training.
        # syaru: /tensorflow/contrib/slim/python/slim/learning.py
        # train_utils.get_model_init_fn(): gets the function initializing model
        # variables from a checkpoint. `init_fn` is an optional callable executed
        # after `init_op`; it must accept one argument, the session being
        # initialized.
        slim.learning.train(
            train_tensor,
            logdir=FLAGS.train_logdir,
            log_every_n_steps=FLAGS.log_steps,
            master=FLAGS.master,
            number_of_steps=FLAGS.training_number_of_steps,
            is_chief=(FLAGS.task == 0),
            session_config=session_config,
            startup_delay_steps=startup_delay_steps,
            init_fn=train_utils.get_model_init_fn(
                FLAGS.train_logdir,
                FLAGS.tf_initial_checkpoint,
                FLAGS.initialize_last_layer,
                last_layers,
                ignore_missing_vars=True),
            summary_op=summary_op,
            save_summaries_secs=FLAGS.save_summaries_secs,
            save_interval_secs=FLAGS.save_interval_secs)
Example #8
def main(unused_argv):

  datasetDescriptor = None
  if FLAGS.config and os.path.isfile(FLAGS.config):
    with open(FLAGS.config) as f:
      trainingConfig = json.load(f)
      for key in trainingConfig:
        if key in FLAGS:
          FLAGS[key].value = trainingConfig[key]
        elif key == 'DatasetDescriptor':
          datasetDescriptor = segmentation_dataset.DatasetDescriptor(
                                name=trainingConfig[key]['name'],
                                splits_to_sizes=trainingConfig[key]['splits_to_sizes'],
                                num_classes=trainingConfig[key]['num_classes'],
                                ignore_label=trainingConfig[key]['ignore_label'],
                              )

  assert FLAGS.dataset_dir, (
      'flag --dataset_dir=None: Flag --dataset_dir must be specified.')

  assert FLAGS.train_logdir, (
      'flag --train_logdir=None: Flag --train_logdir must be specified.')

  tf.logging.set_verbosity(tf.logging.INFO)
  # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
  config = model_deploy.DeploymentConfig(
      num_clones=FLAGS.num_clones,
      clone_on_cpu=FLAGS.clone_on_cpu,
      replica_id=FLAGS.task,
      num_replicas=FLAGS.num_replicas,
      num_ps_tasks=FLAGS.num_ps_tasks)

  # Split the batch across GPUs.
  assert FLAGS.train_batch_size % config.num_clones == 0, (
      'Training batch size not divisible by number of clones (GPUs).')

  clone_batch_size = FLAGS.train_batch_size // config.num_clones

  if datasetDescriptor is None:
    datasetDescriptor = FLAGS.dataset

  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      datasetDescriptor, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)

  tf.gfile.MakeDirs(FLAGS.train_logdir)
  tf.logging.info('Training on %s set', FLAGS.train_split)

  with tf.Graph().as_default() as graph:
    with tf.device(config.inputs_device()):
      samples = input_generator.get(
          dataset,
          FLAGS.train_crop_size,
          clone_batch_size,
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          dataset_split=FLAGS.train_split,
          is_training=True,
          model_variant=FLAGS.model_variant)
      inputs_queue = prefetch_queue.prefetch_queue(
          samples, capacity=128 * config.num_clones)

    # Create the global step on the device storing the variables.
    with tf.device(config.variables_device()):
      global_step = tf.train.get_or_create_global_step()

      # Define the model and create clones.
      model_fn = _build_deeplab
      model_args = (inputs_queue, {
          common.OUTPUT_TYPE: dataset.num_classes
      }, dataset.ignore_label)
      clones = model_deploy.create_clones(config, model_fn, args=model_args)

      # Gather update_ops from the first clone. These contain, for example,
      # the updates for the batch_norm variables created by model_fn.
      first_clone_scope = config.clone_scope(0)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Add summaries for model variables.
    for model_var in slim.get_model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # Add summaries for images, labels, semantic predictions
    if FLAGS.save_summaries_images:
      summary_image = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
      summaries.add(
          tf.summary.image('samples/%s' % common.IMAGE, summary_image))

      first_clone_label = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
      # Scale up summary image pixel values for better visualization.
      pixel_scaling = max(1, 255 // dataset.num_classes)
      summary_label = tf.cast(first_clone_label * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image('samples/%s' % common.LABEL, summary_label))

      first_clone_output = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
      predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)

      summary_predictions = tf.cast(predictions * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image(
              'samples/%s' % common.OUTPUT_TYPE, summary_predictions))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Build the optimizer based on the device specification.
    with tf.device(config.optimizer_device()):
      learning_rate = train_utils.get_model_learning_rate(
          FLAGS.learning_policy, FLAGS.base_learning_rate,
          FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
          FLAGS.training_number_of_steps, FLAGS.learning_power,
          FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
      optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    with tf.device(config.variables_device()):
      total_loss, grads_and_vars = model_deploy.optimize_clones(
          clones, optimizer)
      total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
      summaries.add(tf.summary.scalar('total_loss', total_loss))

      # Modify the gradients for biases and last layer variables.
      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      grad_mult = train_utils.get_model_gradient_multipliers(
          last_layers, FLAGS.last_layer_gradient_multiplier)
      if grad_mult:
        grads_and_vars = slim.learning.multiply_gradients(
            grads_and_vars, grad_mult)

      # Create gradient update op.
      grad_updates = optimizer.apply_gradients(
          grads_and_vars, global_step=global_step)
      update_ops.append(grad_updates)
      update_op = tf.group(*update_ops)
      with tf.control_dependencies([update_op]):
        train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries))

    # Soft placement allows placing on CPU ops without GPU implementation.
    session_config = tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False)

    # Start the training.
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_logdir,
        log_every_n_steps=FLAGS.log_steps,
        master=FLAGS.master,
        number_of_steps=FLAGS.training_number_of_steps,
        is_chief=(FLAGS.task == 0),
        session_config=session_config,
        startup_delay_steps=startup_delay_steps,
        init_fn=train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True),
        summary_op=summary_op,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs)
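
For reference, a hypothetical config file for the JSON-loading block at the top of this example: top-level keys mirror flag names, and DatasetDescriptor mirrors the constructor arguments read in the loop (all values here are made up):

import json

config = {
    "train_batch_size": 8,            # any flag name may appear as a key
    "training_number_of_steps": 30000,
    "DatasetDescriptor": {
        "name": "my_dataset",
        "splits_to_sizes": {"train": 1000, "val": 200},
        "num_classes": 6,
        "ignore_label": 255,
    },
}
with open("training_config.json", "w") as f:
    json.dump(config, f, indent=2)  # produces the JSON that json.load() reads back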
Example #9
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
  config = model_deploy.DeploymentConfig(
      num_clones=FLAGS.num_clones,
      clone_on_cpu=FLAGS.clone_on_cpu,
      replica_id=FLAGS.task,
      num_replicas=FLAGS.num_replicas,
      num_ps_tasks=FLAGS.num_ps_tasks)

  # Split the batch across GPUs.
  assert FLAGS.train_batch_size % config.num_clones == 0, (
      'Training batch size not divisible by number of clones (GPUs).')

  clone_batch_size = FLAGS.train_batch_size // config.num_clones

  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)

  tf.gfile.MakeDirs(FLAGS.train_logdir)
  tf.logging.info('Training on %s set', FLAGS.train_split)

  with tf.Graph().as_default():
    with tf.device(config.inputs_device()):
      samples = input_generator.get(
          dataset,
          FLAGS.train_crop_size,
          clone_batch_size,
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          dataset_split=FLAGS.train_split,
          is_training=True,
          model_variant=FLAGS.model_variant)
      inputs_queue = prefetch_queue.prefetch_queue(
          samples, capacity=128 * config.num_clones)

    # Create the global step on the device storing the variables.
    with tf.device(config.variables_device()):
      global_step = tf.train.get_or_create_global_step()

      # Define the model and create clones.
      model_fn = _build_deeplab
      model_args = (inputs_queue, {
          common.OUTPUT_TYPE: dataset.num_classes
      }, dataset.ignore_label)
      clones = model_deploy.create_clones(config, model_fn, args=model_args)

      # Gather update_ops from the first clone. These contain, for example,
      # the updates for the batch_norm variables created by model_fn.
      first_clone_scope = config.clone_scope(0)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Add summaries for model variables.
    for model_var in slim.get_model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Build the optimizer based on the device specification.
    with tf.device(config.optimizer_device()):
      learning_rate = train_utils.get_model_learning_rate(
          FLAGS.learning_policy, FLAGS.base_learning_rate,
          FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
          FLAGS.training_number_of_steps, FLAGS.learning_power,
          FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
      optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    with tf.device(config.variables_device()):
      total_loss, grads_and_vars = model_deploy.optimize_clones(
          clones, optimizer)
      total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
      summaries.add(tf.summary.scalar('total_loss', total_loss))

      # Modify the gradients for biases and last layer variables.
      last_layers = model.get_extra_layer_scopes()
      grad_mult = train_utils.get_model_gradient_multipliers(
          last_layers, FLAGS.last_layer_gradient_multiplier)
      if grad_mult:
        grads_and_vars = slim.learning.multiply_gradients(
            grads_and_vars, grad_mult)

      # Create gradient update op.
      grad_updates = optimizer.apply_gradients(
          grads_and_vars, global_step=global_step)
      update_ops.append(grad_updates)
      update_op = tf.group(*update_ops)
      with tf.control_dependencies([update_op]):
        train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries))

    # Soft placement allows placing on CPU ops without GPU implementation.
    session_config = tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False)
    session_config.gpu_options.allow_growth = FLAGS.gpu_allow_growth

    # Save checkpoints regularly.
    saver = tf.train.Saver(max_to_keep=FLAGS.max_to_keep)

    # Start the training.
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_logdir,
        log_every_n_steps=FLAGS.log_steps,
        master=FLAGS.master,
        number_of_steps=FLAGS.training_number_of_steps,
        is_chief=(FLAGS.task == 0),
        session_config=session_config,
        startup_delay_steps=startup_delay_steps,
        init_fn=train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True),
        summary_op=summary_op,
        saver=saver,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs)
Example #10
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                               FLAGS.eval_split,
                                               dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    ckpt = tf.train.get_checkpoint_state(FLAGS.checkpoint_dir)
    # vars_to_restore = get_tensors_in_checkpoint_file(file_name=ckpt.model_checkpoint_path)
    # print([v.name for v in tf.global_variables()])
    # vars_to_restore = [v for v in tf.global_variables() if v.name[:-2] in vars_to_restore]

    with tf.Graph().as_default():
        samples = input_generator.get(dataset,
                                      FLAGS.eval_crop_size,
                                      FLAGS.eval_batch_size,
                                      min_resize_value=FLAGS.min_resize_value,
                                      max_resize_value=FLAGS.max_resize_value,
                                      resize_factor=FLAGS.resize_factor,
                                      dataset_split=FLAGS.eval_split,
                                      is_training=False,
                                      model_variant=FLAGS.model_variant)

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]
        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        labels = tf.where(tf.equal(labels, dataset.ignore_label),
                          tf.zeros_like(labels), labels)

        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
            predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'
        prediction_tag_2 = 'iou_per_class'
        prediction_tag_confMatrix = 'confusion_matrix'

        # SegNet-style IoU histogram (confusion matrix), in TensorFlow ops.
        def fast_hist(gt, pred, n_clss):
            # Each (gt, pred) pair maps to bin n_clss * gt + pred; bincount over
            # these bins yields a flattened [n_clss, n_clss] confusion matrix.
            # true/false mask where gt is valid:
            # k = (gt >= 0) & (gt < n_clss)
            # return tf.reshape(tf.bincount(n_clss * tf.cast(gt[k], tf.int8) + pred[k], minlength=n_clss ** 2),
            #                   [n_clss, n_clss])
            return tf.reshape(
                tf.bincount(tf.add(tf.multiply(n_clss, gt),
                                   tf.cast(pred, tf.int32)),
                            minlength=n_clss**2), [n_clss, n_clss])

        def get_hist(predictions, labels, num_class, batch_size):
            # Sum the per-image confusion matrices over the batch.
            # num_class = predictions.shape[3]
            # batch_size = predictions.shape[0]
            hist = tf.zeros((num_class, num_class), dtype=tf.int32)
            for i in range(batch_size):
                hist += fast_hist(labels[i], predictions[i], num_class)
            return hist

        # Define the evaluation metric.
        metric_map = {}
        metric_map[predictions_tag] = tf.metrics.mean_iou(predictions,
                                                          labels,
                                                          dataset.num_classes,
                                                          weights=weights)

        # metric_map[prediction_tag_2] = tf.map_fn(get_hist, [predictions, labels,
        #                                         dataset.num_classes, FLAGS.eval_batch_size])
        # metric_map[prediction_tag_2] = get_hist(predictions, labels,
        #                                         num_class=dataset.num_classes, batch_size=FLAGS.eval_batch_size)

        # why doesn't this work?
        # metric_map[prediction_tag_confMatrix] = tf.contrib.metrics.confusion_matrix(labels, predictions,
        #                                     num_classes=dataset.num_classes)

        # metric_map['precision'] = tf.contrib.metrics.streaming_precision(predictions, labels)
        metric_map['accuracy'] = tf.metrics.accuracy(labels, predictions)
        # compute each class's iou thx 2 MrZhousf :D
        mean_iou_v, update_op = my_metrics.iou(predictions,
                                               labels,
                                               dataset.num_classes,
                                               weights=weights)
        acc_v, update_op2 = my_metrics.acc(predictions,
                                           labels,
                                           dataset.num_classes,
                                           weights=weights)
        for index in range(0, dataset.num_classes):
            metric_map[str(index)+'_' + segmentation_dataset.get_classname(FLAGS.dataset, index) + '_iou'] =\
                (mean_iou_v[index], update_op[index])
            metric_map[str(index)+'_' + segmentation_dataset.get_classname(FLAGS.dataset, index) + '_acc'] =\
                (acc_v[index], update_op2[index])

        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(metric_value,
                                              metric_name,
                                              print_summary=True)

        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

        tf.logging.info('Eval num images %d', dataset.num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.eval_batch_size, num_batches)

        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs)
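
For reference, per-class IoU falls out of the confusion matrix that fast_hist accumulates; a NumPy sketch of that computation (my_metrics itself is an external helper not shown here):

import numpy as np

def per_class_iou(hist):
    # hist: [num_classes, num_classes] confusion matrix, rows = ground truth.
    tp = np.diag(hist)
    fp = hist.sum(axis=0) - tp
    fn = hist.sum(axis=1) - tp
    return tp / np.maximum(tp + fp + fn, 1)  # guard against empty classes

# per_class_iou(np.array([[5, 1], [2, 8]])) -> array([0.625, 0.72727273])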
Example #11
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    if FLAGS.batch_iter < 1:
        FLAGS.batch_iter = 1
    if FLAGS.batch_iter != 1:
        if not (FLAGS.num_clones == 1 and FLAGS.num_replicas == 1):
            raise NotImplementedError(
                "train.py: **NOTE** -- train_utils.train_step_custom may not work with parallel GPUs / clones > 1! Be sure you are only using one GPU."
            )

    print('\ntrain.py: Accumulating gradients over {} iterations\n'.format(
        FLAGS.batch_iter))

    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
                                           clone_on_cpu=FLAGS.clone_on_cpu,
                                           replica_id=FLAGS.task,
                                           num_replicas=FLAGS.num_replicas,
                                           num_ps_tasks=FLAGS.num_ps_tasks)

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisible by number of clones (GPUs).')

    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset,
        FLAGS.train_split,
        dataset_dir=FLAGS.dataset_dir,
    )

    tf.gfile.MakeDirs(FLAGS.train_logdir)
    tf.logging.info('Training on %s set', FLAGS.train_split)

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            samples = input_generator.get(
                dataset,
                FLAGS.train_crop_size,
                clone_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                dataset_split=FLAGS.train_split,
                is_training=True,
                model_variant=FLAGS.model_variant)
            inputs_queue = prefetch_queue.prefetch_queue(
                samples, capacity=128 * config.num_clones)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            model_fn = _build_deeplab
            if FLAGS.class_balanced_loss:
                print(
                    'train.py: class_balanced_loss=True. Reading loss weights from segmentation_dataset.py'
                )
            else:
                print(
                    'train.py: class_balanced_loss=False. Setting loss weights to 1.0 for every class.'
                )
                dataset.loss_weight = 1.0

            #_build_deeplab has model args:
            #(inputs_queue, outputs_to_num_classes, ignore_label, loss_weight):

            outputs_to_num_classes = {common.OUTPUT_TYPE: dataset.num_classes}

            model_args = (inputs_queue,
                          outputs_to_num_classes,
                          dataset.ignore_label, dataset.loss_weight)
            clones = model_deploy.create_clones(config,
                                                model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by model_fn.
            first_clone_scope = config.clone_scope(0)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for model variables.
        for model_var in slim.get_model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        # Add summaries for images, labels, semantic predictions
        if FLAGS.save_summaries_images:
            summary_image = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
            summaries.add(
                tf.summary.image('samples/%s' % common.IMAGE, summary_image))

            first_clone_label = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
            # Scale up summary image pixel values for better visualization.
            pixel_scaling = max(1, 255 // dataset.num_classes)
            summary_label = tf.cast(first_clone_label * pixel_scaling,
                                    tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.LABEL, summary_label))

            first_clone_output = graph.get_tensor_by_name(
                ('%s/%s:0' %
                 (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
            predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)

            summary_predictions = tf.cast(predictions * pixel_scaling,
                                          tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                 summary_predictions))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy, FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps, FLAGS.learning_power,
                FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   FLAGS.momentum)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

        with tf.device(config.variables_device()):

            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')

            # Modify the gradients for biases and last layer variables.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # batch_iter <= 1 means no gradient accumulation: reset it to 0,
            # which presumably signals train_step_custom below to run the
            # plain train op on every step.
            if FLAGS.batch_iter <= 1:
                FLAGS.batch_iter = 0
                summaries.add(tf.summary.scalar('total_loss', total_loss))
                grad_updates = optimizer.apply_gradients(
                    grads_and_vars, global_step=global_step)
                update_ops.append(grad_updates)
                update_op = tf.group(*update_ops)
                with tf.control_dependencies([update_op]):
                    train_tensor = tf.identity(total_loss, name='train_op')
                accum_tensor = None
            else:

                ############ Accumulate grads_and_vars op. ####################
                accum_update_ops = list(update_ops)  #.copy()
                # Create (grad, var) list to accumulate gradients in. Initialize to 0.
                # Note: name.strip(':0') would strip the *characters* ':' and '0'
                # from both ends, so split off the ':0' output suffix instead.
                accum_grads_and_vars = [
                    (tf.Variable(tf.zeros_like(gv[0]),
                                 trainable=False,
                                 name=gv[0].name.rsplit(':', 1)[0] + "_accum"),
                     gv[1]) for gv in grads_and_vars
                ]
                assert len(accum_grads_and_vars) == len(grads_and_vars)

                total_loss_accum = tf.Variable(0.0,
                                               dtype=tf.float32,
                                               trainable=False)
                accum_loss_update_op = total_loss_accum.assign_add(total_loss)
                accum_update_ops.append(accum_loss_update_op)

                ## Accumulate gradients: accum_grad[i] += (grad[i] / FLAGS.batch_iter)  # scaled gradients.
                accum_ops = [
                    accum_grads_and_vars[i][0].assign_add(
                        tf.div(gv[0], 1.0 * FLAGS.batch_iter))
                    for i, gv in enumerate(grads_and_vars)
                ]
                accum_update_ops.extend(accum_ops)  # extend: tf.group() takes ops, not lists

                accum_update_op = tf.group(*accum_update_ops)
                with tf.control_dependencies([accum_update_op]):
                    accum_print_ops = []
                    if FLAGS.batch_iter_verbose:
                        accum_print_ops.extend([
                            tf.Print(
                                tf.constant(0), [tf.add(global_step, 1)],
                                message=
                                'train.py: accumulating gradients for step: '),
                            #tf.Print(total_loss, [total_loss], message='    step total_loss: ')
                            #tf.Print(tf.constant(0), [accum_grads_and_vars[0][0]], message='    '),
                        ])
                    with tf.control_dependencies([tf.group(*accum_print_ops)]):
                        accum_tensor = tf.identity(total_loss_accum,
                                                   name='accum_op')

                ##################### Train op (apply [accumulated] grads and vars) ###############################
                train_update_ops = list(update_ops)  #.copy()
                ## Create gradient update op.
                # Apply gradients from accumulated gradients
                grad_updates = optimizer.apply_gradients(
                    accum_grads_and_vars, global_step=global_step)
                train_update_ops.append(grad_updates)

                grad_print_ops = []
                if FLAGS.batch_iter_verbose:
                    grad_print_ops.extend([
                        #                tf.Print(tf.constant(0), [grads_and_vars[0][0], grads_and_vars[0][1]], message='---grads[0] and vars[0]---------\n'),
                        #tf.Print(tf.constant(0), [], message=grads_and_vars[0][1].name),
                        tf.Print(tf.constant(0), [accum_grads_and_vars[0][0]],
                                 message='GRADS  BEFORE ZERO: ')
                    ])
                train_update_ops.extend(grad_print_ops)

                total_loss_accum_average = tf.div(total_loss_accum,
                                                  FLAGS.batch_iter)
                summaries.add(
                    tf.summary.scalar('total_loss', total_loss_accum_average))

                train_update_op = tf.group(*train_update_ops)
                with tf.control_dependencies([train_update_op]):
                    zero_ops = []

                    zero_accum_ops = [
                        agv[0].assign(tf.zeros_like(agv[0]))
                        for agv in accum_grads_and_vars
                    ]
                    zero_ops.extend(zero_accum_ops)

                    zero_ops.append(total_loss_accum.assign(0.0))

                    zero_op = tf.group(*zero_ops)
                    with tf.control_dependencies([zero_op]):
                        grad_print_ops = []
                        if FLAGS.batch_iter_verbose:
                            grad_print_ops.extend([
                                #tf.Print(tf.constant(0), [accum_grads_and_vars[0][0]], message='GRADS AFTER ZERO ')
                            ])
                        with tf.control_dependencies(
                            [tf.group(*grad_print_ops)]):
                            train_tensor = tf.identity(
                                total_loss_accum_average, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or
        # _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows ops without a GPU implementation to be placed on CPU.
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)

        session_config.gpu_options.allow_growth = True

        #train_step_exit = train_utils.train_step_exit
        train_step_custom = train_utils.train_step_custom
        if FLAGS.validation_interval <= 0:
            FLAGS.validation_interval = FLAGS.training_number_of_steps
        else:
            print("*** Validation interval: {} ***".format(
                FLAGS.validation_interval))

        # Start the training.
        slim.learning.train(train_tensor,
                            logdir=FLAGS.train_logdir,
                            train_step_fn=train_step_custom(
                                VALIDATION_N=FLAGS.validation_interval,
                                ACCUM_OP=accum_tensor,
                                ACCUM_STEPS=FLAGS.batch_iter),
                            log_every_n_steps=FLAGS.log_steps,
                            master=FLAGS.master,
                            number_of_steps=FLAGS.training_number_of_steps,
                            is_chief=(FLAGS.task == 0),
                            session_config=session_config,
                            startup_delay_steps=startup_delay_steps,
                            init_fn=train_utils.get_model_init_fn(
                                FLAGS.train_logdir,
                                FLAGS.tf_initial_checkpoint,
                                FLAGS.initialize_last_layer,
                                last_layers,
                                ignore_missing_vars=True),
                            summary_op=summary_op,
                            save_summaries_secs=FLAGS.save_summaries_secs,
                            save_interval_secs=FLAGS.save_interval_secs)
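Beispiel #11's core trick is gradient accumulation: non-trainable accumulator variables collect scaled gradients over batch_iter micro-batches, the optimizer applies them once, and the accumulators are zeroed. A self-contained sketch of the three phases, stripped of the deployment and summary machinery (the model, optimizer, and ACCUM_STEPS here are illustrative):

import tensorflow as tf

ACCUM_STEPS = 4  # plays the role of FLAGS.batch_iter

x = tf.placeholder(tf.float32, [None, 10])
y = tf.placeholder(tf.float32, [None, 1])
loss = tf.losses.mean_squared_error(y, tf.layers.dense(x, 1))

opt = tf.train.MomentumOptimizer(learning_rate=0.01, momentum=0.9)
grads_and_vars = opt.compute_gradients(loss)

# Phase 0: one zero-initialized, non-trainable accumulator per gradient.
accums = [tf.Variable(tf.zeros_like(g), trainable=False)
          for g, _ in grads_and_vars]

# Phase 1: accum[i] += grad[i] / ACCUM_STEPS  (run once per micro-batch).
accum_op = tf.group(*[a.assign_add(g / ACCUM_STEPS)
                      for a, (g, _) in zip(accums, grads_and_vars)])

# Phase 2: apply the accumulated gradients, then (phase 3) zero them.
apply_op = opt.apply_gradients(
    [(a, v) for a, (_, v) in zip(accums, grads_and_vars)])
with tf.control_dependencies([apply_op]):
    train_op = tf.group(*[a.assign(tf.zeros_like(a)) for a in accums])

# Driver: sess.run(accum_op) on each of ACCUM_STEPS micro-batches,
# then sess.run(train_op) once to take the optimizer step.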
Beispiel #12
0
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
                                           clone_on_cpu=FLAGS.clone_on_cpu,
                                           replica_id=FLAGS.task,
                                           num_replicas=FLAGS.num_replicas,
                                           num_ps_tasks=FLAGS.num_ps_tasks)

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisible by number of clones (GPUs).')

    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                               FLAGS.train_split,
                                               dataset_dir=FLAGS.dataset_dir)

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            samples = input_generator.get(
                dataset,
                FLAGS.train_crop_size,
                clone_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                dataset_split=FLAGS.train_split,
                is_training=True,
                model_variant=FLAGS.model_variant)
            inputs_queue = prefetch_queue.prefetch_queue(samples,
                                                         capacity=128 *
                                                         config.num_clones)

            samples = inputs_queue.dequeue()

            # Add name to input and label nodes so we can add to summary.
            samples[common.IMAGE] = tf.identity(samples[common.IMAGE],
                                                name=common.IMAGE)
            samples[common.LABEL] = tf.identity(samples[common.LABEL],
                                                name=common.LABEL)

            print(samples)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

        init = tf.global_variables_initializer()
        with tf.Session() as session:
            session.run(init)

            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(coord=coord)

            print('Start verification process...')
            try:
                while True:
                    out_image, out_label = session.run(
                        [samples[common.IMAGE], samples[common.LABEL]])

                    #write_file("out_label.csv",np.squeeze(out_label[0], axis=2))

                    cv2.imshow(
                        'out_image',
                        cv2.cvtColor(out_image[0] / 255, cv2.COLOR_RGB2BGR))
                    cv2.imshow('out_label',
                               np.asarray(out_label[0] * 100, dtype=np.uint8))

                    colored_label = get_dataset_colormap.label_to_color_image(
                        np.squeeze(out_label[0]),
                        dataset=get_dataset_colormap.get_pascal_name())
                    cv2.imshow(
                        "colored_label",
                        cv2.cvtColor(colored_label.astype(np.uint8),
                                     cv2.COLOR_RGB2BGR))

                    alpha = 0.5
                    img_add = cv2.addWeighted(out_image[0], alpha,
                                              colored_label.astype(np.float32),
                                              1 - alpha, 0)
                    cv2.imshow("colored_overlap",
                               cv2.cvtColor(img_add, cv2.COLOR_RGB2BGR) / 255)
                    cv2.waitKey(0)

            except tf.errors.OutOfRangeError:
                print("end!")

            coord.request_stop()
            coord.join(threads)
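The display loop above is the usual OpenCV recipe for eyeballing a segmentation pipeline: map class ids to colors, convert RGB to BGR, and alpha-blend the result over the input. A standalone sketch of that overlay step (the random colormap is a stand-in for get_dataset_colormap):

import cv2
import numpy as np

def show_overlay(image_rgb, label, num_classes, alpha=0.5):
    """Alpha-blend a per-pixel class-id map over an RGB image and display it."""
    # Stand-in colormap: one fixed pseudo-random color per class id.
    colormap = np.random.RandomState(0).randint(
        0, 255, size=(num_classes, 3)).astype(np.uint8)
    colored = colormap[label]  # (H, W) int ids -> (H, W, 3) uint8 colors

    blended = cv2.addWeighted(image_rgb.astype(np.float32), alpha,
                              colored.astype(np.float32), 1 - alpha, 0)
    # OpenCV expects BGR; float images are displayed in the [0, 1] range.
    cv2.imshow('overlay', cv2.cvtColor(blended, cv2.COLOR_RGB2BGR) / 255)
    cv2.waitKey(0)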
Beispiel #13
0
def process_one(filepath):

    movefile(filepath)
    record.write_record(None, reader)

    # with get_prepare_graph().as_default():
    #     samples, predictions = do_prepare()
    #
    # with vis_graph.as_default():
    #     tf.train.get_or_create_global_step()
    #     sv = get_supervisor()
    #     with sv.managed_session(A_master, start_standard_services=False) as sess:
    #         do_process_batch(sess, samples, predictions)
    #         skel_extract.extract()
    #         skel_extract.load()

    vis_graph = tf.Graph()
    with vis_graph.as_default():
        dataset = segmentation_dataset.get_dataset(A_dataset,
                                                   A_vis_split,
                                                   dataset_dir=A_dataset_dir)
        samples = input_generator.get(dataset,
                                      A_vis_crop_size,
                                      A_vis_batch_size,
                                      min_resize_value=A_min_resize_value,
                                      max_resize_value=A_max_resize_value,
                                      resize_factor=A_resize_factor,
                                      dataset_split=A_vis_split,
                                      is_training=False,
                                      model_variant=A_model_variant)
        model_options = mycommon.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=A_vis_crop_size,
            atrous_rates=A_atrous_rates,
            output_stride=A_output_stride)
        print(samples[common.IMAGE])

        predictions = model.predict_labels(samples[common.IMAGE],
                                           model_options=model_options,
                                           image_pyramid=A_image_pyramid)

        predictions = predictions[common.OUTPUT_TYPE]

        tf.train.get_or_create_global_step()
        saver = tf.train.Saver(slim.get_variables_to_restore())
        sv = tf.train.Supervisor(graph=vis_graph,
                                 logdir=A_vis_logdir,
                                 init_op=tf.global_variables_initializer(),
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=saver)
        with sv.managed_session('', start_standard_services=False) as sess:
            sv.start_queue_runners(sess)
            sv.saver.restore(sess,
                             tf.train.latest_checkpoint(A_checkpoint_dir))
            #samples, predictions = do_prepare()
            #tf.train.get_or_create_global_step()
            #do_process_batch(get_session(), samples, predictions)
            save_dir = os.path.join(work_dir, _SEMANTIC_PREDICTION_SAVE_FOLDER)

            my_process_batch(sess, samples[common.ORIGINAL_IMAGE], predictions,
                             samples[common.IMAGE_NAME],
                             samples[common.HEIGHT], samples[common.WIDTH],
                             save_dir)
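The restore pattern used here (and again in the next examples) builds a fresh graph, then lets a tf.train.Supervisor initialize variables and restore the latest checkpoint without starting its standard services. Reduced to its core (logdir and checkpoint paths are placeholders):

import tensorflow as tf

slim = tf.contrib.slim

g = tf.Graph()
with g.as_default():
    # ... build input pipeline and model here ...
    tf.train.get_or_create_global_step()
    saver = tf.train.Saver(slim.get_variables_to_restore())
    sv = tf.train.Supervisor(graph=g,
                             logdir='/tmp/vis_logdir',  # placeholder
                             init_op=tf.global_variables_initializer(),
                             summary_op=None,           # no summary service
                             summary_writer=None,
                             global_step=None,
                             saver=saver)
    with sv.managed_session('', start_standard_services=False) as sess:
        sv.start_queue_runners(sess)
        sv.saver.restore(sess, tf.train.latest_checkpoint('/tmp/ckpts'))
        # sess.run(...) inference goes here.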
Beispiel #14
0
def main(unused_argv):
    # input('in vis.main:')
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    # FLAGS.dataset: homeplus
    # FLAGS.vis_split: val
    # FLAGS.dataset_dir: tfrecord
    dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                               FLAGS.vis_split,
                                               dataset_dir=FLAGS.dataset_dir)

    # input('dataset finish')
    train_id_to_eval_id = None
    # if dataset.name == segmentation_dataset.get_cityscapes_dataset_name():
    #   tf.logging.info('Cityscapes requires converting train_id to eval_id.')
    #   train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

    # Prepare for visualization.
    tf.gfile.MakeDirs(FLAGS.vis_logdir)
    save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(save_dir)
    raw_save_dir = os.path.join(FLAGS.vis_logdir,
                                _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(raw_save_dir)

    tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

    input('prepare finished')

    g = tf.Graph()
    with g.as_default():
        # get preprocessed data
        print('min_resize_value: %s' % str(FLAGS.min_resize_value))
        print('max_resize_value: %s' % str(FLAGS.max_resize_value))
        input('--')
        print(FLAGS.vis_crop_size)
        print(FLAGS.vis_batch_size)
        print(FLAGS.min_resize_value)
        print(FLAGS.max_resize_value)
        print(FLAGS.resize_factor)
        print(FLAGS.vis_split)
        print(FLAGS.model_variant)
        print(FLAGS.atrous_rates)
        print(FLAGS.output_stride)
        # print(FLAGS)
        samples = input_generator.get(
            dataset,  # a dataset
            FLAGS.vis_crop_size,  # [1505, 2049]
            FLAGS.vis_batch_size,  # 1
            min_resize_value=FLAGS.min_resize_value,  # None
            max_resize_value=FLAGS.max_resize_value,  # None
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.vis_split,  # val
            is_training=False,
            model_variant=FLAGS.model_variant)  # xception 65

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.vis_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)
        print(samples[common.IMAGE])

        input('before predict')
        # maybe predict
        print(dataset.num_classes)
        print(FLAGS.vis_crop_size)
        print(FLAGS.atrous_rates)
        print(FLAGS.output_stride)
        input()
        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            # images: A tensor of size [batch, height, width, channels]
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options=model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            # Multi-scale branch (mirrors Beispiel #15); without it,
            # `predictions` would be undefined below.
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]
        print(predictions.shape)
        print(predictions.dtype)
        input()

        input('predictions finish')
        if FLAGS.min_resize_value and FLAGS.max_resize_value:
            input('not pos')
            # Only support batch_size = 1, since we assume the dimensions of the
            # original image after tf.squeeze are [height, width, 3].
            assert FLAGS.vis_batch_size == 1

            # Reverse the resizing and padding operations performed in preprocessing.
            # First, we slice the valid regions (i.e., remove the padded region) and
            # then we resize the predictions back.
            original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
            original_image_shape = tf.shape(original_image)
            predictions = tf.slice(
                predictions, [0, 0, 0],
                [1, original_image_shape[0], original_image_shape[1]])
            resized_shape = tf.to_int32([
                tf.squeeze(samples[common.HEIGHT]),
                tf.squeeze(samples[common.WIDTH])
            ])
            predictions = tf.squeeze(
                tf.image.resize_images(
                    tf.expand_dims(predictions, 3),
                    resized_shape,
                    method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                    align_corners=True), 3)
        input('before saver')
        tf.train.get_or_create_global_step()
        saver = tf.train.Saver(slim.get_variables_to_restore())
        sv = tf.train.Supervisor(graph=g,
                                 logdir=FLAGS.vis_logdir,
                                 init_op=tf.global_variables_initializer(),
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=saver)

        print('dataset.num_samples: ' + str(dataset.num_samples))
        print('FLAGS.vis_batch_size: ' + str(FLAGS.vis_batch_size))
        # input()
        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.vis_batch_size)))
        last_checkpoint = None

        # Loop to visualize the results when new checkpoint is created.
        # last_checkpoint = slim.evaluation.wait_for_new_checkpoint(FLAGS.checkpoint_dir, last_checkpoint)
        tf.logging.info('Starting visualization at ' +
                        time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
        tf.logging.info('Visualizing with model %s', last_checkpoint)

        # start nvidia in opening session
        with sv.managed_session(FLAGS.master,
                                start_standard_services=False) as sess:
            input('in session')
            print(FLAGS.checkpoint_dir)
            sv.start_queue_runners(sess)
            my_checkpoint = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
            sv.saver.restore(sess, my_checkpoint)
            # sv.saver.restore(sess, last_checkpoint)

            tf.logging.info('Visualizing batch %d / %d', 1, num_batches)

            input('before batch')
            # save one prediction png
            _process_batch(sess=sess,
                           original_images=samples[common.ORIGINAL_IMAGE],
                           semantic_predictions=predictions,
                           image_names=samples[common.IMAGE_NAME],
                           image_heights=samples[common.HEIGHT],
                           image_widths=samples[common.WIDTH],
                           image_id_offset=0,
                           save_dir=save_dir,
                           raw_save_dir=raw_save_dir,
                           train_id_to_eval_id=train_id_to_eval_id)

            tf.logging.info('Finished visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
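Both this example and the next undo the eval-time resize/pad preprocessing the same way: slice off the padded border, then resize back with nearest-neighbor interpolation so that no new label ids are invented by blending. The step in isolation (a sketch; argument names are illustrative):

import tensorflow as tf

def unpad_and_resize(predictions, valid_height, valid_width,
                     target_height, target_width):
    """Reverse eval-time preprocessing on a [1, H, W] label-id tensor.

    First slice off the padded border, then resize back with nearest-neighbor
    interpolation so the output stays a valid label map (no blended ids).
    """
    predictions = tf.slice(predictions, [0, 0, 0],
                           [1, valid_height, valid_width])
    predictions = tf.squeeze(
        tf.image.resize_images(tf.expand_dims(predictions, 3),
                               [target_height, target_width],
                               method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                               align_corners=True), 3)
    return predictions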
Beispiel #15
0
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset,
        FLAGS.vis_split,
        dataset_dir=FLAGS.dataset_dir,
        use_input_hints=FLAGS.input_hints,
        hint_types=FLAGS.hint_types)

    train_id_to_eval_id = None
    if dataset.name == segmentation_dataset.get_cityscapes_dataset_name(
    ) and FLAGS.convert_to_eval_id:
        tf.logging.info('Cityscapes requires converting train_id to eval_id.')
        train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

    # Prepare for visualization.
    tf.gfile.MakeDirs(FLAGS.vis_logdir)
    save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(save_dir)
    raw_save_dir = os.path.join(FLAGS.vis_logdir,
                                _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(raw_save_dir)

    logit_save_dir = os.path.join(FLAGS.vis_logdir, 'logits')
    tf.gfile.MakeDirs(logit_save_dir)

    uncertainty_save_dir = os.path.join(FLAGS.vis_logdir, 'uncertainties')
    tf.gfile.MakeDirs(uncertainty_save_dir)

    tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

    g = tf.Graph()
    with g.as_default():
        # Running samples_orig will grab a new batch
        samples_orig = input_generator.get(
            dataset,
            FLAGS.vis_crop_size,
            FLAGS.vis_batch_size,
            min_resize_value=FLAGS.min_resize_value,
            max_resize_value=FLAGS.max_resize_value,
            resize_factor=FLAGS.resize_factor,
            dataset_split=FLAGS.vis_split,
            is_training=False,
            model_variant=FLAGS.model_variant)
        # samples_placeholders will represent a batch of data for the network. The values will be filled
        # by samples_orig. Decoupled so that same batch can be run through network multiple times.
        # See _process_batch.
        samples_placeholders = {}
        for k, v in samples_orig.items():
            samples_placeholders[k] = tf.placeholder(
                dtype=v.dtype, shape=v.shape, name='samples_{}'.format(k))

        # The original code was written against a 'samples' dict, so leave that
        # code alone and build a fresh dict here. We don't alias
        # samples = samples_placeholders because samples is overwritten several
        # times, and samples_placeholders must stay untouched so it can be fed
        # from samples_orig.
        samples = {k: v for k, v in samples_placeholders.items()}

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.vis_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if FLAGS.input_hints:  # or if common.HINT in samples.keys():
            if 'dynamic_class_partial_boundary_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using dynamic partial boundary class hints, do not use other hint types!'
                print("----")
                print(
                    "eval.py: Partial boundary hints with grid {}x{}.".format(
                        FLAGS.dynamic_class_partial_boundary_hint_B,
                        FLAGS.dynamic_class_partial_boundary_hint_B))
                print("eval.py: Drawing blocks with p {}.".format(
                    FLAGS.dynamic_class_partial_boundary_hint_p))
                if FLAGS.dynamic_class_partial_boundary_full_block:
                    print(
                        "eval.py: Keeping entire block instead of masking boundaries."
                    )
                else:
                    print(
                        "eval.py: Masking with boundary threshold {}.".format(
                            FLAGS.boundary_threshold))

                print("----")

                if FLAGS.dynamic_class_partial_boundary_full_block:
                    boundary_mask = tf.cast(
                        tf.ones_like(samples[common.LABEL]), tf.uint8)
                else:
                    boundary_mask = tf.cast(
                        tf.less(samples[common.BOUNDARY_DMAP],
                                FLAGS.boundary_threshold), tf.uint8)

                class_hints, hinted = tf.py_func(
                    func=train_utils.generate_class_partial_boundaries_helper(
                        B=FLAGS.dynamic_class_partial_boundary_hint_B,
                        p=FLAGS.dynamic_class_partial_boundary_hint_p),
                    inp=[samples[common.LABEL], boundary_mask],
                    Tout=[tf.uint8, tf.bool])
                samples[common.HINT] = class_hints
                samples[common.HINT].set_shape(
                    samples[common.LABEL].get_shape().as_list())
                # Now preprocess this. Set the flag so that the rest of the work will be done as usual.
                FLAGS.hint_types = ['class_hint']
            ###

            if 'dynamic_class_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using dynamic class hints, do not use other hint types!'
                print("----")
                print(
                    "WARNING: Do not use dynamic class hints when simulating crowdsourced points as the points should not change between runs."
                )
                print("vis.py: Drawing hints with geo mean {}.".format(
                    FLAGS.dynamic_class_hint_geo_mean))
                print("vis.py: Masking with boundary threshold {}.".format(
                    FLAGS.boundary_threshold))
                print("----")
                boundary_mask = tf.cast(
                    tf.less(samples[common.BOUNDARY_DMAP],
                            FLAGS.boundary_threshold), tf.uint8)
                class_hints, hinted = tf.py_func(
                    func=train_utils.generate_class_clicks_helper(
                        geo_mean=FLAGS.dynamic_class_hint_geo_mean),
                    inp=[samples[common.LABEL], boundary_mask],
                    Tout=[tf.uint8, tf.bool])
                samples[common.HINT] = class_hints
                samples[common.HINT].set_shape(
                    samples[common.LABEL].get_shape().as_list())
                # Now preprocess this. Set the flag so that the rest of the work will be done as usual.
                FLAGS.hint_types = ['class_hint']

            # If using class hints, preprocess into num_class binary mask channels
            if 'class_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using class hints, do not use other hint types!'
                num_classes = dataset.num_classes
                print('vis.py: num classes is {}'.format(num_classes))
                class_hint_channels_list = []
                for label in range(num_classes):
                    # Multiplying by 255 brings the hint into the same range as
                    # image pixels, so the feature_extractor mean subtraction
                    # reduces it back to the 0-1 range.
                    class_hint_channel = tf.to_float(
                        tf.equal(samples[common.HINT], label)) * 255
                    class_hint_channels_list.append(class_hint_channel)
                class_hint_channels = tf.concat(class_hint_channels_list,
                                                axis=-1)
                samples[common.HINT] = class_hint_channels

            # Get hints and concat to image as input into network
            samples[common.HINT] = tf.identity(samples[common.HINT],
                                               name=common.HINT)
            model_inputs = tf.concat(
                [samples[common.IMAGE],
                 tf.to_float(samples[common.HINT])],
                axis=-1)
        else:
            # Just image is input into network
            model_inputs = samples[common.IMAGE]

        outputs_to_scales_to_logits = None
        logits = None
        predictions = None
        fixed_features = None
        extra_to_run = {}
        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            if FLAGS.compute_uncertainty and FLAGS.force_dropout_only_branch:
                fixed_features = model._get_features_after_decoder(
                    images=model_inputs,
                    model_options=model_options,
                    reuse=None,
                    is_training=False,
                    fine_tune_batch_norm=False,
                    force_dropout=True,
                    force_dropout_only_branch=FLAGS.force_dropout_only_branch,
                    keep_prob=FLAGS.keep_prob)

                samples_placeholders['fixed_features'] = tf.placeholder(
                    dtype=fixed_features.dtype, shape=fixed_features.shape)
                logits_from_fixed_features = model._get_branch_logits(
                    samples_placeholders['fixed_features'],
                    model_options.outputs_to_num_classes[common.OUTPUT_TYPE],
                    model_options.atrous_rates,
                    aspp_with_batch_norm=model_options.aspp_with_batch_norm,
                    kernel_size=model_options.logits_kernel_size,
                    reuse=None,
                    scope_suffix=common.OUTPUT_TYPE,
                    keep_prob=FLAGS.keep_prob,
                    force_dropout=True)
                logits_from_fixed_features = tf.image.resize_bilinear(
                    logits_from_fixed_features,
                    size=tf.shape(samples[common.IMAGE])[1:3],
                    align_corners=True)

                softmax_from_fixed_features = tf.nn.softmax(
                    logits_from_fixed_features)

                samples_placeholders['accumulated_softmax'] = tf.placeholder(
                    dtype=softmax_from_fixed_features.dtype,
                    shape=FLAGS.vis_placeholder_size)
                #shape=[1, 1025, 2049, 19])
                #shape=[1, 513, 513, 23])
                samples_placeholders[
                    'accumulated_softmax_sq'] = tf.placeholder(
                        dtype=softmax_from_fixed_features.dtype,
                        shape=FLAGS.vis_placeholder_size)
                #shape=[1, 1025, 2049, 19])
                #shape=[1, 513, 513, 23])

                accumulated_softmax = samples_placeholders[
                    'accumulated_softmax'] + softmax_from_fixed_features
                accumulated_softmax_sq = samples_placeholders[
                    'accumulated_softmax_sq'] + tf.square(
                        softmax_from_fixed_features)
                extra_to_run['accumulated_softmax'] = accumulated_softmax
                extra_to_run['accumulated_softmax_sq'] = accumulated_softmax_sq

            elif FLAGS.save_logits or FLAGS.compute_uncertainty:
                predictions, outputs_to_scales_to_logits = model.predict_labels(
                    # samples[common.IMAGE],
                    model_inputs,
                    model_options=model_options,
                    image_pyramid=FLAGS.image_pyramid,
                    also_return_logits=True,
                    force_dropout=(FLAGS.compute_uncertainty
                                   or FLAGS.force_dropout),
                    force_dropout_only_branch=FLAGS.force_dropout_only_branch,
                    keep_prob=FLAGS.keep_prob)

                assert tuple(FLAGS.eval_scales) == (1.0, )
                assert len(outputs_to_scales_to_logits) == 1
                for output in sorted(outputs_to_scales_to_logits):
                    scales_to_logits = outputs_to_scales_to_logits[output]
                    logits = scales_to_logits[model._MERGED_LOGITS_SCOPE]

                if FLAGS.compute_uncertainty:
                    assert not FLAGS.save_logits
                    # We need full size logits to compute final prediction and uncertainty.
                    logits = tf.image.resize_bilinear(
                        logits,
                        size=tf.shape(model_inputs)[1:3],
                        align_corners=True)

                    softmax_logits = tf.nn.softmax(logits)

                    samples_placeholders[
                        'accumulated_softmax'] = tf.placeholder(
                            dtype=softmax_logits.dtype,
                            shape=FLAGS.vis_placeholder_size)
                    #shape=[1, 1025, 2049, 19])
                    #shape=[1, 513, 513, 23])
                    samples_placeholders[
                        'accumulated_softmax_sq'] = tf.placeholder(
                            dtype=softmax_logits.dtype,
                            shape=FLAGS.vis_placeholder_size)
                    #shape=[1, 1025, 2049, 19])
                    #shape=[1, 513, 513, 23])

                    accumulated_softmax = samples_placeholders[
                        'accumulated_softmax'] + softmax_logits
                    accumulated_softmax_sq = samples_placeholders[
                        'accumulated_softmax_sq'] + tf.square(softmax_logits)
                    extra_to_run['accumulated_softmax'] = accumulated_softmax
                    extra_to_run[
                        'accumulated_softmax_sq'] = accumulated_softmax_sq
            else:
                predictions = model.predict_labels(
                    # samples[common.IMAGE],
                    model_inputs,
                    model_options=model_options,
                    image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                # samples[common.IMAGE],
                model_inputs,
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
            if FLAGS.save_logits:
                raise NotImplementedError("Multiscale logits aren't saved")

        if predictions is not None:
            predictions = predictions[common.OUTPUT_TYPE]

        if FLAGS.min_resize_value and FLAGS.max_resize_value:
            if FLAGS.input_hints:
                #raise Exception("***Unclear if this will work with hints. Look over the code.")
                print(
                    "***Unclear if this will work with hints. Look over the code."
                )
            # Only support batch_size = 1, since we assume the dimensions of the
            # original image after tf.squeeze are [height, width, 3].
            assert FLAGS.vis_batch_size == 1

            # Reverse the resizing and padding operations performed in preprocessing.
            # First, we slice the valid regions (i.e., remove the padded region) and
            # then we resize the predictions back.
            original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
            original_image_shape = tf.shape(original_image)
            predictions = tf.slice(
                predictions, [0, 0, 0],
                [1, original_image_shape[0], original_image_shape[1]])
            resized_shape = tf.to_int32([
                tf.squeeze(samples[common.HEIGHT]),
                tf.squeeze(samples[common.WIDTH])
            ])
            predictions = tf.squeeze(
                tf.image.resize_images(
                    tf.expand_dims(predictions, 3),
                    resized_shape,
                    method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                    align_corners=True), 3)

        tf.train.get_or_create_global_step()
        saver = tf.train.Saver(slim.get_variables_to_restore())
        sv = tf.train.Supervisor(graph=g,
                                 logdir=FLAGS.vis_logdir,
                                 init_op=tf.global_variables_initializer(),
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=saver)

        if FLAGS.vis_num_batches <= 0:
            num_batches = int(
                math.ceil(dataset.num_samples / float(FLAGS.vis_batch_size)))
            # Process every batch when no subset is requested.
            idxs_to_keep = list(range(num_batches))
        else:
            if FLAGS.shuffle:
                # list() so the indices can be shuffled and extended (Python 3).
                shuffled_idxs = list(range(dataset.num_samples))
                np.random.seed(FLAGS.shuffle_seed)
                np.random.shuffle(shuffled_idxs)
            else:
                shuffled_idxs = list(range(dataset.num_samples))
            idxs_to_keep = shuffled_idxs[FLAGS.start_idx:FLAGS.start_idx +
                                         FLAGS.vis_num_batches]

            if FLAGS.also_vis_first_N > 0:
                idxs_to_keep.extend(shuffled_idxs[0:FLAGS.also_vis_first_N])

            print(sorted(idxs_to_keep)[:10])
            print("There are {} indices to keep.".format(len(idxs_to_keep)))

            # Iterate over all batches; only those in idxs_to_keep are run
            # through the network (the rest are just dequeued and skipped).
            num_batches = int(
                math.ceil(dataset.num_samples / float(FLAGS.vis_batch_size)))

        last_checkpoint = None

        # Loop to visualize the results when new checkpoint is created.
        num_iters = 0
        while (FLAGS.max_number_of_iterations <= 0
               or num_iters < FLAGS.max_number_of_iterations):
            num_iters += 1
            last_checkpoint = slim.evaluation.wait_for_new_checkpoint(
                FLAGS.checkpoint_dir, last_checkpoint)
            start = time.time()
            tf.logging.info('Starting visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            tf.logging.info('Visualizing with model %s', last_checkpoint)

            with sv.managed_session(FLAGS.master,
                                    start_standard_services=False) as sess:
                sv.start_queue_runners(sess)
                sv.saver.restore(sess, last_checkpoint)

                image_id_offset = 0
                for batch in range(num_batches):

                    if batch in idxs_to_keep:
                        tf.logging.info('Visualizing batch %d / %d', batch + 1,
                                        num_batches)
                        _process_batch(
                            sess=sess,
                            original_images=samples[common.ORIGINAL_IMAGE],
                            semantic_predictions=predictions,
                            image_names=samples[common.IMAGE_NAME],
                            image_heights=samples[common.HEIGHT],
                            image_widths=samples[common.WIDTH],
                            image_id_offset=image_id_offset,
                            save_dir=save_dir,
                            raw_save_dir=raw_save_dir,
                            save_logits=FLAGS.save_logits,
                            logits=logits,
                            fixed_features=fixed_features,
                            extra_to_run=extra_to_run,
                            logit_save_dir=logit_save_dir,
                            uncertainty_save_dir=uncertainty_save_dir,
                            train_id_to_eval_id=train_id_to_eval_id,
                            samples_orig=samples_orig,
                            samples_placeholders=samples_placeholders,
                            compute_uncertainty=FLAGS.compute_uncertainty,
                            num_forward_passes=FLAGS.compute_uncertainty_iterations)
                    else:
                        # Run batch generator to skip this batch
                        sess.run([samples_orig])
                    image_id_offset += FLAGS.vis_batch_size

            tf.logging.info('Finished visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
            if time_to_next_eval > 0 and num_iters < FLAGS.max_number_of_iterations:
                time.sleep(time_to_next_eval)
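The accumulated_softmax / accumulated_softmax_sq placeholders above implement Monte Carlo dropout: the same batch is pushed through the dropout-enabled head several times, and running sums of the softmax and its square give the predictive mean and variance. Numerically, the reduction is just the following (an illustrative numpy restatement, not code from the example):

import numpy as np

def mc_dropout_reduce(softmax_runs):
    """softmax_runs: (T, H, W, C) stack of T stochastic forward passes."""
    acc = np.zeros_like(softmax_runs[0])
    acc_sq = np.zeros_like(softmax_runs[0])
    for s in softmax_runs:              # mirrors the per-pass feed loop
        acc += s
        acc_sq += np.square(s)
    t = float(len(softmax_runs))
    mean = acc / t                      # predictive class distribution
    var = acc_sq / t - np.square(mean)  # per-class predictive variance
    return np.argmax(mean, axis=-1), var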
Beispiel #16
0
    def train(self):
        FLAGS = self.flags
        dataset_split = 'train'
        data_config = edict()
        data_config.edge_width = 20
        data_config.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
        data_config.edge_class_num = FLAGS.edge_class_num
        img_files, label_files = get_dataset_files(FLAGS.dataset,
                                                   dataset_split)

        dataset = edict()
        dataset_pp = dataset_pipeline(data_config,
                                      img_files,
                                      label_files,
                                      is_train=True)
        dataset.num_classes = DATASETS_CLASS_NUM[FLAGS.dataset]
        dataset.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
        dataset.num_samples = len(dataset_pp)

        tf.logging.set_verbosity(tf.logging.INFO)
        # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
        config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
                                               clone_on_cpu=FLAGS.clone_on_cpu,
                                               replica_id=FLAGS.task,
                                               num_replicas=FLAGS.num_replicas,
                                               num_ps_tasks=FLAGS.num_ps_tasks)

        # Split the batch across GPUs.
        assert FLAGS.train_batch_size % config.num_clones == 0, (
            'Training batch size not divisible by number of clones (GPUs).')

        clone_batch_size = FLAGS.train_batch_size // config.num_clones

        # Get dataset-dependent information.
        #        dataset = segmentation_dataset.get_dataset(
        #            FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)

        tf.gfile.MakeDirs(FLAGS.train_logdir)
        tf.logging.info('Training on %s set', FLAGS.train_split)

        with tf.Graph().as_default() as graph:
            with tf.device(config.inputs_device()):
                data_list = dataset_pp.iterator()
                samples = input_generator.get(
                    (data_list, dataset.ignore_label),
                    FLAGS.train_crop_size,
                    clone_batch_size,
                    min_resize_value=FLAGS.min_resize_value,
                    max_resize_value=FLAGS.max_resize_value,
                    resize_factor=FLAGS.resize_factor,
                    min_scale_factor=FLAGS.min_scale_factor,
                    max_scale_factor=FLAGS.max_scale_factor,
                    scale_factor_step_size=FLAGS.scale_factor_step_size,
                    dataset_split=FLAGS.train_split,
                    is_training=True,
                    model_variant=FLAGS.model_variant)
                inputs_queue = prefetch_queue.prefetch_queue(samples,
                                                             capacity=128 *
                                                             config.num_clones)

            # Create the global step on the device storing the variables.
            with tf.device(config.variables_device()):
                global_step = tf.train.get_or_create_global_step()

                # Define the model and create clones.
                model_fn = self._build_deeplab
                model_args = (inputs_queue, {
                    common.OUTPUT_TYPE: dataset.num_classes
                }, dataset.ignore_label)
                clones = model_deploy.create_clones(config,
                                                    model_fn,
                                                    args=model_args)

                # Gather update_ops from the first clone. These contain, for example,
                # the updates for the batch_norm variables created by model_fn.
                first_clone_scope = config.clone_scope(0)
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                               first_clone_scope)

            # Gather initial summaries.
            summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

            # Add summaries for model variables.
            for model_var in slim.get_model_variables():
                summaries.add(
                    tf.summary.histogram(model_var.op.name, model_var))

            label_name = ('%s/%s:0' %
                          (first_clone_scope, common.LABEL)).strip('/')
            print('first clone label name is:', label_name)

            # Add summaries for images, labels, semantic predictions
            if FLAGS.save_summaries_images:
                summary_image = graph.get_tensor_by_name(
                    ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
                summaries.add(
                    tf.summary.image('samples/%s' % common.IMAGE,
                                     summary_image))

                first_clone_label = graph.get_tensor_by_name(
                    ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))

                # Scale up summary image pixel values for better visualization.
                pixel_scaling = max(1, 255 // dataset.num_classes)
                summary_label = tf.cast(first_clone_label * pixel_scaling,
                                        tf.uint8)
                summaries.add(
                    tf.summary.image('samples/%s' % common.LABEL,
                                     summary_label))

                first_clone_output = graph.get_tensor_by_name(
                    ('%s/%s:0' %
                     (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
                predictions = tf.expand_dims(tf.argmax(first_clone_output, 3),
                                             -1)

                summary_predictions = tf.cast(predictions * pixel_scaling,
                                              tf.uint8)
                summaries.add(
                    tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                     summary_predictions))

            # Add summaries for miou,acc
            labels = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
            predictions = graph.get_tensor_by_name(
                ('%s/%s:0' %
                 (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
            predictions = tf.image.resize_bilinear(predictions,
                                                   tf.shape(labels)[1:3],
                                                   align_corners=True)
            # predictions shape (2, 513, 513, 19/21)
            print('predictions shape', predictions.shape)
            self.get_metric(labels, predictions, 'train')

            # Add summaries for losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES,
                                          first_clone_scope):
                summaries.add(
                    tf.summary.scalar('losses/%s' % loss.op.name, loss))

#            losses = {}
#            for key in [common.OUTPUT_TYPE,common.EDGE]:
#                losses[key]=graph.get_tensor_by_name(name='losses/%s:0'%key)
#                summaries.add(tf.summary.scalar('losses/'+key,losses[key]))

            # Build the optimizer based on the device specification.
            with tf.device(config.optimizer_device()):
                learning_rate = train_utils.get_model_learning_rate(
                    FLAGS.learning_policy, FLAGS.base_learning_rate,
                    FLAGS.learning_rate_decay_step,
                    FLAGS.learning_rate_decay_factor,
                    FLAGS.training_number_of_steps, FLAGS.learning_power,
                    FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       FLAGS.momentum)
                summaries.add(tf.summary.scalar('learning_rate',
                                                learning_rate))

            startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps
            for variable in slim.get_model_variables():
                summaries.add(tf.summary.histogram(variable.op.name, variable))

            with tf.device(config.variables_device()):
                total_loss, grads_and_vars = model_deploy.optimize_clones(
                    clones, optimizer)
                total_loss = tf.check_numerics(total_loss,
                                               'Loss is inf or nan.')
                summaries.add(
                    tf.summary.scalar('losses/total_loss', total_loss))

                # Modify the gradients for biases and last layer variables.
                last_layers = model.get_extra_layer_scopes(
                    FLAGS.last_layers_contain_logits_only)
                grad_mult = train_utils.get_model_gradient_multipliers(
                    last_layers, FLAGS.last_layer_gradient_multiplier)
                if grad_mult:
                    grads_and_vars = slim.learning.multiply_gradients(
                        grads_and_vars, grad_mult)

                # Create gradient update op.
                grad_updates = optimizer.apply_gradients(
                    grads_and_vars, global_step=global_step)
                update_ops.append(grad_updates)
                update_op = tf.group(*update_ops)
                with tf.control_dependencies([update_op]):
                    train_tensor = tf.identity(total_loss, name='train_op')

            # Add the summaries from the first clone. These contain the summaries
            # created by model_fn and either optimize_clones() or _gather_clone_loss().
            summaries |= set(
                tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

            # Merge all summaries together.
            summary_op = tf.summary.merge(list(summaries))

            # Soft placement allows ops without a GPU implementation to be placed on CPU.
            session_config = tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=False)
            session_config.gpu_options.allow_growth = True

            #            init_fn=train_utils.get_model_init_fn(
            #                    FLAGS.train_logdir,
            #                    FLAGS.tf_initial_checkpoint,
            #                    FLAGS.initialize_last_layer,
            #                    last_layers,
            #                    ignore_missing_vars=True)

            exclude_list = ['global_step']
            if not FLAGS.initialize_last_layer:
                exclude_list.extend(last_layers)
            variables_to_restore = slim.get_variables_to_restore(
                exclude=exclude_list)
            init_fn = slim.assign_from_checkpoint_fn(
                model_path=FLAGS.tf_initial_checkpoint,
                var_list=variables_to_restore,
                ignore_missing_vars=True)
            #            saver = tf.train.Saver()
            #            train_writer = tf.summary.FileWriter(FLAGS.train_logdir)
            #            sess=tf.Session(config=session_config)
            #            init_fn(sess)
            #            sess.run(tf.global_variables_initializer())
            #            sess.run(tf.local_variables_initializer())
            #            tf.train.start_queue_runners(sess)
            #
            #            for i in trange(FLAGS.training_number_of_steps):
            #                loss,summary,n_step=sess.run([train_tensor,summary_op,global_step])
            #                train_writer.add_summary(summary,i)
            #                if i%100==1:
            #                    tqdm.write('%d/%d global_step=%0.2f, loss=%0.5f'%(i,FLAGS.training_number_of_steps,n_step,loss))
            #
            #            saver.save(sess,os.path.join(FLAGS.train_logdir,'model'),global_step=FLAGS.training_number_of_steps)
            #            train_writer.close()
            # Start the training.
            slim.learning.train(train_tensor,
                                logdir=FLAGS.train_logdir,
                                log_every_n_steps=FLAGS.log_steps,
                                master=FLAGS.master,
                                number_of_steps=FLAGS.training_number_of_steps,
                                is_chief=(FLAGS.task == 0),
                                session_config=session_config,
                                startup_delay_steps=startup_delay_steps,
                                init_fn=init_fn,
                                summary_op=summary_op,
                                save_summaries_secs=FLAGS.save_summaries_secs,
                                save_interval_secs=FLAGS.save_interval_secs)
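
The restore block above (exclude `global_step` and, optionally, the last layers, then build an `init_fn`) is the usual slim warm-start pattern. A minimal self-contained sketch; the checkpoint path and the `logits` scope below are illustrative, not from the original:

import tensorflow as tf

slim = tf.contrib.slim

# Skip variables we want freshly initialized (illustrative names).
exclude_list = ['global_step', 'logits']
variables_to_restore = slim.get_variables_to_restore(exclude=exclude_list)
init_fn = slim.assign_from_checkpoint_fn(
    model_path='/path/to/model.ckpt',  # hypothetical checkpoint path
    var_list=variables_to_restore,
    ignore_missing_vars=True)
# slim.learning.train(...) invokes init_fn(sess) once before the first step.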
Beispiel #17
0
    def val(self):
        FLAGS = self.flags
        tf.logging.set_verbosity(tf.logging.INFO)
        # Get dataset-dependent information.
        #        dataset = segmentation_dataset.get_dataset(
        #            FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)
        dataset_split = 'val'
        data_config = edict()
        data_config.edge_width = 20
        data_config.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
        data_config.edge_class_num = FLAGS.edge_class_num
        img_files, label_files = get_dataset_files(FLAGS.dataset,
                                                   dataset_split)
        dataset_pp = dataset_pipeline(data_config,
                                      img_files,
                                      label_files,
                                      is_train=False)
        num_classes = DATASETS_CLASS_NUM[FLAGS.dataset]
        ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
        num_samples = len(dataset_pp)

        print('eval_logdir is', FLAGS.eval_logdir)
        print('checkpoint dir is', FLAGS.checkpoint_dir)

        tf.gfile.MakeDirs(FLAGS.eval_logdir)
        tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

        with tf.Graph().as_default():
            data_list = dataset_pp.iterator()
            samples = input_generator.get(
                (data_list, ignore_label),
                FLAGS.eval_crop_size,
                FLAGS.eval_batch_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                dataset_split=FLAGS.eval_split,
                is_training=False,
                model_variant=FLAGS.model_variant)

            model_options = common.ModelOptions(
                outputs_to_num_classes={common.OUTPUT_TYPE: num_classes},
                crop_size=FLAGS.eval_crop_size,
                atrous_rates=FLAGS.atrous_rates,
                output_stride=FLAGS.output_stride)

            if tuple(FLAGS.eval_scales) == (1.0,):
                tf.logging.info('Performing single-scale test.')
                predictions = predict_labels(samples[common.IMAGE],
                                             model_options,
                                             image_pyramid=FLAGS.image_pyramid)
            else:
                tf.logging.info('Performing multi-scale test.')
                predictions = predict_labels_multi_scale(
                    samples[common.IMAGE],
                    model_options=model_options,
                    eval_scales=FLAGS.eval_scales,
                    add_flipped_images=FLAGS.add_flipped_images)
            predictions = predictions[common.OUTPUT_TYPE]
            predictions = tf.reshape(predictions, shape=[-1])
            labels = tf.reshape(samples[common.LABEL], shape=[-1])
            weights = tf.to_float(tf.not_equal(labels, ignore_label))

            # Set ignore_label regions to label 0, because metrics.mean_iou requires
            # range of labels = [0, dataset.num_classes). Note the ignore_label regions
            # are not evaluated since the corresponding regions contain weights = 0.
            labels = tf.where(tf.equal(labels, ignore_label),
                              tf.zeros_like(labels), labels)

            predictions_tag = 'miou'
            for eval_scale in FLAGS.eval_scales:
                predictions_tag += '_' + str(eval_scale)
            if FLAGS.add_flipped_images:
                predictions_tag += '_flipped'

            # Define the evaluation metric.
            metric_map = {}
            metric_map[predictions_tag] = tf.metrics.mean_iou(predictions,
                                                              labels,
                                                              num_classes,
                                                              weights=weights)

            metrics_to_values, metrics_to_updates = (
                tf.contrib.metrics.aggregate_metric_map(metric_map))

            for metric_name, metric_value in six.iteritems(metrics_to_values):
                slim.summaries.add_scalar_summary(metric_value,
                                                  metric_name,
                                                  print_summary=True)

            num_batches = int(
                math.ceil(num_samples / float(FLAGS.eval_batch_size)))

            tf.logging.info('Eval num images %d', num_samples)
            tf.logging.info('Eval batch size %d and num batch %d',
                            FLAGS.eval_batch_size, num_batches)

            num_eval_iters = None
            if FLAGS.max_number_of_evaluations > 0:
                num_eval_iters = FLAGS.max_number_of_evaluations
            slim.evaluation.evaluation_loop(
                master=FLAGS.master,
                checkpoint_dir=FLAGS.checkpoint_dir,
                logdir=FLAGS.eval_logdir,
                num_evals=num_batches,
                eval_op=list(metrics_to_updates.values()),
                max_number_of_evaluations=num_eval_iters,
                eval_interval_secs=FLAGS.eval_interval_secs)
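
The weighting trick in val() keeps ignored pixels out of the metric: they get weight 0 and are remapped to class 0 so tf.metrics.mean_iou only ever sees valid class ids. A toy sketch of the same masking arithmetic (all values illustrative):

import tensorflow as tf

ignore_label = 255
num_classes = 21
labels = tf.constant([0, 3, 255, 7], dtype=tf.int32)       # flattened ground truth
predictions = tf.constant([0, 3, 5, 7], dtype=tf.int32)    # flattened predictions

weights = tf.to_float(tf.not_equal(labels, ignore_label))  # [1., 1., 0., 1.]
labels = tf.where(tf.equal(labels, ignore_label),
                  tf.zeros_like(labels), labels)           # [0, 3, 0, 7]
miou, update_op = tf.metrics.mean_iou(
    labels=labels, predictions=predictions,
    num_classes=num_classes, weights=weights)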
def main(unused_argv):
    FLAGS.train_logdir = FLAGS.base_logdir + '/' + FLAGS.task_name
    if FLAGS.restore_name is None:
        FLAGS.restore_logdir = FLAGS.train_logdir
    else:
        FLAGS.restore_logdir = FLAGS.base_logdir + '/' + FLAGS.restore_name

    tf.logging.set_verbosity(tf.logging.INFO)

    # Get logging dir ready.
    if not os.path.isdir(FLAGS.train_logdir):
        tf.gfile.MakeDirs(FLAGS.train_logdir)
    elif len(os.listdir(FLAGS.train_logdir)) != 0:
        if not FLAGS.if_restore:
            if_delete_all = input(
                '#### The log folder %s exists and is non-empty; delete all logs? [y/n] '
                % FLAGS.train_logdir)
            if if_delete_all == 'y':
                os.system('rm -rf %s/*' % FLAGS.train_logdir)
                print('==== Log folder emptied.')
        else:
            print('==== Log folder exists; not emptying it because we need to restore from it.')
    tf.logging.info('==== Logging in dir:%s; Training on %s set',
                    FLAGS.train_logdir, FLAGS.train_split)

    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(
        num_clones=FLAGS.num_clones,
        clone_on_cpu=FLAGS.clone_on_cpu,
        replica_id=FLAGS.task,
        num_replicas=FLAGS.num_replicas,
        num_ps_tasks=FLAGS.num_ps_tasks)  # /device:CPU:0

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisible by number of clones (GPUs).')
    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    # Get dataset-dependent information.
    dataset = regression_dataset.get_dataset(FLAGS.dataset,
                                             FLAGS.train_split,
                                             dataset_dir=FLAGS.dataset_dir)
    dataset_val = regression_dataset.get_dataset(FLAGS.dataset,
                                                 FLAGS.val_split,
                                                 dataset_dir=FLAGS.dataset_dir)
    print('#### The data has size:', dataset.num_samples, dataset_val.num_samples)

    codes = np.load(
        '/ssd2/public/zhurui/Documents/mesh-voxelization/models/cars_64/codes.npy'
    )

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            codes_max = np.amax(codes, axis=1).reshape((-1, 1))
            codes_min = np.amin(codes, axis=1).reshape((-1, 1))
            shape_range = np.hstack(
                (codes_max + (codes_max - codes_min) /
                 (dataset.SHAPE_BINS - 1.), codes_min -
                 (codes_max - codes_min) / (dataset.SHAPE_BINS - 1.)))
            bin_range = [
                np.linspace(r[0], r[1], num=b).tolist()
                for r, b in zip(np.vstack((dataset.pose_range,
                                           shape_range)), dataset.bin_nums)
            ]
            # print np.vstack((dataset.pose_range, shape_range))
            # print bin_range[0]
            # print bin_range[-1]
            outputs_to_num_classes = {}
            outputs_to_indices = {}
            for output, bin_num, idx in zip(dataset.output_names,
                                            dataset.bin_nums,
                                            range(len(dataset.output_names))):
                if FLAGS.if_discrete_loss:
                    outputs_to_num_classes[output] = bin_num
                else:
                    outputs_to_num_classes[output] = 1
                outputs_to_indices[output] = idx
            bin_vals = [
                tf.constant(value=[bin_range[i]], dtype=tf.float32,
                            shape=[1, dataset.bin_nums[i]], name=name)
                for i, name in enumerate(dataset.output_names)
            ]
            # print outputs_to_num_classes
            # print spaces_to_indices

            samples = input_generator.get(dataset,
                                          codes,
                                          clone_batch_size,
                                          dataset_split=FLAGS.train_split,
                                          is_training=True,
                                          model_variant=FLAGS.model_variant)
            inputs_queue = prefetch_queue.prefetch_queue(samples,
                                                         capacity=128 *
                                                         config.num_clones)

            samples_val = input_generator.get(
                dataset_val,
                codes,
                clone_batch_size,
                dataset_split=FLAGS.val_split,
                is_training=False,
                model_variant=FLAGS.model_variant)
            inputs_queue_val = prefetch_queue.prefetch_queue(samples_val,
                                                             capacity=128)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            model_fn = _build_deeplab
            model_args = (FLAGS, inputs_queue.dequeue(),
                          outputs_to_num_classes, outputs_to_indices, bin_vals,
                          bin_range, dataset, codes, True, False)
            clones = model_deploy.create_clones(config,
                                                model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by model_fn.
            first_clone_scope = config.clone_scope(0)  # clone_0
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        with tf.device('/device:GPU:3'):
            if FLAGS.if_val:
                ## Construct the validation graph; takes one GPU.
                _build_deeplab(FLAGS,
                               inputs_queue_val.dequeue(),
                               outputs_to_num_classes,
                               outputs_to_indices,
                               bin_vals,
                               bin_range,
                               dataset_val,
                               codes,
                               is_training=False,
                               reuse=True)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for images, labels, semantic predictions
        summary_loss_dict = {}
        if FLAGS.save_summaries_images:
            if FLAGS.num_clones > 1:
                pattern_train = first_clone_scope + '/%s:0'
            else:
                pattern_train = '%s:0'
            pattern_val = 'val-%s:0'
            pattern = pattern_val if FLAGS.if_val else pattern_train

            gather_list = [0] if FLAGS.num_clones < 3 else [0, 1, 2]

            summary_mask = graph.get_tensor_by_name(pattern %
                                                    'not_ignore_mask_in_loss')
            summary_mask = tf.reshape(summary_mask,
                                      [-1, dataset.height, dataset.width, 1])
            summary_mask_float = tf.to_float(summary_mask)
            summaries.add(
                tf.summary.image(
                    'gt/%s' % 'not_ignore_mask',
                    tf.gather(tf.cast(summary_mask_float * 255., tf.uint8),
                              gather_list)))

            summary_image = graph.get_tensor_by_name(pattern % common.IMAGE)
            summaries.add(
                tf.summary.image('gt/%s' % common.IMAGE,
                                 tf.gather(summary_image, gather_list)))

            summary_image_name = graph.get_tensor_by_name(pattern %
                                                          common.IMAGE_NAME)
            summaries.add(
                tf.summary.text('gt/%s' % common.IMAGE_NAME,
                                tf.gather(summary_image_name, gather_list)))

            summary_image_name = graph.get_tensor_by_name(pattern_train %
                                                          common.IMAGE_NAME)
            summaries.add(
                tf.summary.text('gt/%s_train' % common.IMAGE_NAME,
                                tf.gather(summary_image_name, gather_list)))

            summary_vis = graph.get_tensor_by_name(pattern % 'vis')
            summaries.add(
                tf.summary.image('gt/%s' % 'vis',
                                 tf.gather(summary_vis, gather_list)))

            def scale_to_255(tensor, pixel_scaling=None):
                tensor = tf.to_float(tensor)
                if pixel_scaling is None:
                    offset_to_zero = tf.reduce_min(tensor)
                    scale_factor = tf.div(
                        255., tf.reduce_max(tensor - offset_to_zero))
                else:
                    offset_to_zero, scale_factor = pixel_scaling
                summary_tensor_float = tensor - offset_to_zero
                summary_tensor_float = summary_tensor_float * scale_factor
                summary_tensor_float = tf.clip_by_value(
                    summary_tensor_float, 0., 255.)
                summary_tensor_uint8 = tf.cast(summary_tensor_float, tf.uint8)
                return summary_tensor_uint8, (offset_to_zero, scale_factor)
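            # Illustrative usage (not in the original): compute the label
            # image's scaling once and reuse it for the logits, so both
            # summary images share one grey scale:
            #   label_uint8, scaling = scale_to_255(label_map)
            #   logit_uint8, _ = scale_to_255(logit_map, pixel_scaling=scaling)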

            label_outputs = graph.get_tensor_by_name(pattern %
                                                     'label_pose_shape_map')
            label_id_outputs = graph.get_tensor_by_name(
                pattern % 'pose_shape_label_id_map')
            logit_outputs = graph.get_tensor_by_name(
                pattern % 'scaled_prob_logits_pose_shape_map')

            summary_rot_diffs = graph.get_tensor_by_name(pattern %
                                                         'rot_error_map')
            summary_rot_diffs = tf.where(summary_mask, summary_rot_diffs,
                                         tf.zeros_like(summary_rot_diffs))
            summary_rot_diffs_uint8, _ = scale_to_255(summary_rot_diffs)
            summaries.add(
                tf.summary.image(
                    'metrics_map/%s' % 'rot_diffs',
                    tf.gather(summary_rot_diffs_uint8, gather_list)))

            summary_trans_diffs = graph.get_tensor_by_name(pattern %
                                                           'trans_error_map')
            summary_trans_diffs = tf.where(summary_mask, summary_trans_diffs,
                                           tf.zeros_like(summary_trans_diffs))
            summary_trans_diffs_uint8, _ = scale_to_255(summary_trans_diffs)
            summaries.add(
                tf.summary.image('metrics_map/%s' % 'trans_diffs',
                                 tf.gather(summary_trans_diffs, gather_list)))

            shape_id_outputs = graph.get_tensor_by_name(pattern %
                                                        'shape_id_map')
            shape_id_outputs = tf.where(summary_mask, shape_id_outputs + 1,
                                        tf.zeros_like(shape_id_outputs))
            summary_shape_id_output_uint8, _ = scale_to_255(shape_id_outputs)
            summaries.add(
                tf.summary.image(
                    'shape/shape_id_map',
                    tf.gather(summary_shape_id_output_uint8, gather_list)))

            shape_id_outputs_gt = graph.get_tensor_by_name(pattern %
                                                           'shape_id_map_gt')
            shape_id_outputs_gt = tf.where(summary_mask,
                                           shape_id_outputs_gt + 1,
                                           tf.zeros_like(shape_id_outputs_gt))
            summary_shape_id_output_uint8_gt, _ = scale_to_255(
                shape_id_outputs_gt)
            summaries.add(
                tf.summary.image(
                    'shape/shape_id_map_gt',
                    tf.gather(summary_shape_id_output_uint8_gt, gather_list)))

            if FLAGS.if_summary_metrics:
                shape_id_outputs = graph.get_tensor_by_name(
                    pattern % 'shape_id_map_predict')
                summary_shape_id_output = tf.where(
                    summary_mask, shape_id_outputs,
                    tf.zeros_like(shape_id_outputs))
                summary_shape_id_output_uint8, _ = scale_to_255(
                    summary_shape_id_output)
                summaries.add(
                    tf.summary.image(
                        'shape/shape_id_map_predict',
                        tf.gather(summary_shape_id_output_uint8, gather_list)))

                shape_id_sim_map_train = graph.get_tensor_by_name(
                    pattern_train % 'shape_id_sim_map')
                # shape_id_sim_map_train = tf.where(summary_mask, shape_id_sim_map_train, tf.zeros_like(shape_id_sim_map_train))
                shape_id_sim_map_uint8_train, _ = scale_to_255(
                    shape_id_sim_map_train, pixel_scaling=(0., 255.))
                summaries.add(
                    tf.summary.image(
                        'metrics_map/shape_id_sim_map-trainInv',
                        tf.gather(shape_id_sim_map_uint8_train, gather_list)))

                shape_id_sim_map = graph.get_tensor_by_name(pattern %
                                                            'shape_id_sim_map')
                # shape_id_sim_map = tf.where(summary_mask, shape_id_sim_map, tf.zeros_like(shape_id_sim_map))
                shape_id_sim_map_uint8, _ = scale_to_255(shape_id_sim_map,
                                                         pixel_scaling=(0.,
                                                                        255.))
                summaries.add(
                    tf.summary.image(
                        'metrics_map/shape_id_sim_map-valInv',
                        tf.gather(shape_id_sim_map_uint8, gather_list)))

            for output_idx, output in enumerate(dataset.output_names):
                # Scale up summary image pixel values for better visualization.
                summary_label_output = tf.gather(label_outputs, [output_idx],
                                                 axis=3)
                summary_label_output = tf.where(
                    summary_mask, summary_label_output,
                    tf.zeros_like(summary_label_output))
                summary_label_output_uint8, pixel_scaling = scale_to_255(
                    summary_label_output)
                summaries.add(
                    tf.summary.image(
                        'output/%s_label' % output,
                        tf.gather(summary_label_output_uint8, gather_list)))

                summary_logit_output = tf.gather(logit_outputs, [output_idx],
                                                 axis=3)
                summary_logit_output = tf.where(
                    summary_mask, summary_logit_output,
                    tf.zeros_like(summary_logit_output))
                summary_logit_output_uint8, _ = scale_to_255(
                    summary_logit_output, pixel_scaling)
                summaries.add(
                    tf.summary.image(
                        'output/%s_logit' % output,
                        tf.gather(summary_logit_output_uint8, gather_list)))

                # summary_label_id_output = tf.to_float(tf.gather(label_id_outputs, [output_idx], axis=3))
                # summary_label_id_output = tf.where(summary_mask, summary_label_id_output+1, tf.zeros_like(summary_label_id_output))
                # summary_label_id_output_uint8, _ = scale_to_255(summary_label_id_output)
                # summary_label_id_output_uint8 = tf.identity(summary_label_id_output_uint8, 'tttt'+output)
                # summaries.add(tf.summary.image(
                #     'test/%s_label_id' % output, tf.gather(summary_label_id_output_uint8, gather_list)))

                summary_diff = tf.abs(
                    tf.to_float(summary_label_output_uint8) -
                    tf.to_float(summary_logit_output_uint8))
                summary_diff = tf.where(summary_mask, summary_diff,
                                        tf.zeros_like(summary_diff))
                summaries.add(
                    tf.summary.image(
                        'diff_map/%s_ldiff' % output,
                        tf.gather(tf.cast(summary_diff, tf.uint8),
                                  gather_list)))

                summary_loss = graph.get_tensor_by_name(
                    (pattern % 'loss_slice_reg_').replace(':0', '') + output +
                    ':0')
                summaries.add(
                    tf.summary.scalar(
                        'slice_loss/' + (pattern % 'reg_').replace(':0', '') +
                        output, summary_loss))

                summary_loss = graph.get_tensor_by_name(
                    (pattern % 'loss_slice_cls_').replace(':0', '') + output +
                    ':0')
                summaries.add(
                    tf.summary.scalar(
                        'slice_loss/' + (pattern % 'cls_').replace(':0', '') +
                        output, summary_loss))

            patterns = ([pattern_train, pattern_val]
                        if FLAGS.if_val else [pattern_train])
            for pattern in patterns:
                add_metrics = (['loss_all_shape_id_cls_metric']
                               if FLAGS.if_summary_metrics else [])
                for loss_name in [
                        'loss_reg_rot_quat_metric', 'loss_reg_rot_quat',
                        'loss_reg_trans_metric', 'loss_reg_trans',
                        'loss_cls_ALL', 'loss_reg_shape'
                ] + add_metrics:
                    if pattern == pattern_val:
                        summary_loss_avg = graph.get_tensor_by_name(pattern %
                                                                    loss_name)
                        # summary_loss_dict['val-'+loss_name] = summary_loss_avg
                    else:
                        summary_loss_avg = train_utils.get_avg_tensor_from_scopes(
                            FLAGS.num_clones, '%s:0', graph, config, loss_name)
                        # summary_loss_dict['train-'+loss_name] = summary_loss_avg
                    summaries.add(
                        tf.summary.scalar(
                            ('total_loss/' + pattern % loss_name).replace(
                                ':0', ''), summary_loss_avg))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy, FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps, FLAGS.learning_power,
                FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
            optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                   FLAGS.momentum)
            # optimizer = tf.train.AdamOptimizer(learning_rate)
            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

        with tf.device(config.variables_device()):
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            print('------ total_loss', total_loss,
                  tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope))
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('total_loss/train', total_loss))

            # Modify the gradients for biases and last layer variables.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            print('////last layers', last_layers)

            # Filter trainable variables for last layers ONLY.
            # grads_and_vars = train_utils.filter_gradients(last_layers, grads_and_vars)

            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op.
            grad_updates = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows ops without a GPU implementation to be placed on the CPU.
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)
        session_config.gpu_options.allow_growth = False

        def train_step_fn(sess, train_op, global_step, train_step_kwargs):
            train_step_fn.step += 1  # or use global_step.eval(session=sess)

            # calc training losses
            loss, should_stop = slim.learning.train_step(
                sess, train_op, global_step, train_step_kwargs)
            print(loss)
            # print 'loss: ', loss
            # first_clone_test = graph.get_tensor_by_name(
            #         ('%s/%s:0' % (first_clone_scope, 'shape_map')).strip('/'))
            # test = sess.run(first_clone_test)
            # # print test
            # print 'test: ', test.shape, np.max(test), np.min(test), np.mean(test), test.dtype
            should_stop = 0

            if FLAGS.if_val and train_step_fn.step % FLAGS.val_interval_steps == 0:
                # first_clone_test = graph.get_tensor_by_name('val-loss_all:0')
                # test = sess.run(first_clone_test)
                print('-- Validating...')
                first_clone_test = graph.get_tensor_by_name(
                    ('%s/%s:0' %
                     (first_clone_scope, 'shape_id_map')).strip('/'))
                first_clone_test2 = graph.get_tensor_by_name(
                    ('%s/%s:0' %
                     (first_clone_scope, 'shape_id_sim_map')).strip('/'))
                # 'ttttrow:0')
                first_clone_test3 = graph.get_tensor_by_name((
                    '%s/%s:0' %
                    (first_clone_scope, 'not_ignore_mask_in_loss')).strip('/'))
                # 'ttttrow:0')
                test_out, test_out2, test_out3 = sess.run(
                    [first_clone_test, first_clone_test2, first_clone_test3])
                # test_out = test[:, :, :, 3]
                test_out = test_out[test_out3]
                # test_out2 = test2[:, :, :, 3]
                test_out2 = test_out2[test_out3]
                # print test_out
                print('shape_id_map: ', test_out.shape, np.max(test_out),
                      np.min(test_out), np.mean(test_out),
                      np.median(test_out), test_out.dtype)
                print('shape_id_sim_map: ', test_out2.shape, np.max(test_out2),
                      np.min(test_out2), np.mean(test_out2),
                      np.median(test_out2), test_out2.dtype)
                print('masks sum: ', test_out3.dtype,
                      np.sum(test_out3.astype(float)))
                # assert np.max(test_out) == np.max(test_out2), 'MAtch1!!!'
                # assert np.min(test_out) == np.min(test_out2), 'MAtch2!!!'

            # first_clone_label = graph.get_tensor_by_name(
            #         ('%s/%s:0' % (first_clone_scope, 'pose_map')).strip('/')) # clone_0/val-loss:0
            # # first_clone_pose_dict = graph.get_tensor_by_name(
            # #         ('%s/%s:0' % (first_clone_scope, 'pose_dict')).strip('/'))
            # first_clone_logit = graph.get_tensor_by_name(
            #         ('%s/%s:0' % (first_clone_scope, 'scaled_regression')).strip('/'))
            # not_ignore_mask = graph.get_tensor_by_name(
            #         ('%s/%s:0' % (first_clone_scope, 'not_ignore_mask_in_loss')).strip('/'))
            # label, logits, mask = sess.run([first_clone_label, first_clone_logit, not_ignore_mask])
            # mask = np.reshape(mask, (-1, FLAGS.train_crop_size[0], FLAGS.train_crop_size[1], dataset.num_classes))

            # print '... shapes, types, loss', label.shape, label.dtype, logits.shape, logits.dtype, loss
            # print 'mask', mask.shape, np.mean(mask)
            # logits[mask==0.] = 0.
            # print 'logits', logits.shape, np.max(logits), np.min(logits), np.mean(logits), logits.dtype
            # for idx in range(6):
            #     print idx, np.max(label[:, :, :, idx]), np.min(label[:, :, :, idx])
            # label = label[:, :, :, 5]
            # print 'label', label.shape, np.max(label), np.min(label), np.mean(label), label.dtype
            # print pose_dict, pose_dict.shape
            # # print 'training....... logits stats: ', np.max(logits), np.min(logits), np.mean(logits)
            # # label_one_piece = label[0, :, :, 0]
            # # print 'training....... label stats', np.max(label_one_piece), np.min(label_one_piece), np.sum(label_one_piece[label_one_piece!=255.])
            return [loss, should_stop]

        train_step_fn.step = 0

        # trainables = [v.name for v in tf.trainable_variables()]
        # alls =[v.name for v in tf.all_variables()]
        # print '----- Trainables %d: '%len(trainables), trainables
        # print '----- All %d: '%len(alls), alls[:10]
        # print '===== ', len(list(set(trainables) - set(alls)))
        # print '===== ', len(list(set(alls) - set(trainables)))

        if FLAGS.if_print_tensors:
            for op in tf.get_default_graph().get_operations():
                print(str(op.name))

        # Start the training.
        slim.learning.train(train_tensor,
                            train_step_fn=train_step_fn,
                            logdir=FLAGS.train_logdir,
                            log_every_n_steps=FLAGS.log_steps,
                            master=FLAGS.master,
                            number_of_steps=FLAGS.training_number_of_steps,
                            is_chief=(FLAGS.task == 0),
                            session_config=session_config,
                            startup_delay_steps=startup_delay_steps,
                            init_fn=train_utils.get_model_init_fn(
                                FLAGS.restore_logdir,
                                FLAGS.tf_initial_checkpoint,
                                FLAGS.if_restore,
                                FLAGS.initialize_last_layer,
                                last_layers,
                                ignore_missing_vars=True),
                            summary_op=summary_op,
                            save_summaries_secs=FLAGS.save_summaries_secs,
                            save_interval_secs=FLAGS.save_interval_secs)
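
The train_step_fn hook above wraps slim.learning.train_step so arbitrary tensors can be inspected mid-training. Stripped to its skeleton (the 100-step interval and log format are illustrative), the pattern is:

import tensorflow as tf

slim = tf.contrib.slim

def train_step_fn(sess, train_op, global_step, train_step_kwargs):
    # Run one ordinary training step, then do side work at a fixed interval.
    loss, should_stop = slim.learning.train_step(
        sess, train_op, global_step, train_step_kwargs)
    train_step_fn.step += 1
    if train_step_fn.step % 100 == 0:
        print('step %d, loss %.4f' % (train_step_fn.step, loss))
    return [loss, should_stop]

train_step_fn.step = 0
# Passed via slim.learning.train(train_tensor, train_step_fn=train_step_fn, ...).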
Beispiel #19
0
    def train(self):
        FLAGS = self.flags
        dataset_split = 'train'
        data_config = edict()
        data_config.edge_width = 20
        data_config.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
        data_config.edge_class_num = FLAGS.edge_class_num
        img_files, label_files = get_dataset_files(FLAGS.dataset,
                                                   dataset_split)

        dataset = edict()
        dataset_pp = dataset_pipeline(data_config,
                                      img_files,
                                      label_files,
                                      is_train=True)
        dataset.num_classes = DATASETS_CLASS_NUM[FLAGS.dataset]
        dataset.ignore_label = DATASETS_IGNORE_LABEL[FLAGS.dataset]
        dataset.num_samples = len(dataset_pp)

        tf.logging.set_verbosity(tf.logging.INFO)
        # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
        config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
                                               clone_on_cpu=FLAGS.clone_on_cpu,
                                               replica_id=FLAGS.task,
                                               num_replicas=FLAGS.num_replicas,
                                               num_ps_tasks=FLAGS.num_ps_tasks)

        # Split the batch across GPUs.
        assert FLAGS.train_batch_size % config.num_clones == 0, (
            'Training batch size not divisible by number of clones (GPUs).')

        clone_batch_size = FLAGS.train_batch_size // config.num_clones

        # Get dataset-dependent information.
        #        dataset = segmentation_dataset.get_dataset(
        #            FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)

        tf.gfile.MakeDirs(FLAGS.train_logdir)
        tf.logging.info('Training on %s set', FLAGS.train_split)

        with tf.Graph().as_default() as graph:
            with tf.device(config.inputs_device()):
                data_list = dataset_pp.iterator()
                samples = input_generator.get(
                    (data_list, dataset.ignore_label),
                    FLAGS.train_crop_size,
                    clone_batch_size,
                    min_resize_value=FLAGS.min_resize_value,
                    max_resize_value=FLAGS.max_resize_value,
                    resize_factor=FLAGS.resize_factor,
                    min_scale_factor=FLAGS.min_scale_factor,
                    max_scale_factor=FLAGS.max_scale_factor,
                    scale_factor_step_size=FLAGS.scale_factor_step_size,
                    dataset_split=FLAGS.train_split,
                    is_training=True,
                    model_variant=FLAGS.model_variant)
                inputs_queue = prefetch_queue.prefetch_queue(samples,
                                                             capacity=128 *
                                                             config.num_clones)

            # Create the global step on the device storing the variables.
            with tf.device(config.variables_device()):
                global_step = tf.train.get_or_create_global_step()

                # Define the model and create clones.
                model_fn = self._build_deeplab
                model_args = (inputs_queue, {
                    common.OUTPUT_TYPE: dataset.num_classes
                }, dataset.ignore_label)
                clones = model_deploy.create_clones(config,
                                                    model_fn,
                                                    args=model_args)

                # Gather update_ops from the first clone. These contain, for example,
                # the updates for the batch_norm variables created by model_fn.
                first_clone_scope = config.clone_scope(0)
                update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                               first_clone_scope)

            # Gather initial summaries.
            summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

            # Add summaries for model variables.
            for model_var in slim.get_model_variables():
                summaries.add(
                    tf.summary.histogram(model_var.op.name, model_var))

            label_name = ('%s/%s:0' %
                          (first_clone_scope, common.LABEL)).strip('/')
            print('first clone label name is:', label_name)

            # Add summaries for images, labels, semantic predictions
            if FLAGS.save_summaries_images:
                summary_image = graph.get_tensor_by_name(
                    ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
                summaries.add(
                    tf.summary.image('samples/%s' % common.IMAGE,
                                     summary_image))

                first_clone_label = graph.get_tensor_by_name(
                    ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))

                # Scale up summary image pixel values for better visualization.
                pixel_scaling = max(1, 255 // dataset.num_classes)
                summary_label = tf.cast(first_clone_label * pixel_scaling,
                                        tf.uint8)
                summaries.add(
                    tf.summary.image('samples/%s' % common.LABEL,
                                     summary_label))

                first_clone_output = graph.get_tensor_by_name(
                    ('%s/%s:0' %
                     (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
                predictions = tf.expand_dims(tf.argmax(first_clone_output, 3),
                                             -1)

                summary_predictions = tf.cast(predictions * pixel_scaling,
                                              tf.uint8)
                summaries.add(
                    tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                     summary_predictions))

            # Add summaries for miou and acc.
            labels = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
            predictions = graph.get_tensor_by_name(
                ('%s/%s:0' %
                 (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
            predictions = tf.image.resize_bilinear(predictions,
                                                   tf.shape(labels)[1:3],
                                                   align_corners=True)

            labels = tf.reshape(labels, shape=[-1])
            predictions = tf.reshape(tf.argmax(predictions, 3), shape=[-1])
            weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

            # Set ignore_label regions to label 0, because metrics.mean_iou requires
            # range of labels = [0, dataset.num_classes). Note the ignore_label regions
            # are not evaluated since the corresponding regions contain weights = 0.
            labels = tf.where(tf.equal(labels, dataset.ignore_label),
                              tf.zeros_like(labels), labels)

            # Define the evaluation metric.
            metric_map = {}
            metric_map['miou'], _ = tf.metrics.mean_iou(predictions,
                                                        labels,
                                                        dataset.num_classes,
                                                        weights=weights)
            metric_map['acc'], _ = tf.metrics.accuracy(
                labels=labels,
                predictions=predictions,
                weights=tf.reshape(weights, shape=[-1]))

            for x in ['miou', 'acc']:
                summaries.add(
                    tf.summary.scalar('metrics/%s' % x, metric_map[x]))

            # Add summaries for losses.
            for loss in tf.get_collection(tf.GraphKeys.LOSSES,
                                          first_clone_scope):
                summaries.add(
                    tf.summary.scalar('losses/%s' % loss.op.name, loss))

            # Build the optimizer based on the device specification.
            with tf.device(config.optimizer_device()):
                learning_rate = train_utils.get_model_learning_rate(
                    FLAGS.learning_policy, FLAGS.base_learning_rate,
                    FLAGS.learning_rate_decay_step,
                    FLAGS.learning_rate_decay_factor,
                    FLAGS.training_number_of_steps, FLAGS.learning_power,
                    FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       FLAGS.momentum)
                summaries.add(tf.summary.scalar('learning_rate',
                                                learning_rate))

            startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps
            for variable in slim.get_model_variables():
                summaries.add(tf.summary.histogram(variable.op.name, variable))

            with tf.device(config.variables_device()):
                total_loss, grads_and_vars = model_deploy.optimize_clones(
                    clones, optimizer)
                total_loss = tf.check_numerics(total_loss,
                                               'Loss is inf or nan.')
                summaries.add(tf.summary.scalar('total_loss', total_loss))

                # Modify the gradients for biases and last layer variables.
                last_layers = model.get_extra_layer_scopes(
                    FLAGS.last_layers_contain_logits_only)
                grad_mult = train_utils.get_model_gradient_multipliers(
                    last_layers, FLAGS.last_layer_gradient_multiplier)
                if grad_mult:
                    grads_and_vars = slim.learning.multiply_gradients(
                        grads_and_vars, grad_mult)

                # Create gradient update op.
                grad_updates = optimizer.apply_gradients(
                    grads_and_vars, global_step=global_step)
                update_ops.append(grad_updates)
                update_op = tf.group(*update_ops)
                with tf.control_dependencies([update_op]):
                    train_tensor = tf.identity(total_loss, name='train_op')

            # Add the summaries from the first clone. These contain the summaries
            # created by model_fn and either optimize_clones() or _gather_clone_loss().
            summaries |= set(
                tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

            # Merge all summaries together.
            summary_op = tf.summary.merge(list(summaries))

            # Soft placement allows ops without a GPU implementation to be placed on the CPU.
            session_config = tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=False)

            # Start the training.
            slim.learning.train(train_tensor,
                                logdir=FLAGS.train_logdir,
                                log_every_n_steps=FLAGS.log_steps,
                                master=FLAGS.master,
                                number_of_steps=FLAGS.training_number_of_steps,
                                is_chief=(FLAGS.task == 0),
                                session_config=session_config,
                                startup_delay_steps=startup_delay_steps,
                                init_fn=train_utils.get_model_init_fn(
                                    FLAGS.train_logdir,
                                    FLAGS.tf_initial_checkpoint,
                                    FLAGS.initialize_last_layer,
                                    last_layers,
                                    ignore_missing_vars=True),
                                summary_op=summary_op,
                                save_summaries_secs=FLAGS.save_summaries_secs,
                                save_interval_secs=FLAGS.save_interval_secs)
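
Both training examples feed their clones through slim's prefetch queue, which decouples input preprocessing from the optimization steps. A minimal sketch of that wiring, with dummy tensors standing in for the output of input_generator.get(...) (shapes illustrative):

import tensorflow as tf

slim = tf.contrib.slim
prefetch_queue = slim.prefetch_queue

# A dict of batched tensors, as produced by input_generator.get(...).
samples = {
    'image': tf.zeros([8, 513, 513, 3]),
    'label': tf.zeros([8, 513, 513, 1], dtype=tf.int32),
}
inputs_queue = prefetch_queue.prefetch_queue(samples, capacity=128 * 2)
# Each clone's model_fn then pulls its own batch with inputs_queue.dequeue().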
Beispiel #20
0
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
  config = model_deploy.DeploymentConfig(
      num_clones=FLAGS.num_clones,
      clone_on_cpu=FLAGS.clone_on_cpu,
      replica_id=FLAGS.task,
      num_replicas=FLAGS.num_replicas,
      num_ps_tasks=FLAGS.num_ps_tasks)

  # Split the batch across GPUs.
  assert FLAGS.train_batch_size % config.num_clones == 0, (
      'Training batch size not divisible by number of clones (GPUs).')

  clone_batch_size = FLAGS.train_batch_size // config.num_clones

  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.train_split, dataset_dir=FLAGS.dataset_dir)

  tf.gfile.MakeDirs(FLAGS.train_logdir)
  tf.logging.info('Training on %s set', FLAGS.train_split)

  with tf.Graph().as_default() as graph:
    with tf.device(config.inputs_device()):
      samples = input_generator.get(
          dataset,
          FLAGS.train_crop_size,
          clone_batch_size,
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          dataset_split=FLAGS.train_split,
          is_training=True,
          model_variant=FLAGS.model_variant)
      inputs_queue = prefetch_queue.prefetch_queue(
          samples, capacity=128 * config.num_clones)

    # Create the global step on the device storing the variables.
    with tf.device(config.variables_device()):
      global_step = tf.train.get_or_create_global_step()

      # Define the model and create clones.
      model_fn = _build_deeplab
      model_args = (inputs_queue, {
          common.OUTPUT_TYPE: dataset.num_classes
      }, dataset.ignore_label)
      clones = model_deploy.create_clones(config, model_fn, args=model_args)

      # Gather update_ops from the first clone. These contain, for example,
      # the updates for the batch_norm variables created by model_fn.
      first_clone_scope = config.clone_scope(0)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Add summaries for model variables.
    for model_var in slim.get_model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # Add summaries for images, labels, semantic predictions
    if FLAGS.save_summaries_images:
      summary_image = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
      summaries.add(
          tf.summary.image('samples/%s' % common.IMAGE, summary_image))

      first_clone_label = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
      # Scale up summary image pixel values for better visualization.
      pixel_scaling = max(1, 255 // dataset.num_classes)
      summary_label = tf.cast(first_clone_label * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image('samples/%s' % common.LABEL, summary_label))

      first_clone_output = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
      predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)

      summary_predictions = tf.cast(predictions * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image(
              'samples/%s' % common.OUTPUT_TYPE, summary_predictions))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Build the optimizer based on the device specification.
    with tf.device(config.optimizer_device()):
      learning_rate = train_utils.get_model_learning_rate(
          FLAGS.learning_policy, FLAGS.base_learning_rate,
          FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
          FLAGS.training_number_of_steps, FLAGS.learning_power,
          FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
      optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
      summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps
    for variable in slim.get_model_variables():
      summaries.add(tf.summary.histogram(variable.op.name, variable))

    with tf.device(config.variables_device()):
      total_loss, grads_and_vars = model_deploy.optimize_clones(
          clones, optimizer)
      total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
      summaries.add(tf.summary.scalar('total_loss', total_loss))

      # Modify the gradients for biases and last layer variables.
      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      grad_mult = train_utils.get_model_gradient_multipliers(
          last_layers, FLAGS.last_layer_gradient_multiplier)
      if grad_mult:
        grads_and_vars = slim.learning.multiply_gradients(
            grads_and_vars, grad_mult)

      # Create gradient update op.
      grad_updates = optimizer.apply_gradients(
          grads_and_vars, global_step=global_step)
      update_ops.append(grad_updates)
      update_op = tf.group(*update_ops)
      with tf.control_dependencies([update_op]):
        train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries))

    # Soft placement allows ops without a GPU implementation to be placed on the CPU.
    session_config = tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False)

    # Start the training.
    slim.learning.train(
        train_tensor,
        logdir=FLAGS.train_logdir,
        log_every_n_steps=FLAGS.log_steps,
        master=FLAGS.master,
        number_of_steps=FLAGS.training_number_of_steps,
        is_chief=(FLAGS.task == 0),
        session_config=session_config,
        startup_delay_steps=startup_delay_steps,
        init_fn=train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True),
        summary_op=summary_op,
        save_summaries_secs=FLAGS.save_summaries_secs,
        save_interval_secs=FLAGS.save_interval_secs)
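
train_utils.get_model_learning_rate with the 'poly' policy decays the base rate polynomially toward zero over the run. A plain-Python sketch of that schedule (an illustration of the formula, not the library code):

def poly_learning_rate(step, base_lr, training_steps, power=0.9):
    """base_lr * (1 - step/training_steps) ** power, reaching 0 at the end."""
    return base_lr * (1.0 - float(step) / training_steps) ** power

poly_learning_rate(0, 1e-4, 30000)      # 1e-4 at step 0
poly_learning_rate(15000, 1e-4, 30000)  # ~5.4e-5 halfway through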
Beispiel #21
0
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = input_generator.get(
        dataset,
        FLAGS.eval_crop_size,
        FLAGS.eval_batch_size,
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        dataset_split=FLAGS.eval_split,
        is_training=False,
        model_variant=FLAGS.model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.eval_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,): # syaru: `(1.0,)` is a tuple!
      tf.logging.info('Performing single-scale test.')
      """ 
      syaru: 
      Returns: A dictionary with keys specifying the output_type (e.g., semantic
      prediction) and values storing Tensors representing predictions (argmax
      over channels). Each prediction has size [batch, height, width].
      """
      predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                         image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    # syaru: common.OUTPUT_TYPE = 'semantic'
    #        common.LABEL='label'
    # `tf.reshape(..., shape=[-1])`: -1 infers the size automatically, reshaping to 1-D;
    # the ops below flatten predictions and labels from [batch, image_height, image_width, 1] to 1-D.
    # `tf.not_equal(x, y, name=None)`: Returns a Tensor of type bool, which represents the truth value of (x != y) element-wise.
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    # syaru: samples whose label equals ignore_label contribute nothing to the loss,
    # and their gradients are zeroed out in backprop.
    # To satisfy tf.metrics.mean_iou's label range [0, dataset.num_classes), this is done with `tf.where(input, a, b)`.
    # `tf.where(input, a, b)`: a and b are tensors of the same shape; where input is true the value from a is kept, elsewhere it is replaced by the value from b.
    # `tf.zeros_like(labels)`: builds an all-zero tensor with the same shape as labels; passing it to tf.where replaces label values of 255 (dataset.ignore_label=255) with 0.
    labels = tf.where(
        tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    # Define the evaluation metric.
    # syaru: for the segmentation task, in `tensorflow/python/ops/metrics_impl.py`, IoU is defined as:
    # IOU = true_positive / (true_positive + false_positive + false_negative).
    # This is intuitive: segmentation is dense prediction, and predictions/labels are flattened
    # to 1-D; true negatives (pixels where both are background) do not enter the IoU computation.
    metric_map = {}
    metric_map[predictions_tag] = tf.metrics.mean_iou(
        predictions, labels, dataset.num_classes, weights=weights)

    # syaru: `tensorflow/contrib/metrics/python/ops/metric_ops.py`
    # Pairs metric names with their associated value and update ops (useful when the list of metrics is long) -> (value_tensor, update_op).
    # metric name: 'miou_1.0' (the only name in this case); value and update ops: tf.metrics.mean_iou(predictions, labels, dataset.num_classes, weights=weights).
    # Returns: a `dictionary` from `metric names to value ops` and a `dictionary` from `metric names to update ops`.
    # In this case it returns `dict1={'miou_1.0': value_tensor}, dict2={'miou_1.0': update_op}`.
    metrics_to_values, metrics_to_updates = (
        tf.contrib.metrics.aggregate_metric_map(metric_map))

    # syaru: key: `metric_name`, value: `metric_value`
    for metric_name, metric_value in six.iteritems(metrics_to_values):
      slim.summaries.add_scalar_summary(
          metric_value, metric_name, print_summary=True)

    # syaru: eval_batch_size = 1; dataset.num_samples is the number of images in the `eval` split.
    num_batches = int(
        math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

    tf.logging.info('Eval num images %d', dataset.num_samples)
    tf.logging.info('Eval batch size %d and num batch %d',
                    FLAGS.eval_batch_size, num_batches)

    # syaru: when max_number_of_evaluations=1 is passed on the command line,
    # num_eval_iters != None and evaluation_loop iterates only once.
    # `tensorflow/contrib/slim/python/slim/evaluation.py`
    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations
    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        logdir=FLAGS.eval_logdir,
        num_evals=num_batches,
        eval_op=list(metrics_to_updates.values()),
        max_number_of_evaluations=num_eval_iters,
        eval_interval_secs=FLAGS.eval_interval_secs)
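
Zeroing the ignore_label pixels is safe only because those same pixels carry weight 0, so they never enter the metric. The following self-contained NumPy sketch mirrors what the masked mean-IoU above computes; it is illustrative only, not the tf.metrics implementation:

import numpy as np

def masked_mean_iou(preds, labels, num_classes, ignore_label):
    # Flatten, build the ignore mask, and remap ignored labels to 0 (weight 0 below).
    preds, labels = preds.ravel(), labels.ravel()
    weights = (labels != ignore_label)
    labels = np.where(weights, labels, 0)
    ious = []
    for c in range(num_classes):
        tp = np.sum((preds == c) & (labels == c) & weights)
        fp = np.sum((preds == c) & (labels != c) & weights)
        fn = np.sum((preds != c) & (labels == c) & weights)
        if tp + fp + fn > 0:
            ious.append(tp / float(tp + fp + fn))
    return np.mean(ious)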
Beispiel #22
0
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.vis_split, dataset_dir=FLAGS.dataset_dir)
  train_id_to_eval_id = None
  if dataset.name == segmentation_dataset.get_cityscapes_dataset_name():
    tf.logging.info('Cityscapes requires converting train_id to eval_id.')
    train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

  # Prepare for visualization.
  tf.gfile.MakeDirs(FLAGS.vis_logdir)
  save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(save_dir)
  raw_save_dir = os.path.join(
      FLAGS.vis_logdir, _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(raw_save_dir)

  tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

  g = tf.Graph()
  with g.as_default():
    samples = input_generator.get(dataset,
                                  FLAGS.vis_crop_size,
                                  FLAGS.vis_batch_size,
                                  min_resize_value=FLAGS.min_resize_value,
                                  max_resize_value=FLAGS.max_resize_value,
                                  resize_factor=FLAGS.resize_factor,
                                  dataset_split=FLAGS.vis_split,
                                  is_training=False,
                                  model_variant=FLAGS.model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.vis_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE],
          model_options=model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]

    if FLAGS.min_resize_value and FLAGS.max_resize_value:
      # Only supports batch_size = 1, since we assume the dimensions of the
      # original image after tf.squeeze are [height, width, 3].
      assert FLAGS.vis_batch_size == 1

      # Reverse the resizing and padding operations performed in preprocessing.
      # First, we slice the valid regions (i.e., remove the padded region) and
      # then we resize the predictions back.
      original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
      original_image_shape = tf.shape(original_image)
      predictions = tf.slice(
          predictions,
          [0, 0, 0],
          [1, original_image_shape[0], original_image_shape[1]])
      resized_shape = tf.to_int32([tf.squeeze(samples[common.HEIGHT]),
                                   tf.squeeze(samples[common.WIDTH])])
      predictions = tf.squeeze(
          tf.image.resize_images(tf.expand_dims(predictions, 3),
                                 resized_shape,
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                                 align_corners=True), 3)

    tf.train.get_or_create_global_step()
    saver = tf.train.Saver(slim.get_variables_to_restore())
    sv = tf.train.Supervisor(graph=g,
                             logdir=FLAGS.vis_logdir,
                             init_op=tf.global_variables_initializer(),
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=saver)
    num_batches = int(math.ceil(
        dataset.num_samples / float(FLAGS.vis_batch_size)))
    last_checkpoint = None

    # Loop to visualize the results when a new checkpoint is created.
    num_iters = 0
    while (FLAGS.max_number_of_iterations <= 0 or
           num_iters < FLAGS.max_number_of_iterations):
      num_iters += 1
      last_checkpoint = slim.evaluation.wait_for_new_checkpoint(
          FLAGS.checkpoint_dir, last_checkpoint)
      start = time.time()
      tf.logging.info(
          'Starting visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                       time.gmtime()))
      tf.logging.info('Visualizing with model %s', last_checkpoint)

      with sv.managed_session(FLAGS.master,
                              start_standard_services=False) as sess:
        sv.start_queue_runners(sess)
        sv.saver.restore(sess, last_checkpoint)

        image_id_offset = 0
        for batch in range(num_batches):
          tf.logging.info('Visualizing batch %d / %d', batch + 1, num_batches)
          _process_batch(sess=sess,
                         original_images=samples[common.ORIGINAL_IMAGE],
                         semantic_predictions=predictions,
                         image_names=samples[common.IMAGE_NAME],
                         image_heights=samples[common.HEIGHT],
                         image_widths=samples[common.WIDTH],
                         image_id_offset=image_id_offset,
                         save_dir=save_dir,
                         raw_save_dir=raw_save_dir,
                         train_id_to_eval_id=train_id_to_eval_id)
          image_id_offset += FLAGS.vis_batch_size

      tf.logging.info(
          'Finished visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                       time.gmtime()))
      time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
      if time_to_next_eval > 0:
        time.sleep(time_to_next_eval)
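
_process_batch is defined elsewhere in vis.py; conceptually it runs the prediction tensor for one batch and writes one annotation per input image. A minimal sketch of that idea follows; the body is an assumption for illustration, not DeepLab's actual implementation, which colorizes the label maps and saves PNGs:

import os
import numpy as np

def process_batch_sketch(sess, semantic_predictions, image_id_offset, save_dir):
    # Evaluate this batch's prediction tensor and dump each raw prediction map.
    preds = sess.run(semantic_predictions)
    for i in range(preds.shape[0]):
        out_path = os.path.join(
            save_dir, '%06d_prediction.npy' % (image_id_offset + i))
        np.save(out_path, preds[i].astype(np.uint8))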
Beispiel #23
0
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(
        FLAGS.dataset,
        FLAGS.eval_split,
        dataset_dir=FLAGS.dataset_dir,
        use_input_hints=FLAGS.input_hints,
        hint_types=FLAGS.hint_types)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    if FLAGS.force_dropout:
        raise Exception("Need to implement force dropout for eval.py")

    with tf.Graph().as_default():
        samples = input_generator.get(dataset,
                                      FLAGS.eval_crop_size,
                                      FLAGS.eval_batch_size,
                                      min_resize_value=FLAGS.min_resize_value,
                                      max_resize_value=FLAGS.max_resize_value,
                                      resize_factor=FLAGS.resize_factor,
                                      dataset_split=FLAGS.eval_split,
                                      is_training=False,
                                      model_variant=FLAGS.model_variant)

        if FLAGS.input_hints:
            ###
            # TODO: Can modify this to checkerboard block hints.
            if 'dynamic_class_partial_boundary_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using dynamic partial boundary class hints, do not use other hint types!'
                print("----")
                print(
                    "eval.py: Partial boundary hints with grid {}x{}.".format(
                        FLAGS.dynamic_class_partial_boundary_hint_B,
                        FLAGS.dynamic_class_partial_boundary_hint_B))
                print("eval.py: Drawing blocks with p {}.".format(
                    FLAGS.dynamic_class_partial_boundary_hint_p))
                if FLAGS.dynamic_class_partial_boundary_full_block:
                    print(
                        "eval.py: Keeping entire block instead of masking boundaries.")
                else:
                    print(
                        "eval.py: Masking with boundary threshold {}.".format(
                            FLAGS.boundary_threshold))

                print("----")

                if FLAGS.dynamic_class_partial_boundary_full_block:
                    boundary_mask = tf.cast(
                        tf.ones_like(samples[common.LABEL]), tf.uint8)
                else:
                    boundary_mask = tf.cast(
                        tf.less(samples[common.BOUNDARY_DMAP],
                                FLAGS.boundary_threshold), tf.uint8)

                class_hints, hinted = tf.py_func(
                    func=train_utils.generate_class_partial_boundaries_helper(
                        B=FLAGS.dynamic_class_partial_boundary_hint_B,
                        p=FLAGS.dynamic_class_partial_boundary_hint_p),
                    inp=[samples[common.LABEL], boundary_mask],
                    Tout=[tf.uint8, tf.bool])
                samples[common.HINT] = class_hints
                samples[common.HINT].set_shape(
                    samples[common.LABEL].get_shape().as_list())
                # Now preprocess this. Set the flag so that the rest of the work is done as usual.
                FLAGS.hint_types = ['class_hint']
            ###

            if 'dynamic_class_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using dynamic class hints, do not use other hint types!'
                print("----")
                print("eval.py: Drawing hints with geo mean {}.".format(
                    FLAGS.dynamic_class_hint_geo_mean))
                print("eval.py: Masking with boundary threshold {}.".format(
                    FLAGS.boundary_threshold))
                print("----")
                boundary_mask = tf.cast(
                    tf.less(samples[common.BOUNDARY_DMAP],
                            FLAGS.boundary_threshold), tf.uint8)
                class_hints, hinted = tf.py_func(
                    func=train_utils.generate_class_clicks_helper(
                        geo_mean=FLAGS.dynamic_class_hint_geo_mean),
                    inp=[samples[common.LABEL], boundary_mask],
                    Tout=[tf.uint8, tf.bool])
                samples[common.HINT] = class_hints
                samples[common.HINT].set_shape(
                    samples[common.LABEL].get_shape().as_list())
                # Now preprocess this. Set the flag so that the rest of the work is done as usual.
                FLAGS.hint_types = ['class_hint']

            # If using class hints, preprocess into num_class binary mask channels
            if 'class_hint' in FLAGS.hint_types:
                assert len(
                    FLAGS.hint_types
                ) == 1, 'When using class hints, do not use other hint types!'
                num_classes = dataset.num_classes
                print('eval.py: num classes is {}'.format(num_classes))
                class_hint_channels_list = []
                for label in range(num_classes):
                    # Multiplying by 255 brings the hint into the same range as
                    # image pixels, so the feature_extractor mean subtraction
                    # maps it back to roughly the [0, 1] range.
                    class_hint_channel = tf.to_float(
                        tf.equal(samples[common.HINT], label)) * 255
                    class_hint_channels_list.append(class_hint_channel)
                class_hint_channels = tf.concat(class_hint_channels_list,
                                                axis=-1)
                samples[common.HINT] = class_hint_channels

            # Get hints and concat to image as input into network
            samples[common.HINT] = tf.identity(samples[common.HINT],
                                               name=common.HINT)
            model_inputs = tf.concat(
                [samples[common.IMAGE],
                 tf.to_float(samples[common.HINT])],
                axis=-1)
        else:
            # Just image is input into network
            model_inputs = samples[common.IMAGE]
        print('eval.py: shape {}'.format(model_inputs.get_shape().as_list()))

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                # samples[common.IMAGE],
                model_inputs,
                model_options=model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                # samples[common.IMAGE],
                model_inputs,
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]


        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights =
        # 0.
        labels = tf.where(tf.equal(labels, dataset.ignore_label),
                          tf.zeros_like(labels), labels)

        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
            predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Define the evaluation metric.
        metric_map = {}

        # mean iou
        metric_map[predictions_tag] = tf.metrics.mean_iou(predictions,
                                                          labels,
                                                          dataset.num_classes,
                                                          weights=weights)


        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(metric_value,
                                              metric_name,
                                              print_summary=True)

        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

        tf.logging.info('Eval num images %d', dataset.num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.eval_batch_size, num_batches)

        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations
        config = tf.ConfigProto(intra_op_parallelism_threads=16,
                                inter_op_parallelism_threads=1)
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs,
            session_config=config)
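
The class-hint branch above expands a single integer hint map into num_classes binary channels scaled by 255, so the feature extractor's mean subtraction normalizes them like image pixels. The same encoding in isolation, as a sketch with shapes assumed to be [batch, H, W, 1]:

import tensorflow as tf

def encode_class_hints(hints, num_classes):
    # hints: integer tensor [batch, H, W, 1] of class ids.
    # Returns a float tensor [batch, H, W, num_classes] with values in {0, 255}.
    channels = [tf.to_float(tf.equal(hints, label)) * 255
                for label in range(num_classes)]
    return tf.concat(channels, axis=-1)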
Beispiel #24
0
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                               FLAGS.eval_split,
                                               dataset_dir=FLAGS.dataset_dir)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    g = tf.Graph()
    with g.as_default():
        samples = input_generator.get(dataset,
                                      FLAGS.eval_crop_size,
                                      FLAGS.eval_batch_size,
                                      min_resize_value=FLAGS.min_resize_value,
                                      max_resize_value=FLAGS.max_resize_value,
                                      resize_factor=FLAGS.resize_factor,
                                      dataset_split=FLAGS.eval_split,
                                      is_training=False,
                                      model_variant=FLAGS.model_variant)

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.eval_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)

        predictions = predictions[common.OUTPUT_TYPE]
        predictions = tf.reshape(predictions, shape=[-1])

        (num_batches, summary_op, metrics_to_updates, confusion_matrix,
         category_iou) = create_metrics(g, samples, dataset, predictions)

        num_eval_iters = None

        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            summary_op=summary_op,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs,
            final_op=[confusion_matrix, category_iou])
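
create_metrics in Beispiel #24 is project-specific; it additionally exposes a confusion matrix and per-category IoU through final_op. Per-class IoU follows mechanically from a confusion matrix, as in this NumPy sketch:

import numpy as np

def per_class_iou(cm):
    # cm: [num_classes, num_classes] confusion matrix,
    # rows = ground truth, columns = prediction.
    tp = np.diag(cm).astype(np.float64)
    fp = cm.sum(axis=0) - tp   # predicted as class c but labeled otherwise
    fn = cm.sum(axis=1) - tp   # labeled class c but predicted otherwise
    denom = tp + fp + fn
    return np.where(denom > 0, tp / np.maximum(denom, 1), 0.0)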
Beispiel #25
0
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                               FLAGS.vis_split,
                                               dataset_dir=FLAGS.dataset_dir)
    train_id_to_eval_id = None
    if dataset.name == segmentation_dataset.get_cityscapes_dataset_name():
        tf.logging.info('Cityscapes requires converting train_id to eval_id.')
        train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

    # Prepare for visualization.
    tf.gfile.MakeDirs(FLAGS.vis_logdir)
    save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(save_dir)
    raw_save_dir = os.path.join(FLAGS.vis_logdir,
                                _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(raw_save_dir)

    tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

    g = tf.Graph()
    with g.as_default():
        samples = input_generator.get(dataset,
                                      FLAGS.vis_crop_size,
                                      FLAGS.vis_batch_size,
                                      min_resize_value=FLAGS.min_resize_value,
                                      max_resize_value=FLAGS.max_resize_value,
                                      resize_factor=FLAGS.resize_factor,
                                      dataset_split=FLAGS.vis_split,
                                      is_training=False,
                                      model_variant=FLAGS.model_variant)

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.vis_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options=model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]

        if FLAGS.min_resize_value and FLAGS.max_resize_value:
            # Only supports batch_size = 1, since we assume the dimensions of
            # the original image after tf.squeeze are [height, width, 3].
            assert FLAGS.vis_batch_size == 1

            # Reverse the resizing and padding operations performed in preprocessing.
            # First, we slice the valid regions (i.e., remove the padded region)
            # and then we resize the predictions back.
            original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
            original_image_shape = tf.shape(original_image)
            predictions = tf.slice(
                predictions, [0, 0, 0],
                [1, original_image_shape[0], original_image_shape[1]])
            resized_shape = tf.to_int32([
                tf.squeeze(samples[common.HEIGHT]),
                tf.squeeze(samples[common.WIDTH])
            ])
            predictions = tf.squeeze(
                tf.image.resize_images(
                    tf.expand_dims(predictions, 3),
                    resized_shape,
                    method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                    align_corners=True), 3)

        if FLAGS.is_quant:
            tf.contrib.quantize.experimental_create_eval_graph(
                weight_bits=FLAGS.weight_bits,
                activation_bits=FLAGS.activation_bits)

        tf.train.get_or_create_global_step()
        saver = tf.train.Saver(slim.get_variables_to_restore())
        sv = tf.train.Supervisor(graph=g,
                                 logdir=FLAGS.vis_logdir,
                                 init_op=tf.global_variables_initializer(),
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=saver)
        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.vis_batch_size)))
        last_checkpoint = None

        # Loop to visualize the results when a new checkpoint is created.
        num_iters = 0
        while (FLAGS.max_number_of_iterations <= 0
               or num_iters < FLAGS.max_number_of_iterations):
            num_iters += 1
            last_checkpoint = slim.evaluation.wait_for_new_checkpoint(
                FLAGS.checkpoint_dir, last_checkpoint)
            start = time.time()
            tf.logging.info('Starting visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            tf.logging.info('Visualizing with model %s', last_checkpoint)

            with sv.managed_session(FLAGS.master,
                                    start_standard_services=False) as sess:
                sv.start_queue_runners(sess)
                sv.saver.restore(sess, last_checkpoint)

                image_id_offset = 0
                for batch in range(num_batches):
                    tf.logging.info('Visualizing batch %d / %d', batch + 1,
                                    num_batches)
                    _process_batch(
                        sess=sess,
                        original_images=samples[common.ORIGINAL_IMAGE],
                        semantic_predictions=predictions,
                        image_names=samples[common.IMAGE_NAME],
                        image_heights=samples[common.HEIGHT],
                        image_widths=samples[common.WIDTH],
                        image_id_offset=image_id_offset,
                        save_dir=save_dir,
                        raw_save_dir=raw_save_dir,
                        train_id_to_eval_id=train_id_to_eval_id)
                    image_id_offset += FLAGS.vis_batch_size

            tf.logging.info('Finished visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
            if time_to_next_eval > 0:
                time.sleep(time_to_next_eval)
Beispiel #26
0
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = input_generator.get(
        dataset,
        FLAGS.eval_crop_size,
        FLAGS.eval_batch_size,
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        dataset_split=FLAGS.eval_split,
        is_training=False,
        model_variant=FLAGS.model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.eval_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                         image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(
        tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    # Define the evaluation metric.
    metric_map = {}
    metric_map[predictions_tag] = tf.metrics.mean_iou(
        predictions, labels, dataset.num_classes, weights=weights)

    metrics_to_values, metrics_to_updates = (
        tf.contrib.metrics.aggregate_metric_map(metric_map))

    for metric_name, metric_value in six.iteritems(metrics_to_values):
      slim.summaries.add_scalar_summary(
          metric_value, metric_name, print_summary=True)

    num_batches = int(
        math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

    tf.logging.info('Eval num images %d', dataset.num_samples)
    tf.logging.info('Eval batch size %d and num batch %d',
                    FLAGS.eval_batch_size, num_batches)

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations

    ''' Original code:

    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        logdir=FLAGS.eval_logdir,
        num_evals=num_batches,
        eval_op=list(metrics_to_updates.values()),
        max_number_of_evaluations=num_eval_iters,
        eval_interval_secs=FLAGS.eval_interval_secs)
    '''
    # Code change for checkpoint path: run a single evaluation pass with
    # evaluate_once instead of the evaluation_loop above.
    metric_values = slim.evaluation.evaluate_once(
        master=FLAGS.master,
        checkpoint_path=FLAGS.checkpoint_dir,
        logdir=FLAGS.eval_logdir,
        num_evals=num_batches,
        eval_op=list(metrics_to_updates.values()),
        final_op=list(metrics_to_values.values()))
    for metric, value in zip(metrics_to_values.keys(), metric_values):
      tf.logging.info('Metric %s has value %f', metric, value)
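
One caveat with the evaluate_once variant above: checkpoint_path expects a concrete checkpoint, while FLAGS.checkpoint_dir may be a directory. Resolving the latest checkpoint first avoids that mismatch; a small sketch under that assumption:

checkpoint_path = tf.train.latest_checkpoint(FLAGS.checkpoint_dir)
metric_values = slim.evaluation.evaluate_once(
    master=FLAGS.master,
    checkpoint_path=checkpoint_path,
    logdir=FLAGS.eval_logdir,
    num_evals=num_batches,
    eval_op=list(metrics_to_updates.values()),
    final_op=list(metrics_to_values.values()))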
Beispiel #27
0
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = input_generator.get(
        dataset,
        FLAGS.eval_crop_size,
        FLAGS.eval_batch_size,
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        dataset_split=FLAGS.eval_split,
        is_training=False,
        model_variant=FLAGS.model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.eval_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                         image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(
        tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    # Define the evaluation metric.
    metric_map = {}
    metric_map[predictions_tag] = tf.metrics.mean_iou(
        predictions, labels, dataset.num_classes, weights=weights)

    metrics_to_values, metrics_to_updates = (
        tf.contrib.metrics.aggregate_metric_map(metric_map))

    for metric_name, metric_value in six.iteritems(metrics_to_values):
      slim.summaries.add_scalar_summary(
          metric_value, metric_name, print_summary=True)

    num_batches = int(
        math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

    tf.logging.info('Eval num images %d', dataset.num_samples)
    tf.logging.info('Eval batch size %d and num batch %d',
                    FLAGS.eval_batch_size, num_batches)

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations
    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        logdir=FLAGS.eval_logdir,
        num_evals=num_batches,
        eval_op=list(metrics_to_updates.values()),
        max_number_of_evaluations=num_eval_iters,
        eval_interval_secs=FLAGS.eval_interval_secs)
Beispiel #28
0
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.vis_split, dataset_dir=FLAGS.dataset_dir)
  train_id_to_eval_id = None

  if dataset.name == segmentation_dataset.get_cityscapes_dataset_name():
    tf.logging.info('Cityscapes requires converting train_id to eval_id.')
    train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

  global filename_list
  global perf_all
  filename_list = open(os.path.join(FLAGS.dataset_dir, '../ReOrg/filename-val.txt'), encoding='utf-8').readlines()
  filename_list = [x.strip() for x in filename_list]
  perf_all = np.empty((len(filename_list), 3))

  # Prepare for visualization.
  tf.gfile.MakeDirs(FLAGS.vis_logdir)
  save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(save_dir)
  raw_save_dir = os.path.join(
      FLAGS.vis_logdir, _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(raw_save_dir)

  tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

  g = tf.Graph()
  with g.as_default():
    samples = input_generator.get(dataset,
                                  FLAGS.vis_crop_size,
                                  FLAGS.vis_batch_size,
                                  min_resize_value=FLAGS.min_resize_value,
                                  max_resize_value=FLAGS.max_resize_value,
                                  resize_factor=FLAGS.resize_factor,
                                  dataset_split=FLAGS.vis_split,
                                  is_training=False,
                                  model_variant=FLAGS.model_variant)
                                  
    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.vis_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE],
          model_options=model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    pred_compute = tf.reshape(predictions, shape=[-1])
    label_compute = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(label_compute, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    label_compute = tf.where(
            tf.equal(label_compute, dataset.ignore_label), tf.zeros_like(label_compute), label_compute)

    m_iou, m_accu = compute_m_iou_accu(
            pred_compute, label_compute, dataset.num_classes, weights=weights)

    # m_iou = compute_miou(
    #         pred_compute, label_compute, dataset.num_classes, weights=weights)

    # m_accu = compute_maccu(
    #         pred_compute, label_compute, dataset.num_classes, weights=weights)

    accu = compute_accu(
            pred_compute, label_compute, weights=weights)

    if FLAGS.min_resize_value and FLAGS.max_resize_value:
      # Only supports batch_size = 1, since we assume the dimensions of the
      # original image after tf.squeeze are [height, width, 3].
      assert FLAGS.vis_batch_size == 1

      # Reverse the resizing and padding operations performed in preprocessing.
      # First, we slice the valid regions (i.e., remove the padded region) and
      # then we resize the predictions back.
      original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
      original_image_shape = tf.shape(original_image)
      predictions = tf.slice(
          predictions,
          [0, 0, 0],
          [1, original_image_shape[0], original_image_shape[1]])
      resized_shape = tf.to_int32([tf.squeeze(samples[common.HEIGHT]),
                                   tf.squeeze(samples[common.WIDTH])])
      predictions = tf.squeeze(
          tf.image.resize_images(tf.expand_dims(predictions, 3),
                                 resized_shape,
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                                 align_corners=True), 3)

    tf.train.get_or_create_global_step()
    saver = tf.train.Saver(slim.get_variables_to_restore())
    sv = tf.train.Supervisor(graph=g,
                             logdir=FLAGS.vis_logdir,
                             init_op=tf.global_variables_initializer(),
                             summary_op=None,
                             summary_writer=None,
                             global_step=None,
                             saver=saver)
    num_batches = int(math.ceil(
        dataset.num_samples / float(FLAGS.vis_batch_size)))

    last_checkpoint = FLAGS.checkpoint_dir

    start = time.time()
    tf.logging.info(
        'Starting visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                    time.gmtime()))
    tf.logging.info('Visualizing with model %s', last_checkpoint)

    with sv.managed_session(FLAGS.master,
                            start_standard_services=False) as sess:
        sv.start_queue_runners(sess)
        sv.saver.restore(sess, last_checkpoint)

        image_id_offset = 1
        for batch in range(num_batches):
            tf.logging.info('Visualizing batch %d / %d', batch + 1, num_batches)
            _process_batch(sess=sess,
                            original_images=samples[common.ORIGINAL_IMAGE],
                            labels=samples[common.LABEL],
                            semantic_predictions=predictions,
                            image_names=samples[common.IMAGE_NAME],
                            image_heights=samples[common.HEIGHT],
                            image_widths=samples[common.WIDTH],
                            perf_metrics=[m_iou, m_accu, accu],
                            image_id_offset=image_id_offset,
                            save_dir=save_dir,
                            raw_save_dir=raw_save_dir,
                            train_id_to_eval_id=train_id_to_eval_id)
            image_id_offset += FLAGS.vis_batch_size

        np.savetxt(os.path.join(FLAGS.vis_logdir, 'per-image-metrics.txt'), perf_all)

        tf.logging.info(
            'Finished visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                        time.gmtime()))
Beispiel #29
0
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  # Get dataset-dependent information.
  dataset = segmentation_dataset.get_dataset(
      FLAGS.dataset, FLAGS.eval_split, dataset_dir=FLAGS.dataset_dir)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = input_generator.get(
        dataset,
        FLAGS.eval_crop_size,
        FLAGS.eval_batch_size,
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        dataset_split=FLAGS.eval_split,
        is_training=False,
        model_variant=FLAGS.model_variant)

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
        crop_size=FLAGS.eval_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                         image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(
        tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    # Define the evaluation metric.
    metric_map = {}

    # insert by trobr: drop pixels whose label falls outside [0, num_classes)
    indices = tf.squeeze(tf.where(tf.less_equal(
        labels, dataset.num_classes - 1)), 1)
    labels = tf.cast(tf.gather(labels, indices), tf.int32)
    predictions = tf.gather(predictions, indices)
    # Gather weights too, so their shape stays aligned with the filtered pixels.
    weights = tf.gather(weights, indices)
    # end of insert

    metric_map[predictions_tag] = tf.metrics.mean_iou(
        predictions, labels, dataset.num_classes, weights=weights)




    metrics_to_values, metrics_to_updates = (
        tf.contrib.metrics.aggregate_metric_map(metric_map))

    for metric_name, metric_value in six.iteritems(metrics_to_values):
      slim.summaries.add_scalar_summary(
          metric_value, metric_name, print_summary=True)

    num_batches = int(
        math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

    tf.logging.info('Eval num images %d', dataset.num_samples)
    tf.logging.info('Eval batch size %d and num batch %d',
                    FLAGS.eval_batch_size, num_batches)

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations
    slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        logdir=FLAGS.eval_logdir,
        num_evals=num_batches,
        eval_op=list(metrics_to_updates.values()),
        max_number_of_evaluations=num_eval_iters,
        eval_interval_secs=FLAGS.eval_interval_secs)
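
The "insert by trobr" block in Beispiel #29 drops every pixel whose label lies outside [0, num_classes) before the metric update: tf.where on a boolean vector returns the indices of its true entries, and tf.gather selects them. A tiny standalone demonstration in TF1 session style:

import tensorflow as tf

labels = tf.constant([0, 3, 255, 2], dtype=tf.int64)
predictions = tf.constant([0, 1, 1, 2], dtype=tf.int64)
# Keep indices whose label is a valid class id (num_classes == 4 here).
valid = tf.squeeze(tf.where(tf.less_equal(labels, 3)), 1)
with tf.Session() as sess:
    print(sess.run([tf.gather(labels, valid), tf.gather(predictions, valid)]))
    # -> [array([0, 3, 2]), array([0, 1, 2])]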
Beispiel #30
0
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = segmentation_dataset.get_dataset(FLAGS.dataset,
                                               FLAGS.vis_split,
                                               dataset_dir=FLAGS.dataset_dir)
    train_id_to_eval_id = None
    if dataset.name == segmentation_dataset.get_cityscapes_dataset_name():
        tf.logging.info('Cityscapes requires converting train_id to eval_id.')
        train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

    # Prepare for visualization.
    tf.gfile.MakeDirs(FLAGS.vis_logdir)
    save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(save_dir)
    raw_save_dir = os.path.join(FLAGS.vis_logdir,
                                _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(raw_save_dir)

    tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

    g = tf.Graph()
    with g.as_default():
        samples = input_generator.get(dataset,
                                      FLAGS.vis_crop_size,
                                      FLAGS.vis_batch_size,
                                      min_resize_value=FLAGS.min_resize_value,
                                      max_resize_value=FLAGS.max_resize_value,
                                      resize_factor=FLAGS.resize_factor,
                                      dataset_split=FLAGS.vis_split,
                                      is_training=False,
                                      model_variant="mobilenet_v2")

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_classes},
            crop_size=FLAGS.vis_crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        mask = tf.squeeze(samples[common.LABEL])
        original_images = tf.squeeze(samples[common.ORIGINAL_IMAGE])
        images = tf.squeeze(samples[common.IMAGE])

        if FLAGS.min_resize_value and FLAGS.max_resize_value:
            # Only supports batch_size = 1, since we assume the dimensions of
            # the original image after tf.squeeze are [height, width, 3].
            assert FLAGS.vis_batch_size == 1

        tf.train.get_or_create_global_step()
        saver = tf.train.Saver(slim.get_variables_to_restore())
        sv = tf.train.Supervisor(graph=g,
                                 logdir=FLAGS.vis_logdir,
                                 init_op=tf.global_variables_initializer(),
                                 summary_op=None,
                                 summary_writer=None,
                                 global_step=None,
                                 saver=saver)
        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.vis_batch_size)))

        # Loop to visualize the results when a new checkpoint is created.
        num_iters = 0
        while (FLAGS.max_number_of_iterations <= 0
               or num_iters < FLAGS.max_number_of_iterations):
            num_iters += 1

            start = time.time()
            tf.logging.info('Starting visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))

            with sv.managed_session(FLAGS.master,
                                    start_standard_services=False) as sess:
                sv.start_queue_runners(sess)

                image_id_offset = 0
                for batch in range(num_batches):
                    tf.logging.info('Visualizing batch %d / %d', batch + 1,
                                    num_batches)
                    _process_batch(sess=sess,
                                   original_images=original_images,
                                   resize_images=images,
                                   image_names=samples[common.IMAGE_NAME],
                                   mask=mask,
                                   image_heights=samples[common.HEIGHT],
                                   image_widths=samples[common.WIDTH],
                                   image_id_offset=image_id_offset,
                                   save_dir=save_dir,
                                   raw_save_dir=raw_save_dir,
                                   train_id_to_eval_id=train_id_to_eval_id)
                    image_id_offset += FLAGS.vis_batch_size

            tf.logging.info('Finished visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            time_to_next_eval = start + FLAGS.eval_interval_secs - time.time()
            if time_to_next_eval > 0:
                time.sleep(time_to_next_eval)