def testPascalVocSegTestData(self):
        dataset = data_generator.Dataset(
            dataset_name='pascal_voc_seg',
            split_name='val',
            dataset_dir='research/deeplab/testing/pascal_voc_seg',
            batch_size=1,
            crop_size=[3, 3],  # Use small size for testing.
            min_resize_value=3,
            max_resize_value=3,
            resize_factor=None,
            min_scale_factor=0.01,
            max_scale_factor=2.0,
            scale_factor_step_size=0.25,
            is_training=False,
            model_variant='mobilenet_v2')

        self.assertAllEqual(dataset.num_of_classes, 21)
        self.assertAllEqual(dataset.ignore_label, 255)

        num_of_images = 3
        with self.test_session() as sess:
            iterator = dataset.get_one_shot_iterator()

            for i in range(num_of_images):
                batch = iterator.get_next()
                batch, = sess.run([batch])
                image_attributes = _get_attributes_of_image(i)

                self.assertAllEqual(batch[common.IMAGE][0],
                                    image_attributes.image)
                self.assertAllEqual(batch[common.LABEL][0],
                                    image_attributes.label)
                self.assertEqual(batch[common.HEIGHT][0],
                                 image_attributes.height)
                self.assertEqual(batch[common.WIDTH][0],
                                 image_attributes.width)
                self.assertEqual(batch[common.IMAGE_NAME][0],
                                 image_attributes.image_name)

            # All data have been read.
            with self.assertRaisesRegexp(tf.errors.OutOfRangeError, ''):
                sess.run([iterator.get_next()])
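# Note: the loop above calls iterator.get_next() on every iteration, which
# adds a new op to the graph each time. A minimal sketch of the idiomatic
# pattern, building the op once and draining until tf.errors.OutOfRangeError,
# assuming any wrapper exposing get_one_shot_iterator() like the Dataset
# under test:
import tensorflow as tf

def drain_iterator(dataset):
    batch = dataset.get_one_shot_iterator().get_next()
    results = []
    with tf.Session() as sess:
        try:
            while True:
                results.append(sess.run(batch))
        except tf.errors.OutOfRangeError:
            pass  # All data have been read.
    return results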
Example #2
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Get dataset-dependent information.
  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.vis_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.vis_batch_size,
      crop_size=[int(sz) for sz in FLAGS.vis_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  train_id_to_eval_id = None
  if dataset.dataset_name == data_generator.get_cityscapes_dataset_name():
    tf.logging.info('Cityscapes requires converting train_id to eval_id.')
    train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

  # Prepare for visualization.
  tf.gfile.MakeDirs(FLAGS.vis_logdir)
  save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(save_dir)
  raw_save_dir = os.path.join(
      FLAGS.vis_logdir, _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(raw_save_dir)

  tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=[int(sz) for sz in FLAGS.vis_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE],
          model_options=model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]

    if FLAGS.min_resize_value and FLAGS.max_resize_value:
      # Only supports batch_size = 1, since we assume the dimensions of the
      # original image after tf.squeeze are [height, width, 3].
      assert FLAGS.vis_batch_size == 1

      # Reverse the resizing and padding operations performed in preprocessing.
      # First, we slice the valid regions (i.e., remove padded region) and then
      # we resize the predictions back.
      original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
      original_image_shape = tf.shape(original_image)
      predictions = tf.slice(
          predictions,
          [0, 0, 0],
          [1, original_image_shape[0], original_image_shape[1]])
      resized_shape = tf.to_int32([tf.squeeze(samples[common.HEIGHT]),
                                   tf.squeeze(samples[common.WIDTH])])
      predictions = tf.squeeze(
          tf.image.resize_images(tf.expand_dims(predictions, 3),
                                 resized_shape,
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                                 align_corners=True), 3)

    tf.train.get_or_create_global_step()
    if FLAGS.quantize_delay_step >= 0:
      contrib_quantize.create_eval_graph()

    num_iteration = 0
    max_num_iteration = FLAGS.max_number_of_iterations

    checkpoints_iterator = contrib_training.checkpoints_iterator(
        FLAGS.checkpoint_dir, min_interval_secs=FLAGS.eval_interval_secs)
    for checkpoint_path in checkpoints_iterator:
      num_iteration += 1
      tf.logging.info(
          'Starting visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                       time.gmtime()))
      tf.logging.info('Visualizing with model %s', checkpoint_path)

      scaffold = tf.train.Scaffold(init_op=tf.global_variables_initializer())
      session_creator = tf.train.ChiefSessionCreator(
          scaffold=scaffold,
          master=FLAGS.master,
          checkpoint_filename_with_path=checkpoint_path)
      with tf.train.MonitoredSession(
          session_creator=session_creator, hooks=None) as sess:
        batch = 0
        image_id_offset = 0

        while not sess.should_stop():
          tf.logging.info('Visualizing batch %d', batch + 1)
          _process_batch(sess=sess,
                         original_images=samples[common.ORIGINAL_IMAGE],
                         semantic_predictions=predictions,
                         image_names=samples[common.IMAGE_NAME],
                         image_heights=samples[common.HEIGHT],
                         image_widths=samples[common.WIDTH],
                         image_id_offset=image_id_offset,
                         save_dir=save_dir,
                         raw_save_dir=raw_save_dir,
                         train_id_to_eval_id=train_id_to_eval_id)
          image_id_offset += FLAGS.vis_batch_size
          batch += 1

      tf.logging.info(
          'Finished visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                       time.gmtime()))
      if max_num_iteration > 0 and num_iteration >= max_num_iteration:
        break
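# The slice-then-resize block above reverses the eval-time padding and
# resizing. A minimal standalone sketch of the same idea, assuming a
# [1, H, W] integer prediction map and the TF 1.x ops used in this file;
# the argument names are illustrative:
import tensorflow as tf

def restore_prediction_size(predictions, valid_height, valid_width,
                            target_height, target_width):
    # Drop the padded region first.
    predictions = tf.slice(predictions, [0, 0, 0],
                           [1, valid_height, valid_width])
    # Resize back with nearest neighbor so class ids stay integral.
    predictions = tf.expand_dims(predictions, 3)
    predictions = tf.image.resize_images(
        predictions, tf.to_int32([target_height, target_width]),
        method=tf.image.ResizeMethod.NEAREST_NEIGHBOR, align_corners=True)
    return tf.squeeze(predictions, 3)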
Example #3
def train():
    # From build_cityscapes_data.py: example = image_data, filename, height, width, seg_data
    tf.logging.set_verbosity(tf.logging.INFO)
    clone_batch_size = FLAGS.train_batch_size

    dataset = data_generator.Dataset(
        dataset_name=FLAGS.dataset,
        split_name=FLAGS.train_split,
        dataset_dir=FLAGS.dataset_dir,
        batch_size=clone_batch_size,
        crop_size=[int(sz) for sz in FLAGS.train_crop_size],
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        min_scale_factor=FLAGS.min_scale_factor,
        max_scale_factor=FLAGS.max_scale_factor,
        scale_factor_step_size=FLAGS.scale_factor_step_size,
        model_variant=None,
        num_readers=10,
        is_training=True,
        should_shuffle=True,
        should_repeat=True)

    # Read a batch; the keys of samples are ['height', 'width', 'image_name', 'label', 'image'].
    num_classes = dataset.num_of_classes
    samples = dataset.get_one_shot_iterator().get_next()
    in_imgs = samples['image'] / 255
    labels = samples['label']  # channel = 1

    latents, skip = seg_encoder('Encoder', in_imgs, training=True)
    if FLAGS.use_skip_1by1:
        skip = tf.layers.conv2d(inputs=skip,
                                filters=32,
                                kernel_size=[1, 1],
                                strides=(1, 1),
                                use_bias=False,
                                padding="same")
    if not FLAGS.use_skip:
        skip = None

    logits = seg_decoder('Decoder',
                         latents,
                         training=True,
                         num_classes=num_classes,
                         skip=skip)

    #train_loss, _, _ = normal_loss(logits, labels, num_classes, dataset.ignore_label)
    train_loss = softmax_cross_entropy_loss_mining(
        logits,
        labels,
        num_classes,
        dataset.ignore_label,
        loss_weight=1.0,
        upsample_logits=False,
        hard_example_mining_step=FLAGS.hard_example_mining_step,
        top_k_percent_pixels=FLAGS.top_k_percent_pixels,
        scope='CI_Loss')

    log_summaries(in_imgs, num_classes, logits, labels, train_loss)
    step = tf.train.get_or_create_global_step()
    main_optimizer = tf.train.AdamOptimizer(
        learning_rate=1e-4)  # 1e-4 / 100k steps / batch size 2 ==> same but lr = 1e-5
    main_step = main_optimizer.minimize(train_loss, global_step=step)
    train_op = tf.group(main_step)

    hooks = [
        tf.train.StopAtStepHook(last_step=FLAGS.last_step),
        tf.train.NanTensorHook(train_loss),
    ]

    step_c = 0
    with tf.train.MonitoredTrainingSession(hooks=hooks,
                                           checkpoint_dir=FLAGS.checkpoint_dir,
                                           save_checkpoint_secs=300,
                                           save_summaries_secs=60) as sess:
        while not sess.should_stop():
            sess.run(train_op)
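# A minimal sketch of the top_k_percent_pixels idea behind
# softmax_cross_entropy_loss_mining above: only the hardest fraction of
# pixels contributes to the loss. This is an illustration, not the
# project's implementation.
import tensorflow as tf

def top_k_pixel_loss(pixel_losses, top_k_percent_pixels=0.25):
    # pixel_losses: flat [num_pixels] tensor of per-pixel cross-entropy.
    num_pixels = tf.to_float(tf.size(pixel_losses))
    top_k = tf.to_int32(num_pixels * top_k_percent_pixels)
    top_k_losses, _ = tf.nn.top_k(pixel_losses, k=top_k, sorted=True)
    return tf.reduce_mean(top_k_losses)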
Example #4
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.eval_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.eval_batch_size,
      crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      num_readers=2,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
    samples[common.IMAGE].set_shape(
        [FLAGS.eval_batch_size,
         int(FLAGS.eval_crop_size[0]),
         int(FLAGS.eval_crop_size[1]),
         3])
    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                         image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')

      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predictions = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(
        tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

    predictions_tag = 'miou'
    for eval_scale in FLAGS.eval_scales:
      predictions_tag += '_' + str(eval_scale)
    if FLAGS.add_flipped_images:
      predictions_tag += '_flipped'

    # Define the evaluation metric.
    miou, update_op = tf.metrics.mean_iou(
        predictions, labels, dataset.num_of_classes, weights=weights)
    tf.summary.scalar(predictions_tag, miou)

    summary_op = tf.summary.merge_all()
    summary_hook = tf.contrib.training.SummaryAtEndHook(
        log_dir=FLAGS.eval_logdir, summary_op=summary_op)
    hooks = [summary_hook]

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations

    if FLAGS.quantize_delay_step >= 0:
      tf.contrib.quantize.create_eval_graph()

    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.
        TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
    tf.contrib.training.evaluate_repeatedly(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        eval_ops=[update_op],
        max_number_of_evaluations=num_eval_iters,
        hooks=hooks,
        eval_interval_secs=FLAGS.eval_interval_secs)
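# A minimal numpy sketch of what tf.metrics.mean_iou accumulates above:
# a confusion matrix, from which per-class IoU = TP / (TP + FP + FN).
# The example matrix is made up for illustration.
import numpy as np

def mean_iou_from_confusion(cm):
    tp = np.diag(cm).astype(np.float64)
    fp = cm.sum(axis=0) - tp  # Predicted as class c, labeled otherwise.
    fn = cm.sum(axis=1) - tp  # Labeled class c, predicted otherwise.
    denom = tp + fp + fn
    iou = np.where(denom > 0, tp / np.maximum(denom, 1), 0.0)
    return iou.mean(), iou

# Two classes: per-class IoU = [3/5, 2/4], mean IoU = 0.55.
miou, per_class = mean_iou_from_confusion(np.array([[3, 1], [1, 2]]))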
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    dataset = data_generator.Dataset(
        dataset_name=FLAGS.dataset,
        split_name=FLAGS.eval_split,
        dataset_dir=FLAGS.dataset_dir,
        batch_size=FLAGS.eval_batch_size,
        crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        model_variant=FLAGS.model_variant,
        num_readers=2,
        is_training=False,
        should_shuffle=False,
        should_repeat=False)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = dataset.get_one_shot_iterator().get_next()

        model_options = common.ModelOptions(
            outputs_to_num_classes={
                common.OUTPUT_TYPE: dataset.num_of_classes
            },
            crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
        samples[common.IMAGE].set_shape([
            FLAGS.eval_batch_size,
            int(FLAGS.eval_crop_size[0]),
            int(FLAGS.eval_crop_size[1]), 3
        ])
        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            if FLAGS.quantize_delay_step >= 0:
                raise ValueError(
                    'Quantize mode is not supported with multi-scale test.')

            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]
        predictions = tf.reshape(predictions, shape=[-1])

        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        labels = tf.where(tf.equal(labels, dataset.ignore_label),
                          tf.zeros_like(labels), labels)

        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
            predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Calculate iou for each class
        metric_map = {}
        iou_v, update_op = iou_each_class.iou(predictions,
                                              labels,
                                              dataset.num_of_classes,
                                              weights=weights)
        for index in range(0, dataset.num_of_classes):
            metric_map['class_' + str(index) + '_iou'] = (iou_v[index],
                                                          update_op[index])

        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(metric_value,
                                              metric_name,
                                              print_summary=True)

        summary_op = tf.summary.merge_all()
        summary_hook = tf.contrib.training.SummaryAtEndHook(
            log_dir=FLAGS.eval_logdir, summary_op=summary_op)
        hooks = [summary_hook]

        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations

        if FLAGS.quantize_delay_step >= 0:
            tf.contrib.quantize.create_eval_graph()

        tf.contrib.tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=tf.contrib.tfprof.model_analyzer.
            TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
        tf.contrib.tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

        ######################################################################
        ############### Modified to evaluate all checkpoints in a folder ####
        evaluation.evaluate_repeatedly(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            eval_ops=[update_op],
            max_number_of_evaluations=num_eval_iters,
            hooks=hooks,
            eval_interval_secs=FLAGS.eval_interval_secs)

        #####################################################################
        ###### Read IoU from TensorBoard into a CSV file ####################
        checkpoint_path_list = tf.train.get_checkpoint_state(
            FLAGS.checkpoint_dir).all_model_checkpoint_paths
        iterations = [
            os.path.basename(a).split('-')[1] for a in checkpoint_path_list
        ]
        iou_from_Tensorboard.to_csv(FLAGS.eval_logdir, dataset.num_of_classes,
                                    iterations)
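# iou_from_Tensorboard.to_csv is project-specific and not shown here. A
# minimal sketch of reading scalar summaries back out of event files with
# the stock TF 1.x API, for tags like the 'class_<i>_iou' summaries
# written above:
import glob
import os
import tensorflow as tf

def read_scalars(logdir, tag):
    values = []
    for event_file in sorted(glob.glob(os.path.join(logdir, 'events.out*'))):
        for event in tf.train.summary_iterator(event_file):
            for value in event.summary.value:
                if value.tag == tag:
                    values.append((event.step, value.simple_value))
    return values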
def main(unused_argv):

  print("logging params")
  print("Learning rate: ", FLAGS.base_learning_rate)
  print("Momentum: ", FLAGS.momentum)
  print("Weight decay: ", FLAGS.weight_decay)
  print("training steps: ", FLAGS.training_number_of_steps)
  print("Dataset name: ",FLAGS.dataset)
  print("Using dataset for training: ",FLAGS.train_split)
  print("Dataset directory: ",FLAGS.dataset_dir)
  print("batch size: ", FLAGS.train_batch_size)
  print("crop size: ", FLAGS.train_crop_size)
  print("Model variant used: ",FLAGS.model_variant)
  print("Train log directory: ", FLAGS.train_logdir)
  train_list = []
  val_list = []
  count= 0
  best_val_mean_iou = 0.718
  dir_path='deeplab/best_ckpt/'

  tf.logging.set_verbosity(tf.logging.INFO)

  tf.gfile.MakeDirs(FLAGS.train_logdir)
  tf.logging.info('Training on %s set', FLAGS.train_split)

  graph = tf.Graph()
  with graph.as_default():
    with tf.device(tf.train.replica_device_setter(ps_tasks=FLAGS.num_ps_tasks)):
      assert FLAGS.train_batch_size % FLAGS.num_clones == 0, (
          'Training batch size not divisible by number of clones (GPUs).')
      clone_batch_size = FLAGS.train_batch_size // FLAGS.num_clones  # Equivalent to train_batch_size when num_clones == 1.

      dataset = data_generator.Dataset(
          dataset_name=FLAGS.dataset,
          split_name=FLAGS.train_split,
          dataset_dir=FLAGS.dataset_dir,
          batch_size=clone_batch_size,
          crop_size=[int(sz) for sz in FLAGS.train_crop_size],
          min_resize_value=FLAGS.min_resize_value,
          max_resize_value=FLAGS.max_resize_value,
          resize_factor=FLAGS.resize_factor,
          min_scale_factor=FLAGS.min_scale_factor,
          max_scale_factor=FLAGS.max_scale_factor,
          scale_factor_step_size=FLAGS.scale_factor_step_size,
          model_variant=FLAGS.model_variant,
          num_readers=1,  # TODO: check whether more readers are needed.
          is_training=True,
          should_shuffle=True,
          should_repeat=True)

      train_tensor, summary_op = _train_deeplab_model(
          dataset.get_one_shot_iterator(), dataset.num_of_classes,
          dataset.ignore_label)

      # Soft placement allows placing on CPU ops without GPU implementation.
      session_config = tf.ConfigProto(
          allow_soft_placement=True, log_device_placement=False)

      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      init_fn = None
      if FLAGS.tf_initial_checkpoint:
        init_fn = train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True)

      scaffold = tf.train.Scaffold(
          init_fn=init_fn,
          summary_op=summary_op,
      )

      stop_hook = tf.train.StopAtStepHook(
          last_step=FLAGS.training_number_of_steps)

      profile_dir = FLAGS.profile_logdir
      if profile_dir is not None:
        tf.gfile.MakeDirs(profile_dir)

      with tf.contrib.tfprof.ProfileContext(
          enabled=profile_dir is not None, profile_dir=profile_dir):
        with tf.train.MonitoredTrainingSession(
            master=FLAGS.master,
            is_chief=(FLAGS.task == 0),
            config=session_config,
            scaffold=scaffold,
            checkpoint_dir=FLAGS.train_logdir,
            summary_dir=FLAGS.train_logdir,
            log_step_count_steps=FLAGS.log_steps,
            save_summaries_steps=FLAGS.save_summaries_secs,
            save_checkpoint_secs=FLAGS.save_interval_secs,
            hooks=[stop_hook]) as sess:
          while not sess.should_stop():
            count += 1
            training_loss = sess.run(train_tensor)
            if np.isnan(training_loss):
                print("learning rate too high. exiting!")
                sys.exit(1)

            try:
              if count > 5000 and count % 200 == 0:
                train_iou = subprocess.check_output([sys.executable, "deeplab/vistrain.py"])
                val_iou = subprocess.check_output([sys.executable, "deeplab/vis.py"])
                val_mean_iou = float(val_iou.decode("utf-8").split('\n')[-2])
                val_list.append(val_mean_iou * 100)
                train_mean_iou = float(train_iou.decode("utf-8").split('\n')[-2]) * 100
                train_list.append(train_mean_iou)

                print("Mean IoU on training dataset: ", train_mean_iou)
                print("Mean IoU on validation dataset: ", val_mean_iou)
                sys.stdout.flush()

                if val_mean_iou > best_val_mean_iou:
                  if os.path.isdir(dir_path):
                    shutil.rmtree(dir_path)
                  print("Validation Mean IoU: ", val_mean_iou)
                  shutil.copytree(FLAGS.train_logdir, dir_path)
                  best_val_mean_iou = val_mean_iou
            except subprocess.CalledProcessError:
              print("Validation script returned non-zero status.")
Example #7
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    # Get dataset-dependent information.
    dataset = data_generator.Dataset(  # Build the corresponding dataset.
        dataset_name=FLAGS.dataset,  # Dataset name.
        split_name=FLAGS.vis_split,  # TFRecord split used for visualization; defaults to one containing 'val'.
        dataset_dir=FLAGS.dataset_dir,  # Dataset directory.
        batch_size=FLAGS.vis_batch_size,  # Images processed per batch; defaults to 1.
        crop_size=[int(sz) for sz in FLAGS.vis_crop_size],  # Crop size; defaults to 513,513.
        min_resize_value=FLAGS.min_resize_value,  # None
        max_resize_value=FLAGS.max_resize_value,  # None
        resize_factor=FLAGS.resize_factor,  # None
        model_variant=FLAGS.model_variant,  # Model variant; defaults to mobilenet_v2, xception_65 in this run.
        is_training=False,  # Not training.
        should_shuffle=False,  # Do not shuffle the input data.
        should_repeat=False)  # Do not repeat indefinitely.

    train_id_to_eval_id = None
    if dataset.dataset_name == data_generator.get_cityscapes_dataset_name():
        tf.logging.info('Cityscapes requires converting train_id to eval_id.')
        train_id_to_eval_id = _CITYSCAPES_TRAIN_ID_TO_EVAL_ID

    # Prepare for visualization.
    tf.gfile.MakeDirs(FLAGS.vis_logdir)  # Folder that holds the visualized images.
    save_dir = os.path.join(FLAGS.vis_logdir,
                            _SEMANTIC_PREDICTION_SAVE_FOLDER)  # Build the save path.
    tf.gfile.MakeDirs(save_dir)  # Create the segmentation_results folder.
    raw_save_dir = os.path.join(FLAGS.vis_logdir,
                                _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
    tf.gfile.MakeDirs(raw_save_dir)  # Create the raw_segmentation_results folder.

    tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

    with tf.Graph().as_default():
        samples = dataset.get_one_shot_iterator().get_next()  # Fetch the data.

        model_options = common.ModelOptions(
            outputs_to_num_classes={
                common.OUTPUT_TYPE: dataset.num_of_classes
            },
            crop_size=[int(sz) for sz in FLAGS.vis_crop_size],  # 1024,2048
            atrous_rates=FLAGS.atrous_rates,  # 6,12,18
            output_stride=FLAGS.output_stride)  # 4

        if tuple(FLAGS.eval_scales) == (1.0, ):  # Evaluate without rescaling.
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(  # Label prediction, same as in eval.
                samples[common.IMAGE],
                model_options=model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:  # Multi-scale evaluation.
            tf.logging.info('Performing multi-scale test.')
            if FLAGS.quantize_delay_step >= 0:
                raise ValueError(
                    'Quantize mode is not supported with multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        '''
        predictions:
            {'semantic': <tf.Tensor 'ArgMax:0' shape=(1, 1024, 2048) dtype=int64>,
             'semantic_prob': <tf.Tensor 'Softmax:0' shape=(1, 1024, 2048, 19) dtype=float32>}
        '''
        predictions = predictions[common.OUTPUT_TYPE]

        if FLAGS.min_resize_value and FLAGS.max_resize_value:  # Both None by default; skipped in that case.
            # Only supports batch_size = 1, since we assume the dimensions of the
            # original image after tf.squeeze are [height, width, 3].
            assert FLAGS.vis_batch_size == 1

            # Reverse the resizing and padding operations performed in preprocessing.
            # First, we slice the valid regions (i.e., remove padded region) and then
            # we resize the predictions back.
            original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
            original_image_shape = tf.shape(original_image)
            predictions = tf.slice(
                predictions, [0, 0, 0],
                [1, original_image_shape[0], original_image_shape[1]])
            resized_shape = tf.to_int32([
                tf.squeeze(samples[common.HEIGHT]),
                tf.squeeze(samples[common.WIDTH])
            ])
            predictions = tf.squeeze(
                tf.image.resize_images(
                    tf.expand_dims(predictions, 3),
                    resized_shape,
                    method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                    align_corners=True), 3)

        # Acts as a counter: the global step increments by 1 per batch.
        tf.train.get_or_create_global_step()
        if FLAGS.quantize_delay_step >= 0:  # Defaults to -1.
            contrib_quantize.create_eval_graph()

        num_iteration = 0
        max_num_iteration = FLAGS.max_number_of_iterations  # Defaults to 0.

        checkpoints_iterator = contrib_training.checkpoints_iterator(
            FLAGS.checkpoint_dir, min_interval_secs=FLAGS.eval_interval_secs)
        for checkpoint_path in checkpoints_iterator:
            num_iteration += 1
            tf.logging.info('Starting visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            tf.logging.info('Visualizing with model %s', checkpoint_path)

            scaffold = tf.train.Scaffold(
                init_op=tf.global_variables_initializer())
            session_creator = tf.train.ChiefSessionCreator(
                scaffold=scaffold,
                master=FLAGS.master,
                checkpoint_filename_with_path=checkpoint_path)
            with tf.train.MonitoredSession(session_creator=session_creator,
                                           hooks=None) as sess:
                batch = 0
                image_id_offset = 0

                while not sess.should_stop():
                    tf.logging.info('Visualizing batch %d', batch + 1)
                    _process_batch(
                        sess=sess,
                        # <tf.Tensor 'IteratorGetNext:4' shape=(?, ?, ?, 3) dtype=uint8>
                        original_images=samples[common.ORIGINAL_IMAGE],
                        # <tf.Tensor 'ArgMax:0' shape=(1, 1024, 2048) dtype=int64>
                        semantic_predictions=predictions,
                        # <tf.Tensor 'IteratorGetNext:2' shape=(?,) dtype=string>
                        image_names=samples[common.IMAGE_NAME],
                        # <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=int64>
                        image_heights=samples[common.HEIGHT],
                        # <tf.Tensor 'IteratorGetNext:5' shape=(?,) dtype=int64>
                        image_widths=samples[common.WIDTH],
                        image_id_offset=image_id_offset,  # 0
                        save_dir=save_dir,  # Where the segmentation results go.
                        raw_save_dir=raw_save_dir,
                        train_id_to_eval_id=train_id_to_eval_id)  # Non-None only for Cityscapes.
                    image_id_offset += FLAGS.vis_batch_size  # Offset of the visualized image ids.
                    batch += 1

            tf.logging.info('Finished visualization at ' +
                            time.strftime('%Y-%m-%d-%H:%M:%S', time.gmtime()))
            if max_num_iteration > 0 and num_iteration >= max_num_iteration:
                break
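# A minimal sketch of the train_id -> eval_id conversion that
# _CITYSCAPES_TRAIN_ID_TO_EVAL_ID enables above: the mapping is a list
# indexed by train id, applied in one vectorized lookup. The id values
# below are illustrative, not the full Cityscapes table.
import numpy as np

def convert_train_id_to_eval_id(prediction, train_id_to_eval_id):
    return np.asarray(train_id_to_eval_id, dtype=prediction.dtype)[prediction]

# e.g. train ids 0..2 mapped to eval ids 7, 8, 11:
converted = convert_train_id_to_eval_id(
    np.array([[0, 1], [2, 0]]), [7, 8, 11])  # [[7, 8], [11, 7]]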
Example #8
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
    config = model_deploy.DeploymentConfig(num_clones=FLAGS.num_clones,
                                           clone_on_cpu=FLAGS.clone_on_cpu,
                                           replica_id=FLAGS.task,
                                           num_replicas=FLAGS.num_replicas,
                                           num_ps_tasks=FLAGS.num_ps_tasks)

    # Split the batch across GPUs.
    assert FLAGS.train_batch_size % config.num_clones == 0, (
        'Training batch size not divisible by number of clones (GPUs).')

    clone_batch_size = FLAGS.train_batch_size // config.num_clones

    tf.gfile.MakeDirs(FLAGS.train_logdir)
    common.outputlogMessage('Training on %s set' % FLAGS.train_split)
    common.outputlogMessage('Dataset: %s' % FLAGS.dataset)
    common.outputlogMessage('train_crop_size: %s' % str(FLAGS.train_crop_size))
    common.outputlogMessage(str(FLAGS.train_crop_size))
    common.outputlogMessage('atrous_rates: %s' % str(FLAGS.atrous_rates))
    common.outputlogMessage('number of classes: %s' % str(FLAGS.num_classes))
    common.outputlogMessage('Ignore label value: %s' % str(FLAGS.ignore_label))
    pid = os.getpid()
    with open('train_py_pid.txt', 'w') as f_obj:
        f_obj.writelines('%d' % pid)

    with tf.Graph().as_default() as graph:
        with tf.device(config.inputs_device()):
            dataset = data_generator.Dataset(
                dataset_name=FLAGS.dataset,
                split_name=FLAGS.train_split,
                dataset_dir=FLAGS.dataset_dir,
                batch_size=clone_batch_size,
                crop_size=[int(sz) for sz in FLAGS.train_crop_size],
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                model_variant=FLAGS.model_variant,
                num_readers=4,
                is_training=True,
                should_shuffle=True,
                should_repeat=True,
                num_classes=FLAGS.num_classes,
                ignore_label=FLAGS.ignore_label)

        # Create the global step on the device storing the variables.
        with tf.device(config.variables_device()):
            global_step = tf.train.get_or_create_global_step()

            # Define the model and create clones.
            model_fn = _build_deeplab
            model_args = (dataset.get_one_shot_iterator(), {
                common.OUTPUT_TYPE: dataset.num_of_classes
            }, dataset.ignore_label)
            clones = model_deploy.create_clones(config,
                                                model_fn,
                                                args=model_args)

            # Gather update_ops from the first clone. These contain, for example,
            # the updates for the batch_norm variables created by model_fn.
            first_clone_scope = config.clone_scope(0)
            update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS,
                                           first_clone_scope)

        # Gather initial summaries.
        summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

        # Add summaries for model variables.
        for model_var in tf.model_variables():
            summaries.add(tf.summary.histogram(model_var.op.name, model_var))

        # Add summaries for images, labels, semantic predictions
        if FLAGS.save_summaries_images:
            summary_image = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
            summaries.add(
                tf.summary.image('samples/%s' % common.IMAGE, summary_image))

            first_clone_label = graph.get_tensor_by_name(
                ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
            # Scale up summary image pixel values for better visualization.
            pixel_scaling = max(1, 255 // dataset.num_of_classes)
            summary_label = tf.cast(first_clone_label * pixel_scaling,
                                    tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.LABEL, summary_label))

            first_clone_output = graph.get_tensor_by_name(
                ('%s/%s:0' %
                 (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
            predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)

            summary_predictions = tf.cast(predictions * pixel_scaling,
                                          tf.uint8)
            summaries.add(
                tf.summary.image('samples/%s' % common.OUTPUT_TYPE,
                                 summary_predictions))

        # Add summaries for losses.
        for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
            summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

        # Build the optimizer based on the device specification.
        with tf.device(config.optimizer_device()):
            learning_rate = train_utils.get_model_learning_rate(
                FLAGS.learning_policy,
                FLAGS.base_learning_rate,
                FLAGS.learning_rate_decay_step,
                FLAGS.learning_rate_decay_factor,
                FLAGS.training_number_of_steps,
                FLAGS.learning_power,
                FLAGS.slow_start_step,
                FLAGS.slow_start_learning_rate,
                decay_steps=FLAGS.decay_steps,
                end_learning_rate=FLAGS.end_learning_rate)

            summaries.add(tf.summary.scalar('learning_rate', learning_rate))

            if FLAGS.optimizer == 'momentum':
                optimizer = tf.train.MomentumOptimizer(learning_rate,
                                                       FLAGS.momentum)
            elif FLAGS.optimizer == 'adam':
                optimizer = tf.train.AdamOptimizer(
                    learning_rate=FLAGS.adam_learning_rate,
                    epsilon=FLAGS.adam_epsilon)
            else:
                raise ValueError('Unknown optimizer')

        if FLAGS.quantize_delay_step >= 0:
            if FLAGS.num_clones > 1:
                raise ValueError(
                    'Quantization doesn\'t support multi-clone yet.')
            contrib_quantize.create_training_graph(
                quant_delay=FLAGS.quantize_delay_step)

        startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps

        with tf.device(config.variables_device()):
            total_loss, grads_and_vars = model_deploy.optimize_clones(
                clones, optimizer)
            total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
            summaries.add(tf.summary.scalar('total_loss', total_loss))

            # Modify the gradients for biases and last layer variables.
            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            grad_mult = train_utils.get_model_gradient_multipliers(
                last_layers, FLAGS.last_layer_gradient_multiplier)
            if grad_mult:
                grads_and_vars = slim.learning.multiply_gradients(
                    grads_and_vars, grad_mult)

            # Create gradient update op.
            grad_updates = optimizer.apply_gradients(grads_and_vars,
                                                     global_step=global_step)
            update_ops.append(grad_updates)
            update_op = tf.group(*update_ops)
            with tf.control_dependencies([update_op]):
                train_tensor = tf.identity(total_loss, name='train_op')

        # Add the summaries from the first clone. These contain the summaries
        # created by model_fn and either optimize_clones() or _gather_clone_loss().
        summaries |= set(
            tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

        # Merge all summaries together.
        summary_op = tf.summary.merge(list(summaries))

        # Soft placement allows placing on CPU ops without GPU implementation.
        session_config = tf.ConfigProto(allow_soft_placement=True,
                                        log_device_placement=False)

        # Start the training.
        profile_dir = FLAGS.profile_logdir
        if profile_dir is not None:
            tf.gfile.MakeDirs(profile_dir)

        with contrib_tfprof.ProfileContext(enabled=profile_dir is not None,
                                           profile_dir=profile_dir):
            init_fn = None
            if FLAGS.tf_initial_checkpoint:
                init_fn = train_utils.get_model_init_fn(
                    FLAGS.train_logdir,
                    FLAGS.tf_initial_checkpoint,
                    FLAGS.initialize_last_layer,
                    last_layers,
                    ignore_missing_vars=True)

            slim.learning.train(train_tensor,
                                logdir=FLAGS.train_logdir,
                                log_every_n_steps=FLAGS.log_steps,
                                master=FLAGS.master,
                                number_of_steps=FLAGS.training_number_of_steps,
                                is_chief=(FLAGS.task == 0),
                                session_config=session_config,
                                startup_delay_steps=startup_delay_steps,
                                init_fn=init_fn,
                                summary_op=summary_op,
                                save_summaries_secs=FLAGS.save_summaries_secs,
                                save_interval_secs=FLAGS.save_interval_secs)
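# A minimal sketch of the gradient-multiplier step above: variables in the
# last layers train with a larger effective learning rate because their
# gradients are scaled before apply_gradients. Illustrative, not slim's
# implementation.
def multiply_gradients(grads_and_vars, grad_multipliers):
    scaled = []
    for grad, var in grads_and_vars:
        multiplier = grad_multipliers.get(var.op.name)
        if grad is not None and multiplier is not None:
            grad = grad * multiplier
        scaled.append((grad, var))
    return scaled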
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)
  # Set up deployment (i.e., multi-GPUs and/or multi-replicas).
  # Configure the multi-GPU training parameters.
  config = model_deploy.DeploymentConfig(
      num_clones=FLAGS.num_clones,  # Number of GPUs.
      clone_on_cpu=FLAGS.clone_on_cpu,  # Defaults to False.
      replica_id=FLAGS.task,    # Task id.
      num_replicas=FLAGS.num_replicas,  # Defaults to 1.
      num_ps_tasks=FLAGS.num_ps_tasks)  # Defaults to 0.

  # Split the batch across GPUs.
  assert FLAGS.train_batch_size % config.num_clones == 0, (
      'Training batch size not divisible by number of clones (GPUs).')

  clone_batch_size = FLAGS.train_batch_size // config.num_clones    # Split the batch size evenly across GPUs.

  tf.gfile.MakeDirs(FLAGS.train_logdir)     # Create the training log directory.
  tf.logging.info('Training on %s set', FLAGS.train_split)

  with tf.Graph().as_default() as graph:
    with tf.device(config.inputs_device()):
      dataset = data_generator.Dataset(     # Define the dataset parameters.
          dataset_name=FLAGS.dataset,   # Dataset name: cityscapes.
          split_name=FLAGS.train_split,  # TFRecord split containing 'train'; defaults to 'train'.
          dataset_dir=FLAGS.dataset_dir,   # Directory holding the TFRecord files.
          batch_size=clone_batch_size,  # Per-GPU batch size after splitting.
          crop_size=[int(sz) for sz in FLAGS.train_crop_size],  # Crop size used in training: 513,513.
          min_resize_value=FLAGS.min_resize_value,  # Defaults to None.
          max_resize_value=FLAGS.max_resize_value,  # Defaults to None.
          resize_factor=FLAGS.resize_factor,    # Defaults to None.
          min_scale_factor=FLAGS.min_scale_factor,   # Minimum image scale for data augmentation; defaults to 0.5.
          max_scale_factor=FLAGS.max_scale_factor,   # Maximum image scale for data augmentation; defaults to 2.
          scale_factor_step_size=FLAGS.scale_factor_step_size,      # Step between scales; defaults to 0.25, from 0.5 to 2.
          model_variant=FLAGS.model_variant,    # Model variant: xception_65.
          num_readers=4,    # Number of parallel readers; more can speed up multi-GPU training.
          is_training=True,
          should_shuffle=True,
          should_repeat=True)

    # Create the global step on the device storing the variables.
    with tf.device(config.variables_device()):
      # Acts as a counter: the global step increments by 1 per training batch.
      global_step = tf.train.get_or_create_global_step()

      # Define the model and create clones.
      model_fn = _build_deeplab  # Define the DeepLab model.
      model_args = (dataset.get_one_shot_iterator(), {
          common.OUTPUT_TYPE: dataset.num_of_classes
      }, dataset.ignore_label)  # Model arguments.
      clones = model_deploy.create_clones(config, model_fn, args=model_args)

      # Gather update_ops from the first clone. These contain, for example,
      # the updates for the batch_norm variables created by model_fn.
      first_clone_scope = config.clone_scope(0)
      update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS, first_clone_scope)

    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    # Add summaries for model variables.
    for model_var in tf.model_variables():
      summaries.add(tf.summary.histogram(model_var.op.name, model_var))

    # Add summaries for images, labels, semantic predictions
    if FLAGS.save_summaries_images:      # Defaults to False.
      summary_image = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.IMAGE)).strip('/'))
      summaries.add(
          tf.summary.image('samples/%s' % common.IMAGE, summary_image))

      first_clone_label = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.LABEL)).strip('/'))
      # Scale up summary image pixel values for better visualization.
      pixel_scaling = max(1, 255 // dataset.num_of_classes)
      summary_label = tf.cast(first_clone_label * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image('samples/%s' % common.LABEL, summary_label))

      first_clone_output = graph.get_tensor_by_name(
          ('%s/%s:0' % (first_clone_scope, common.OUTPUT_TYPE)).strip('/'))
      predictions = tf.expand_dims(tf.argmax(first_clone_output, 3), -1)

      summary_predictions = tf.cast(predictions * pixel_scaling, tf.uint8)
      summaries.add(
          tf.summary.image(
              'samples/%s' % common.OUTPUT_TYPE, summary_predictions))

    # Add summaries for losses.
    for loss in tf.get_collection(tf.GraphKeys.LOSSES, first_clone_scope):
      summaries.add(tf.summary.scalar('losses/%s' % loss.op.name, loss))

    # Build the optimizer based on the device specification.
    with tf.device(config.optimizer_device()):
      learning_rate = train_utils.get_model_learning_rate(  # Get the model learning rate.
          FLAGS.learning_policy,    # 'poly' learning policy.
          FLAGS.base_learning_rate,     # 0.0001
          FLAGS.learning_rate_decay_step,   # Decay the learning rate every 2000 steps.
          FLAGS.learning_rate_decay_factor,     # 0.1
          FLAGS.training_number_of_steps,   # Number of training steps: 20000.
          FLAGS.learning_power,     # poly power 0.9
          FLAGS.slow_start_step,    # 0
          FLAGS.slow_start_learning_rate,   # 1e-4, learning rate for the slow start.
          decay_steps=FLAGS.decay_steps,    # 0.0
          end_learning_rate=FLAGS.end_learning_rate)     # 0.0

      summaries.add(tf.summary.scalar('learning_rate', learning_rate))
      # Training optimizer.
      if FLAGS.optimizer == 'momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate, FLAGS.momentum)
      elif FLAGS.optimizer == 'adam':   # Adam: adds second-moment gradient correction while seeking the optimum.
        optimizer = tf.train.AdamOptimizer(
            learning_rate=FLAGS.adam_learning_rate, epsilon=FLAGS.adam_epsilon)
      else:
        raise ValueError('Unknown optimizer')

    if FLAGS.quantize_delay_step >= 0:  # Defaults to -1; ignored then.
      if FLAGS.num_clones > 1:
        raise ValueError('Quantization doesn\'t support multi-clone yet.')
      contrib_quantize.create_training_graph(
          quant_delay=FLAGS.quantize_delay_step)

    startup_delay_steps = FLAGS.task * FLAGS.startup_delay_steps    # FLAGS.startup_delay_steps defaults to 15.

    with tf.device(config.variables_device()):
      total_loss, grads_and_vars = model_deploy.optimize_clones(
          clones, optimizer)    # Compute total_loss.
      total_loss = tf.check_numerics(total_loss, 'Loss is inf or nan.')
      summaries.add(tf.summary.scalar('total_loss', total_loss))

      # Modify the gradients for biases and last layer variables.
      last_layers = model.get_extra_layer_scopes(
          FLAGS.last_layers_contain_logits_only)
      # Get the gradient multipliers.
      grad_mult = train_utils.get_model_gradient_multipliers(
          last_layers, FLAGS.last_layer_gradient_multiplier)
      # grad_mult: {'logits/semantic/biases': 2.0, 'logits/semantic/weights': 1.0}
      if grad_mult:
        grads_and_vars = slim.learning.multiply_gradients(
            grads_and_vars, grad_mult)

      # Create gradient update op.
      grad_updates = optimizer.apply_gradients(     # Apply the gradients to their variables; returns the update op.
          grads_and_vars, global_step=global_step)  # Also increments global_step.
      update_ops.append(grad_updates)
      update_op = tf.group(*update_ops)
      with tf.control_dependencies([update_op]):
        train_tensor = tf.identity(total_loss, name='train_op')

    # Add the summaries from the first clone. These contain the summaries
    # created by model_fn and either optimize_clones() or _gather_clone_loss().
    summaries |= set(
        tf.get_collection(tf.GraphKeys.SUMMARIES, first_clone_scope))

    # Merge all summaries together.
    summary_op = tf.summary.merge(list(summaries))

    # Soft placement allows placing on CPU ops without GPU implementation.
    session_config = tf.ConfigProto(
        allow_soft_placement=True, log_device_placement=False)

    # Start the training.
    profile_dir = FLAGS.profile_logdir   # Defaults to None.
    if profile_dir is not None:
      tf.gfile.MakeDirs(profile_dir)

    with contrib_tfprof.ProfileContext(
        enabled=profile_dir is not None, profile_dir=profile_dir):
      init_fn = None
      if FLAGS.tf_initial_checkpoint:   # Load the pretrained weights.
        init_fn = train_utils.get_model_init_fn(
            FLAGS.train_logdir,
            FLAGS.tf_initial_checkpoint,
            FLAGS.initialize_last_layer,
            last_layers,
            ignore_missing_vars=True)

      slim.learning.train(
          train_tensor,
          logdir=FLAGS.train_logdir,
          log_every_n_steps=FLAGS.log_steps,
          master=FLAGS.master,
          number_of_steps=FLAGS.training_number_of_steps,
          is_chief=(FLAGS.task == 0),
          session_config=session_config,
          startup_delay_steps=startup_delay_steps,
          init_fn=init_fn,
          summary_op=summary_op,
          save_summaries_secs=FLAGS.save_summaries_secs,
          save_interval_secs=FLAGS.save_interval_secs)
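# A minimal sketch of the 'poly' learning-rate policy described in the
# comments above, expressed with the stock TF 1.x decay op:
# lr = (base_lr - end_lr) * (1 - step / decay_steps) ** power + end_lr.
import tensorflow as tf

def poly_learning_rate(base_lr, global_step, training_steps, power=0.9,
                       end_lr=0.0):
    return tf.train.polynomial_decay(
        learning_rate=base_lr,
        global_step=global_step,
        decay_steps=training_steps,
        end_learning_rate=end_lr,
        power=power)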
Example #10
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.eval_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.eval_batch_size,
      crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      num_readers=2,
      is_training=False,
      should_shuffle=False,
      should_repeat=False,
      non_uniform_sampling=FLAGS.nus_preprocess,
      output_target_sampling=FLAGS.eval_type == "nus")

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()

    if FLAGS.eval_type == "nus":
        sampling_location = _nus_locations(samples[common.IMAGE], False)
        target_locations = samples[TARGET_SAMPLING]
        mse, update_op = tf.metrics.mean_squared_error(sampling_location, target_locations)
        # update_op = tf.Print(update_op, [mse])

        # tf.summary.image("InputImages", samples[common.IMAGE])
        # tf.summary.image("InputLabel", tf.to_float(samples[common.LABEL]) / 19)
        # tf.summary.image("ResViz", viz(sampling_location))
        tf.summary.scalar('mse', mse)
    else:
        crop_size = [int(sz) for sz in FLAGS.eval_crop_size]
        if FLAGS.nus_preprocess:
            crop_size = [FLAGS.nus_sampling_size] * 2

        model_options = common.ModelOptions(
            outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
            crop_size=crop_size,
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0,):
          tf.logging.info('Performing single-scale test.')
          predictions = model.predict_labels(samples[common.IMAGE], model_options,
                                             image_pyramid=FLAGS.image_pyramid,
                                             output_logits=FLAGS.nus_preprocess)
        else:
          tf.logging.info('Performing multi-scale test.')
          if FLAGS.quantize_delay_step >= 0:
            raise ValueError(
                'Quantize mode is not supported with multi-scale test.')

          predictions = model.predict_labels_multi_scale(
              samples[common.IMAGE],
              model_options=model_options,
              eval_scales=FLAGS.eval_scales,
              add_flipped_images=FLAGS.add_flipped_images)

        if FLAGS.nus_preprocess:
            with tf.name_scope("nus_interpolation"):
                assert FLAGS.eval_batch_size == 1, "Only support eval_batch_size == 1"
                sampling = samples[SAMPLING]
                logits = predictions[common.OUTPUT_TYPE + "/logits"]
                shape = tf.shape(samples[common.LABEL])[1:3]
                predictions = tf.py_func(
                    nus.nus_interpolate,
                    [logits[0], sampling[0], shape],
                    logits.dtype,
                )[None, ...]
                predictions = tf.argmax(predictions, axis=3)
        else:
            predictions = predictions[common.OUTPUT_TYPE]
        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        labels = tf.where(
            tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
          predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
          predictions_tag += '_flipped'

        # Define the evaluation metric.
        miou, update_op = tf.metrics.mean_iou(
            predictions, labels, dataset.num_of_classes, weights=weights)
        miou = tf.Print(miou, ["mIoU", miou])
        tf.summary.scalar(predictions_tag, miou)

    summary_op = tf.summary.merge_all()
    summary_hook = tf.contrib.training.SummaryAtEndHook(
        log_dir=FLAGS.eval_logdir, summary_op=summary_op)
    hooks = [summary_hook]

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations

    if FLAGS.quantize_delay_step >= 0:
      tf.contrib.quantize.create_eval_graph()

    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.
        TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    tf.contrib.tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
    tf.contrib.training.evaluate_repeatedly(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        eval_ops=[update_op],
        max_number_of_evaluations=num_eval_iters,
        hooks=hooks,
        eval_interval_secs=FLAGS.eval_interval_secs)
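# A minimal sketch of the tf.py_func pattern above: a numpy routine runs
# inside the graph to resize logits, then argmax recovers hard labels. The
# nearest-neighbor lookup below is only a stand-in for the project's
# nus.nus_interpolate, which is not shown here.
import numpy as np
import tensorflow as tf

def np_nearest_resize(logits, shape):
    # logits: [h, w, c]; shape: target [height, width].
    h, w = logits.shape[:2]
    rows = np.arange(shape[0]) * h // shape[0]
    cols = np.arange(shape[1]) * w // shape[1]
    return logits[rows[:, None], cols[None, :], :]

logits = tf.placeholder(tf.float32, [None, None, 19])
target_shape = tf.placeholder(tf.int32, [2])
resized = tf.py_func(np_nearest_resize, [logits, target_shape], logits.dtype)
resized.set_shape([None, None, 19])
predictions = tf.argmax(resized, axis=2)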
Example #11
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    dataset = data_generator.Dataset(  # Fetch the validation set.
        dataset_name=FLAGS.dataset,  # Dataset name: cityscapes; defaults to pascal_voc_seg.
        split_name=FLAGS.eval_split,  # TFRecord split containing 'val'; defaults to 'val'.
        dataset_dir=FLAGS.dataset_dir,  # Directory holding the TFRecord files.
        batch_size=FLAGS.eval_batch_size,  # Images per batch; defaults to 1.
        crop_size=[int(sz)
                   for sz in FLAGS.eval_crop_size],  # Crop size for evaluation; defaults to 513,513.
        min_resize_value=FLAGS.min_resize_value,  # Defaults to None.
        max_resize_value=FLAGS.max_resize_value,  # Defaults to None.
        resize_factor=FLAGS.resize_factor,  # Defaults to None.
        model_variant=FLAGS.model_variant,  # Model variant; xception_65 in this run.
        num_readers=2,  # Number of parallel readers.
        is_training=False,  # Not training.
        should_shuffle=False,  # Do not shuffle the input data.
        should_repeat=False)  # Do not repeat indefinitely.

    tf.gfile.MakeDirs(FLAGS.eval_logdir)  # Create the evaluation log directory.
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = dataset.get_one_shot_iterator().get_next()  # Fetch one batch of validation data.
        '''
        samples:
            {'image_name': <tf.Tensor 'IteratorGetNext:2' shape=(?,) dtype=string>, 
             'width': <tf.Tensor 'IteratorGetNext:5' shape=(?,) dtype=int64>, 
             'image': <tf.Tensor 'IteratorGetNext:1' shape=(?, 1024, 2048, 3) dtype=float32>, 
             'height': <tf.Tensor 'IteratorGetNext:0' shape=(?,) dtype=int64>, 
             'label': <tf.Tensor 'IteratorGetNext:3' shape=(?, 1024, 2048, 1) dtype=int32>, 
             'original_image': <tf.Tensor 'IteratorGetNext:4' shape=(?, ?, ?, 3) dtype=uint8>}
        '''
        model_options = common.ModelOptions(  # Model options.
            outputs_to_num_classes={
                common.OUTPUT_TYPE: dataset.num_of_classes
            },  # e.g. {semantic: 19}
            crop_size=[int(sz) for sz in FLAGS.eval_crop_size],  # e.g. 1024,2048
            atrous_rates=FLAGS.atrous_rates,  # e.g. 6,12,18
            output_stride=FLAGS.output_stride)  # e.g. 16

        # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
        samples[common.IMAGE].set_shape(  # Pin the static shape.
            [
                FLAGS.eval_batch_size,  # Defaults to 1.
                int(FLAGS.eval_crop_size[0]),
                int(FLAGS.eval_crop_size[1]),
                3
            ])
        if tuple(FLAGS.eval_scales) == (1.0, ):  # Single eval scale (1.0) by default.
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options,  # Per-pixel prediction.
                image_pyramid=FLAGS.image_pyramid)
            '''
              predictions:
                  {'semantic': <tf.Tensor 'ArgMax:0' shape=(1, 1024, 2048) dtype=int64>, 
                   'semantic_prob': <tf.Tensor 'Softmax:0' shape=(1, 1024, 2048, 19) dtype=float32>}
            '''
        else:
            tf.logging.info('Performing multi-scale test.')
            if FLAGS.quantize_delay_step >= 0:
                raise ValueError(
                    'Quantize mode is not supported with multi-scale test.')

            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]
        predictions = tf.reshape(predictions, shape=[-1])  # Flattened predicted labels.
        labels = tf.reshape(samples[common.LABEL], shape=[-1])  # Flattened ground-truth labels.
        weights = tf.to_float(tf.not_equal(labels,
                                           dataset.ignore_label))  # Per-pixel weights (0 at ignore_label).

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        labels = tf.where(tf.equal(labels, dataset.ignore_label),
                          tf.zeros_like(labels), labels)

        predictions_tag = 'miou'  # Mean IoU tag.
        predictions_tag1 = 'accuracy_pixel'  # Pixel-accuracy tag.
        for eval_scale in FLAGS.eval_scales:  # Single scale [1.0] by default.
            predictions_tag += '_' + str(eval_scale)
            predictions_tag1 += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:  # False by default: no left-right flipped evaluation.
            predictions_tag += '_flipped'
            predictions_tag1 += '_flipped'

        # Define the evaluation metric.
        metric_map = {}
        num_classes = dataset.num_of_classes  # e.g. 19 for Cityscapes.
        metric_map['eval/%s_overall' % predictions_tag] = tf.metrics.mean_iou(
            labels=labels,
            predictions=predictions,
            num_classes=num_classes,
            weights=weights)
        '''
          metric_map:
            {'eval/miou_1.0_overall': (<tf.Tensor 'mean_iou/Select_1:0' shape=() dtype=float32>,
                                       <tf.Tensor 'mean_iou/AssignAdd:0' shape=(19, 19) dtype=float64_ref>)}
        '''
        metric_map['eval/%s_overall_accuracy_' %
                   predictions_tag] = tf.metrics.accuracy(
                       labels=labels, predictions=predictions, weights=weights)
        # IoU for each class.
        '''
        tf.one_hot(indices, depth, on_value=None, off_value=None, axis=None, dtype=None, name=None)
            Returns a one-hot tensor.
        indices are the input values, usually in matrix form; depth is the size of the one-hot dimension.
        '''
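        # For instance, tf.one_hot([0, 2], depth=3) evaluates to
        # [[1., 0., 0.], [0., 0., 1.]].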
        one_hot_predictions = tf.one_hot(predictions, num_classes)
        one_hot_predictions = tf.reshape(one_hot_predictions,
                                         [-1, num_classes])  # One-hot predictions.
        one_hot_labels = tf.one_hot(labels, num_classes)
        one_hot_labels = tf.reshape(one_hot_labels,
                                    [-1, num_classes])  # One-hot ground-truth labels.
        for c in range(num_classes):
            predictions_tag_c = '%s_class_%d' % (predictions_tag, c)  # e.g. miou_1.0_class_5
            predictions_tag_c1 = '%s_class_%d' % (predictions_tag1, c)
            tp, tp_op = tf.metrics.true_positives(
                labels=one_hot_labels[:, c],
                predictions=one_hot_predictions[:, c],
                weights=weights)
            fp, fp_op = tf.metrics.false_positives(
                labels=one_hot_labels[:, c],
                predictions=one_hot_predictions[:, c],
                weights=weights)
            fn, fn_op = tf.metrics.false_negatives(
                labels=one_hot_labels[:, c],
                predictions=one_hot_predictions[:, c],
                weights=weights)
            tn, tn_op = tf.metrics.true_negatives(
                labels=one_hot_labels[:, c],
                predictions=one_hot_predictions[:, c],
                weights=weights)
            tp_fp_fn_op = tf.group(tp_op, fp_op, fn_op)
            iou = tf.where(tf.greater(tp + fn, 0.0), tp / (tp + fn + fp),
                           tf.constant(np.NaN))
            ap = tf.where(tf.greater(tp + fn, 0.0),
                          (tp + tn) / (tp + tn + fn + fp), tf.constant(np.NaN))
            metric_map['eval/%s' % predictions_tag_c] = (iou, tp_fp_fn_op)
            metric_map['eval/%s' % predictions_tag_c1] = (ap, tp_fp_fn_op)
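        # For each class c this computes IoU = TP / (TP + FP + FN) and pixel
        # accuracy = (TP + TN) / (TP + TN + FP + FN), reporting NaN when the
        # class never occurs in the ground truth (TP + FN == 0).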

        (metrics_to_values,
         metrics_to_updates) = contrib_metrics.aggregate_metric_map(metric_map)
        '''
        (metrics_to_values, metrics_to_updates):
            ({'eval/miou_1.0_class_5': <tf.Tensor 'Select_6:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_18': <tf.Tensor 'Select_19:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_13': <tf.Tensor 'Select_14:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_1': <tf.Tensor 'Select_2:0' shape=() dtype=float32>, 
             'eval/miou_1.0_overall': <tf.Tensor 'mean_iou/Select_1:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_17': <tf.Tensor 'Select_18:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_8': <tf.Tensor 'Select_9:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_2': <tf.Tensor 'Select_3:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_0': <tf.Tensor 'Select_1:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_3': <tf.Tensor 'Select_4:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_14': <tf.Tensor 'Select_15:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_11': <tf.Tensor 'Select_12:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_6': <tf.Tensor 'Select_7:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_15': <tf.Tensor 'Select_16:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_4': <tf.Tensor 'Select_5:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_9': <tf.Tensor 'Select_10:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_16': <tf.Tensor 'Select_17:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_7': <tf.Tensor 'Select_8:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_10': <tf.Tensor 'Select_11:0' shape=() dtype=float32>, 
             'eval/miou_1.0_class_12': <tf.Tensor 'Select_13:0' shape=() dtype=float32>}, 

            {'eval/miou_1.0_class_5': <tf.Operation 'group_deps_5' type=NoOp>, 
              'eval/miou_1.0_class_18': <tf.Operation 'group_deps_18' type=NoOp>, 
              'eval/miou_1.0_class_13': <tf.Operation 'group_deps_13' type=NoOp>, 
              'eval/miou_1.0_class_1': <tf.Operation 'group_deps_1' type=NoOp>, 
              'eval/miou_1.0_overall': <tf.Tensor 'mean_iou/AssignAdd:0' shape=(19, 19) dtype=float64_ref>, 
              'eval/miou_1.0_class_17': <tf.Operation 'group_deps_17' type=NoOp>, 
              'eval/miou_1.0_class_8': <tf.Operation 'group_deps_8' type=NoOp>, 
              'eval/miou_1.0_class_2': <tf.Operation 'group_deps_2' type=NoOp>, 
              'eval/miou_1.0_class_0': <tf.Operation 'group_deps' type=NoOp>, 
              'eval/miou_1.0_class_3': <tf.Operation 'group_deps_3' type=NoOp>, 
              'eval/miou_1.0_class_14': <tf.Operation 'group_deps_14' type=NoOp>, 
              'eval/miou_1.0_class_11': <tf.Operation 'group_deps_11' type=NoOp>, 
              'eval/miou_1.0_class_6': <tf.Operation 'group_deps_6' type=NoOp>, 
              'eval/miou_1.0_class_15': <tf.Operation 'group_deps_15' type=NoOp>, 
              'eval/miou_1.0_class_4': <tf.Operation 'group_deps_4' type=NoOp>, 
              'eval/miou_1.0_class_9': <tf.Operation 'group_deps_9' type=NoOp>, 
              'eval/miou_1.0_class_16': <tf.Operation 'group_deps_16' type=NoOp>, 
              'eval/miou_1.0_class_7': <tf.Operation 'group_deps_7' type=NoOp>, 
              'eval/miou_1.0_class_10': <tf.Operation 'group_deps_10' type=NoOp>, 
              'eval/miou_1.0_class_12': <tf.Operation 'group_deps_12' type=NoOp>})

        '''
        '''
        tf.Print(input, data, message=None, first_n=None, summarize=None, name=None)
            Requires at least two arguments: `input` is the tensor to pass through,
            and `data` is a list of tensors whose values are printed when the op runs.
        '''
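        # E.g. tf.Print(op, [metric_value], metric_name) below logs each metric's
        # value to stderr whenever the merged summary op is evaluated.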
        summary_ops = []
        for metric_name, metric_value in six.iteritems(metrics_to_values):
            op = tf.summary.scalar(metric_name, metric_value)  # Record a scalar summary.
            op = tf.Print(op, [metric_value], metric_name)
            summary_ops.append(op)

        summary_op = tf.summary.merge(summary_ops)
        summary_hook = contrib_training.SummaryAtEndHook(
            log_dir=FLAGS.eval_logdir, summary_op=summary_op)
        hooks = [summary_hook]

        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:  # 为0  暂不考虑
            num_eval_iters = FLAGS.max_number_of_evaluations

        if FLAGS.quantize_delay_step >= 0:  # -1 by default, so this branch is skipped.
            contrib_quantize.create_eval_graph()

        contrib_tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=contrib_tfprof.model_analyzer.
            TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
        contrib_tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=contrib_tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
        contrib_training.evaluate_repeatedly(
            checkpoint_dir=FLAGS.checkpoint_dir,
            master=FLAGS.master,
            eval_ops=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            hooks=hooks,
            eval_interval_secs=FLAGS.eval_interval_secs)
Exemple #12
def save_output_samples(checkpoint_dir,
                        best_chekpnt,
                        eval_preprocess_threads=8,
                        eval_crop_size=[1024, 2048]):
    eval_batch_size = 1
    compressed_reconstructed_dir = os.path.join(
        checkpoint_dir, 'compressed_reconstructed_images')
    if not os.path.exists(compressed_reconstructed_dir):
        os.makedirs(compressed_reconstructed_dir)
        logger.info('Creating directory ' + compressed_reconstructed_dir + '/')

    eval_split = 'val'
    num_sample_output = 20

    dataset = data_generator.Dataset(
        dataset_name='cityscapes',
        split_name=eval_split,
        dataset_dir=
        '/datatmp/Experiments/belbarashy/datasets/Cityscapes/tfrecord/',
        batch_size=eval_batch_size,
        crop_size=[int(sz) for sz in eval_crop_size],
        min_resize_value=None,
        max_resize_value=None,
        resize_factor=None,
        model_variant=None,
        num_readers=eval_preprocess_threads,
        is_training=False,
        should_shuffle=False,
        should_repeat=False)

    samples = dataset.get_one_shot_iterator().get_next()
    in_imgs = samples['image'] / 255
    depth = samples['depth'] / 255
    labels = samples['label']
    num_classes = dataset.num_of_classes

    # =================================== arch
    _, _, _, _, _, _, _, _, logits, _ = \
        build_model(in_imgs, depth, None, num_classes, mode='testing')
    # ===================================
    predictions = tf.argmax(logits, 3)  # [batch, H, W]: argmax over the class axis.

    with tf.Session() as sess:
        if best_chekpnt is None:
            latest = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)
            best_chekpnt = latest
        tf.train.Saver().restore(sess, save_path=best_chekpnt)
        for i in range(num_sample_output):
            test_file_name = str(i)
            depth_path = os.path.join(compressed_reconstructed_dir,
                                      test_file_name + '_depth' + '.png')
            orig_path = os.path.join(compressed_reconstructed_dir,
                                     test_file_name + '_orig' + '.png')
            map_gt_path = os.path.join(compressed_reconstructed_dir,
                                       test_file_name + '_map_gt' + '.png')
            map_pred_path = os.path.join(compressed_reconstructed_dir,
                                         test_file_name + '_map_pred' + '.png')

            p, l, input_img, dep = sess.run(
                [predictions, labels, in_imgs, depth])
            l = np.squeeze(l)
            p = np.squeeze(p)
            input_img = np.squeeze(input_img)
            dep = np.squeeze(dep)
            p[l == 255] = 255
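            # Copy the ignore regions (label 255) into the prediction so both the
            # ground-truth and prediction color maps render them identically.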

            colored_label = get_dataset_colormap.label_to_color_image(
                l, 'cityscapes')
            colored_pred = get_dataset_colormap.label_to_color_image(
                p, 'cityscapes')

            dep_jet = cv2.applyColorMap(np.uint8(dep * (255 * 2)),
                                        cv2.COLORMAP_JET)
            cv2.imwrite(depth_path, dep_jet)
            colored_pred = np.uint8(colored_pred[:, :, ::-1])
            cv2.imwrite(map_pred_path, colored_pred)
            colored_label = np.uint8(colored_label[:, :, ::-1])
            cv2.imwrite(map_gt_path, colored_label)
            input_img = np.uint8(input_img[:, :, ::-1] * 255)
            cv2.imwrite(orig_path, input_img)
Exemple #13
def eval_seg(checkpoint_dir,
             eval_preprocess_threads=8,
             eval_crop_size=[1024, 2048],
             eval_logdir='tmp_eval_log/',
             eval_batch_size=1,
             eval_repeatedly=False,
             eval_interval_secs=300):  # Assumed default; the original referenced this name without defining it.
    eval_split = 'val'
    dataset = data_generator.Dataset(
        dataset_name='cityscapes',
        split_name=eval_split,
        dataset_dir=
        '/datatmp/Experiments/belbarashy/datasets/Cityscapes/tfrecord/',
        batch_size=eval_batch_size,
        crop_size=[int(sz) for sz in eval_crop_size],
        min_resize_value=None,
        max_resize_value=None,
        resize_factor=None,
        model_variant=None,
        num_readers=eval_preprocess_threads,
        is_training=False,
        should_shuffle=False,
        should_repeat=False)

    tf.gfile.MakeDirs(eval_logdir)
    logger.info('Evaluating on ' + eval_split + ' set')

    with tf.Graph().as_default():
        samples = dataset.get_one_shot_iterator().get_next()
        # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
        samples['image'].set_shape([
            eval_batch_size,
            int(eval_crop_size[0]),
            int(eval_crop_size[1]), 3
        ])

        num_classes = dataset.num_of_classes
        in_imgs = samples['image'] / 255
        depth = samples['depth'] / 255
        labels = samples['label']
        # =================================== arch
        _, _, _, _, _, _, _, _, logits, _ = \
            build_model(in_imgs, depth, None, num_classes, mode='testing')
        if logits is None:
            highest_val_miou = 0
            best_chekpnt = None
            return highest_val_miou, best_chekpnt
        # ===================================

        predictions = tf.argmax(logits, 3)
        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(labels, shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))
        labels = tf.where(tf.equal(labels, dataset.ignore_label),
                          tf.zeros_like(labels), labels)

        predictions_tag = 'miou'
        eval_scales = [1.0]
        for eval_scale in eval_scales:
            predictions_tag += '_' + str(eval_scale)

        # Define the evaluation metric ==> mIOU over class
        miou, update_op = tf.metrics.mean_iou(predictions,
                                              labels,
                                              num_classes,
                                              weights=weights)
        tf.summary.scalar(predictions_tag, miou)

        summary_op = tf.summary.merge_all()
        summary_hook = tf.contrib.training.SummaryAtEndHook(
            log_dir=eval_logdir, summary_op=summary_op)
        hooks = [summary_hook]

        num_eval_iters = 100000

        tf.contrib.tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=tf.contrib.tfprof.model_analyzer.
            TRAINABLE_VARS_PARAMS_STAT_OPTIONS)

        tf.contrib.tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=tf.contrib.tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

        latest = tf.train.latest_checkpoint(checkpoint_dir=checkpoint_dir)
        if eval_repeatedly:
            logger.info('start evaluation repeatedly')
            tf.contrib.training.evaluate_repeatedly(
                master='',
                checkpoint_dir=checkpoint_dir,
                eval_ops=[update_op],
                max_number_of_evaluations=num_eval_iters,
                hooks=hooks,
                eval_interval_secs=eval_interval_secs)
        else:
            logger.info('start evaluating last 5 checkpoints')
            checkpnts_paths = tf.train.get_checkpoint_state(
                checkpoint_dir=checkpoint_dir).all_model_checkpoint_paths
            best_chekpnt = latest
            highest_val_miou = 0
            for chekpnt_path in checkpnts_paths:
                final_m = tf.contrib.training.evaluate_once(
                    checkpoint_path=chekpnt_path,
                    master='',
                    eval_ops=[update_op],
                    final_ops=miou,
                    hooks=hooks)
                if final_m > highest_val_miou:
                    highest_val_miou = final_m
                    best_chekpnt = chekpnt_path
                logger.info(chekpnt_path + ' ==> mIOU ' + str(final_m))

            logger.info('==============================================')
            logger.info('highest_val_miou = ' + str(highest_val_miou))
            logger.info('best_chekpnt = ' + str(best_chekpnt))
            logger.info('==============================================')

    if not eval_repeatedly:
        tf.reset_default_graph()
        save_output_samples(checkpoint_dir, best_chekpnt,
                            eval_preprocess_threads, eval_crop_size)
        return highest_val_miou, best_chekpnt
Exemple #14
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  # Get dataset-dependent information.
  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.vis_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.vis_batch_size,
      crop_size=[int(sz) for sz in FLAGS.vis_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  train_id_to_eval_id = None

  # Prepare for visualization.
  tf.gfile.MakeDirs(FLAGS.vis_logdir)
  save_dir = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER)
  save_dir1 = os.path.join(FLAGS.vis_logdir, _SEMANTIC_PREDICTION_SAVE_FOLDER1)
  tf.gfile.MakeDirs(save_dir)
  raw_save_dir = os.path.join(
      FLAGS.vis_logdir, _RAW_SEMANTIC_PREDICTION_SAVE_FOLDER)
  raw_save_dir_label = os.path.join(
      FLAGS.vis_logdir, _LABEL_SEMANTIC_PREDICTION_SAVE_FOLDER)
  tf.gfile.MakeDirs(raw_save_dir)
  tf.gfile.MakeDirs(raw_save_dir_label)

  tf.logging.info('Visualizing on %s set', FLAGS.vis_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()

    model_options = common.ModelOptions(
        outputs_to_num_classes={common.OUTPUT_TYPE: dataset.num_of_classes},
        crop_size=[int(sz) for sz in FLAGS.vis_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE],
          model_options=model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)
    predictions = predictions[common.OUTPUT_TYPE]
    predict = tf.reshape(predictions, shape=[-1])
    labels = tf.reshape(samples[common.LABEL], shape=[-1])
    weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

    # Set ignore_label regions to label 0, because metrics.mean_iou requires
    # range of labels = [0, dataset.num_classes). Note the ignore_label regions
    # are not evaluated since the corresponding regions contain weights = 0.
    labels = tf.where(
        tf.equal(labels, dataset.ignore_label), tf.zeros_like(labels), labels)

    miou_tf, update_op_tf = tf.metrics.mean_iou(
        predict, labels, dataset.num_of_classes, weights=weights)
    
    from deeplab import my_metrics    
    iou_v, update_op = my_metrics.iou(predict, labels, dataset.num_of_classes, weights=weights)

    if FLAGS.min_resize_value and FLAGS.max_resize_value:
      # Only support batch_size = 1, since we assume the dimensions of original
      # image after tf.squeeze is [height, width, 3].
      assert FLAGS.vis_batch_size == 1

      # Reverse the resizing and padding operations performed in preprocessing.
      # First, we slice the valid regions (i.e., remove padded region) and then
      # we resize the predictions back.
      original_image = tf.squeeze(samples[common.ORIGINAL_IMAGE])
      original_image_shape = tf.shape(original_image)
      predictions = tf.slice(
          predictions,
          [0, 0, 0],
          [1, original_image_shape[0], original_image_shape[1]])
      resized_shape = tf.to_int32([tf.squeeze(samples[common.HEIGHT]),
                                   tf.squeeze(samples[common.WIDTH])])
      predictions = tf.squeeze(
          tf.image.resize_images(tf.expand_dims(predictions, 3),
                                 resized_shape,
                                 method=tf.image.ResizeMethod.NEAREST_NEIGHBOR,
                                 align_corners=True), 3)

    tf.train.get_or_create_global_step()
    if FLAGS.quantize_delay_step >= 0:
      tf.contrib.quantize.create_eval_graph()

    num_iteration = 0
    max_num_iteration = FLAGS.max_number_of_iterations

    checkpoints_iterator = tf.contrib.training.checkpoints_iterator(
        FLAGS.checkpoint_dir, min_interval_secs=FLAGS.eval_interval_secs)
    for checkpoint_path in checkpoints_iterator:
      num_iteration += 1
      tf.logging.info(
          'Starting visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                       time.gmtime()))
      tf.logging.info('Visualizing with model %s', checkpoint_path)

      scaffold = tf.train.Scaffold(init_op=tf.global_variables_initializer())
      session_creator = tf.train.ChiefSessionCreator(
          scaffold=scaffold,
          master=FLAGS.master,
          checkpoint_filename_with_path=checkpoint_path)
      with tf.train.MonitoredSession(
          session_creator=session_creator, hooks=None) as sess:
        batch = 0
        image_id_offset = 0

        while not sess.should_stop():
          tf.logging.info('Visualizing batch %d', batch + 1)
          _process_batch(sess=sess,
                         original_images=samples[common.ORIGINAL_IMAGE],
                         semantic_predictions=predictions,
                         gt_labels=samples[common.LABEL],
                         image_names=samples[common.IMAGE_NAME],
                         image_heights=samples[common.HEIGHT],
                         image_widths=samples[common.WIDTH],
                         image_id_offset=image_id_offset,
                         update_op=update_op,
                         iou=iou_v,
                         save_dir=save_dir,
                         save_dir1=save_dir1,
                         raw_save_dir=raw_save_dir,
                         raw_save_dir_label=raw_save_dir_label,
                         mtf=miou_tf,
                         utf=update_op_tf,
                         train_id_to_eval_id=train_id_to_eval_id)
          image_id_offset += FLAGS.vis_batch_size
          batch += 1

      tf.logging.info(
          'Finished visualization at ' + time.strftime('%Y-%m-%d-%H:%M:%S',
                                                       time.gmtime()))
      if max_num_iteration > 0 and num_iteration >= max_num_iteration:
        break

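    # Note: mean_iou_eval and the miou_class* lists are assumed to be module-level
    # accumulators populated inside _process_batch (not shown in this snippet).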
    mean_eval = np.array(mean_iou_eval)
    print("Mean IoU on validation dataset: ", np.mean(mean_eval))
    print("Mean IoU for background: ", np.nanmean(miou_class1_back))
    print("Mean IoU for stem: ", np.nanmean(miou_class2_stem))
    print("Mean IoU for callus: ", np.nanmean(miou_class3_cal))
    print("Mean IoU for shoot: ", np.nanmean(miou_class4_shoot))

    print("Renaming files.")
    segmentation_res_path = os.path.join('./', FLAGS.vis_logdir, _LABEL_SEMANTIC_PREDICTION_SAVE_FOLDER)
    for file in os.listdir(segmentation_res_path):
      src = file
      dst = os.path.join(segmentation_res_path, src[2:-5] + '.png')
      os.rename(os.path.join(segmentation_res_path, src), dst)

    print(np.mean(mean_eval))
Exemple #15
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    tf.gfile.MakeDirs(FLAGS.train_logdir)
    tf.logging.info('Training on %s set', FLAGS.train_split)

    graph = tf.Graph()
    with graph.as_default():
        with tf.device(
                tf.train.replica_device_setter(ps_tasks=FLAGS.num_ps_tasks)):
            assert FLAGS.train_batch_size % FLAGS.num_clones == 0, (
                'Training batch size not divisible by number of clones (GPUs).')
            clone_batch_size = FLAGS.train_batch_size // FLAGS.num_clones

            dataset = data_generator.Dataset(
                dataset_name=FLAGS.dataset,
                split_name=FLAGS.train_split,
                dataset_dir=FLAGS.dataset_dir,
                batch_size=clone_batch_size,
                crop_size=FLAGS.train_crop_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                model_variant=FLAGS.model_variant,
                num_readers=2,
                is_training=True,
                should_shuffle=True,
                should_repeat=True)

            vdataset = data_generator.Dataset(
                dataset_name=FLAGS.dataset,
                split_name=FLAGS.trainval_split,
                dataset_dir=FLAGS.dataset_dir,
                batch_size=FLAGS.trainval_batch_size,
                crop_size=FLAGS.train_crop_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                model_variant=FLAGS.model_variant,
                num_readers=2,
                is_training=True,
                should_shuffle=False,
                should_repeat=False)

            viterator = vdataset.get_initializable_iterator()
            next_element = viterator.get_next()

            val_image = tf.placeholder(tf.float32,
                                       shape=(None, FLAGS.train_crop_size[0],
                                              FLAGS.train_crop_size[1], 3))
            val_label = tf.placeholder(tf.int32,
                                       shape=(None, FLAGS.train_crop_size[0],
                                              FLAGS.train_crop_size[1], 1))
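            # These placeholders let the validation-loss graph be fed with batches
            # pulled from the initializable iterator inside the training loop below.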

            train_tensor, summary_op = _train_deeplab_model(
                dataset.get_one_shot_iterator(), dataset.num_of_classes,
                dataset.ignore_label)

            val_tensor = _val_loss(dataset=vdataset,
                                   image=val_image,
                                   label=val_label,
                                   num_of_classes=vdataset.num_of_classes,
                                   ignore_label=vdataset.ignore_label)

            # Soft placement allows placing on CPU ops without GPU implementation.
            session_config = tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=False)

            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            init_fn = None
            if FLAGS.tf_initial_checkpoint:
                init_fn = train_utils.get_model_init_fn(
                    FLAGS.train_logdir,
                    FLAGS.tf_initial_checkpoint,
                    FLAGS.initialize_last_layer,
                    last_layers,
                    ignore_missing_vars=True)

            scaffold = tf.train.Scaffold(
                init_fn=init_fn,
                summary_op=summary_op,
            )

            stop_hook = tf.train.StopAtStepHook(FLAGS.training_number_of_steps)

            # Validation set variables
            epoch = 0
            val_loss_per_epoch = []
            steps_per_epoch = int(dataset.num_samples / FLAGS.train_batch_size)
            saver = tf.train.Saver(max_to_keep=1)

            profile_dir = FLAGS.profile_logdir
            if profile_dir is not None:
                tf.gfile.MakeDirs(profile_dir)

            with tf.contrib.tfprof.ProfileContext(enabled=profile_dir
                                                  is not None,
                                                  profile_dir=profile_dir):
                with tf.train.MonitoredTrainingSession(
                        master=FLAGS.master,
                        is_chief=(FLAGS.task == 0),
                        config=session_config,
                        scaffold=scaffold,
                        checkpoint_dir=FLAGS.train_logdir,
                        log_step_count_steps=FLAGS.log_steps,
                        save_summaries_steps=FLAGS.save_summaries_secs,
                        save_checkpoint_secs=FLAGS.save_interval_secs,
                        hooks=[]) as sess:
                    while not sess.should_stop():
                        step = sess.run(tf.train.get_global_step())
                        sess.run([train_tensor])
                        if step % steps_per_epoch == 0:
                            count_validation = 0
                            stop_training = False
                            val_losses = []
                            sess.run(viterator.initializer)
                            while True:
                                try:
                                    val_element = sess.run(next_element)
                                    val_loss, val_summary = sess.run(
                                        val_tensor,
                                        feed_dict={
                                            val_image:
                                            val_element[common.IMAGE],
                                            val_label:
                                            val_element[common.LABEL]
                                        })
                                    val_losses.append(val_loss)
                                    count_validation += 1
                                    #print('  {} [validation] {} {}'.format(count_validation, val_loss, val_element[common.IMAGE_NAME]))
                                except tf.errors.OutOfRangeError:
                                    total_val_loss = sum(val_losses) / len(
                                        val_losses)
                                    val_loss_per_epoch.append(total_val_loss)
                                    print('  {} [validation loss] {}'.format(
                                        count_validation *
                                        FLAGS.train_batch_size,
                                        total_val_loss))
                                    print('  {} [current epoch]   {}'.format(
                                        step, epoch))
                                    break
                            if epoch > 0:
                                min_delta = 0.01
                                patience = 8
                                stop_training = early_stopping(
                                    epoch, val_loss_per_epoch, min_delta,
                                    patience, sess, saver, total_val_loss)
                            # Stop training when early stopping triggers, i.e. the
                            # val loss has not improved enough for `patience` epochs.
                            if stop_training:
                                break
                            epoch += 1
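
# `early_stopping` is called above but not defined in this snippet. Below is a
# minimal sketch of a compatible helper (an assumption, not the author's code):
# it returns True when the validation loss has not improved by at least
# `min_delta` for `patience` consecutive epochs. `sess` and `saver` are accepted
# so a caller could checkpoint the best model; that part is omitted here.
def early_stopping(epoch, val_loss_per_epoch, min_delta, patience,
                   sess, saver, current_loss):
    # val_loss_per_epoch already contains current_loss as its last entry.
    last_improvement = 0
    best = val_loss_per_epoch[0]
    for e, loss in enumerate(val_loss_per_epoch[1:], start=1):
        if loss <= best - min_delta:  # Improved by at least min_delta.
            best = loss
            last_improvement = e
    # Stop when no epoch has improved for `patience` consecutive epochs.
    return (epoch - last_improvement) >= patience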
Exemple #16
def train(l_args):
    """Trains the model."""
    if l_args.verbose:
        tf.logging.set_verbosity(tf.logging.INFO)

    # Create input data pipeline.

    dataset = data_generator.Dataset(
        dataset_name='cityscapes',
        split_name='train',
        dataset_dir=
        '/datatmp/Experiments/belbarashy/datasets/Cityscapes/tfrecord/',
        batch_size=l_args.batchsize,
        crop_size=[int(sz) for sz in [l_args.patchsize, l_args.patchsize]],
        min_resize_value=None,
        max_resize_value=None,
        resize_factor=None,
        min_scale_factor=0.5,
        max_scale_factor=2.,
        scale_factor_step_size=0.25,
        model_variant=None,
        num_readers=l_args.preprocess_threads,
        is_training=True,
        should_shuffle=True,
        should_repeat=True)

    # reading batch: keys of samples ['height', 'width', 'image_name', 'label', 'image']
    num_classes = dataset.num_of_classes
    samples = dataset.get_one_shot_iterator().get_next()

    #num_pixels = l_args.batchsize * l_args.patchsize ** 2
    x = samples['image'] / 255
    depth = samples['depth'] / 255
    labels = samples['label']

    # Build autoencoder.
    train_loss, train_bpp, train_mse, x_tilde, _, _, _, entropy_bottleneck, seg_logits, seg_loss = \
        build_model(x, depth, l_args.lmbda, num_classes, mode = 'training', seg_labels = labels,
                    ignore_label = dataset.ignore_label)

    # Minimize loss and auxiliary loss, and execute update op.
    step = tf.train.get_or_create_global_step()
    main_optimizer = tf.train.AdamOptimizer(learning_rate=1e-4)
    main_step = main_optimizer.minimize(train_loss, global_step=step)

    if entropy_bottleneck is not None:
        aux_optimizer = tf.train.AdamOptimizer(learning_rate=1e-3)
        aux_step = aux_optimizer.minimize(entropy_bottleneck.losses[0])
        # Creates a summary of the probability mass function (PMF) estimated in
        # the bottleneck.
        entropy_bottleneck.visualize()
        train_op = tf.group(main_step, aux_step, entropy_bottleneck.updates[0])
    else:
        train_op = tf.group(main_step)
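    # Two-optimizer pattern from tensorflow_compression: the main optimizer
    # minimizes the rate-distortion (+ segmentation) loss, while the auxiliary
    # optimizer fits the entropy model; entropy_bottleneck.updates[0] keeps its
    # range-coding CDF tables in sync for compression at test time.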

    log_all_summaries(x, x_tilde, seg_logits, labels, train_loss, train_bpp,
                      train_mse, seg_loss)

    hooks = [
        tf.train.StopAtStepHook(last_step=l_args.last_step),
        tf.train.NanTensorHook(train_loss),
    ]
    with tf.train.MonitoredTrainingSession(
            hooks=hooks,
            checkpoint_dir=l_args.checkpoint_dir,
            save_checkpoint_secs=300,
            save_summaries_secs=60) as sess:
        while not sess.should_stop():
            sess.run(train_op)
Exemple #17
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    dataset = data_generator.Dataset(
        dataset_name=FLAGS.dataset,
        split_name=FLAGS.eval_split,
        dataset_dir=FLAGS.dataset_dir,
        batch_size=FLAGS.eval_batch_size,
        crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        model_variant=FLAGS.model_variant,
        num_readers=2,
        is_training=False,
        should_shuffle=False,
        should_repeat=False)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = dataset.get_one_shot_iterator().get_next()

        model_options = common.ModelOptions(
            model_name=FLAGS.model_name,
            outputs_to_num_classes={
                common.OUTPUT_TYPE: dataset.num_of_classes
            },
            crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
        samples[common.IMAGE].set_shape([
            FLAGS.eval_batch_size,
            int(FLAGS.eval_crop_size[0]),
            int(FLAGS.eval_crop_size[1]), 3
        ])
        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            if FLAGS.quantize_delay_step >= 0:
                raise ValueError(
                    'Quantize mode is not supported with multi-scale test.')

            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]
        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        labels = tf.where(tf.equal(labels, dataset.ignore_label),
                          tf.zeros_like(labels), labels)

        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
            predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Define the evaluation metric.
        metric_map = {}
        num_classes = dataset.num_of_classes
        metric_map['eval/%s_overall' % predictions_tag] = tf.metrics.mean_iou(
            labels=labels,
            predictions=predictions,
            num_classes=num_classes,
            weights=weights)
        # IoU for each class.
        one_hot_predictions = tf.one_hot(predictions, num_classes)
        one_hot_predictions = tf.reshape(one_hot_predictions,
                                         [-1, num_classes])
        one_hot_labels = tf.one_hot(labels, num_classes)
        one_hot_labels = tf.reshape(one_hot_labels, [-1, num_classes])
        for c in range(num_classes):
            predictions_tag_c = '%s_class_%d' % (predictions_tag, c)
            tp, tp_op = tf.metrics.true_positives(
                labels=one_hot_labels[:, c],
                predictions=one_hot_predictions[:, c],
                weights=weights)
            fp, fp_op = tf.metrics.false_positives(
                labels=one_hot_labels[:, c],
                predictions=one_hot_predictions[:, c],
                weights=weights)
            fn, fn_op = tf.metrics.false_negatives(
                labels=one_hot_labels[:, c],
                predictions=one_hot_predictions[:, c],
                weights=weights)
            tp_fp_fn_op = tf.group(tp_op, fp_op, fn_op)
            iou = tf.where(tf.greater(tp + fn, 0.0), tp / (tp + fn + fp),
                           tf.constant(np.NaN))
            metric_map['eval/%s' % predictions_tag_c] = (iou, tp_fp_fn_op)

        (metrics_to_values,
         metrics_to_updates) = contrib_metrics.aggregate_metric_map(metric_map)

        summary_ops = []
        for metric_name, metric_value in six.iteritems(metrics_to_values):
            op = tf.summary.scalar(metric_name, metric_value)
            op = tf.Print(op, [metric_value], metric_name)
            summary_ops.append(op)

        summary_op = tf.summary.merge(summary_ops)
        summary_hook = contrib_training.SummaryAtEndHook(
            log_dir=FLAGS.eval_logdir, summary_op=summary_op)
        hooks = [summary_hook]

        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations

        if FLAGS.quantize_delay_step >= 0:
            contrib_quantize.create_eval_graph()

        contrib_tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=contrib_tfprof.model_analyzer.
            TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
        contrib_tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=contrib_tfprof.model_analyzer.FLOAT_OPS_OPTIONS)
        contrib_training.evaluate_repeatedly(
            checkpoint_dir=FLAGS.checkpoint_dir,
            master=FLAGS.master,
            eval_ops=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            hooks=hooks,
            eval_interval_secs=FLAGS.eval_interval_secs)
Exemple #18
def eval(l_args, expDir, lmbda, best_chekpnt, val_miou):
    train_dir = l_args.checkpoint_dir
    metrics_path = os.path.join(train_dir, 'metrics_args.pkl')
    l_args.lmbda = lmbda
    compressed_reconstructed_dir = os.path.join(
        train_dir, 'compressed_reconstructed_images')
    if not os.path.exists(compressed_reconstructed_dir):
        os.makedirs(compressed_reconstructed_dir)
    val_split_size = 500

    dataset = data_generator.Dataset(
        dataset_name='cityscapes',
        split_name='val',
        dataset_dir=
        '/datatmp/Experiments/belbarashy/datasets/Cityscapes/tfrecord/',
        batch_size=1,  #l_args.batchsize
        crop_size=[int(sz) for sz in [1024, 2048]],
        min_resize_value=None,
        max_resize_value=None,
        resize_factor=None,
        model_variant=None,
        num_readers=l_args.preprocess_threads,
        is_training=False,
        should_shuffle=False,
        should_repeat=False)

    # reading batch: keys of samples ['height', 'width', 'image_name', 'label', 'image']
    num_classes = dataset.num_of_classes
    samples = dataset.get_one_shot_iterator().get_next()

    x = samples['image'] / 255
    depth = samples['depth'] / 255
    labels = samples['label']
    num_pixels = tf.to_float(tf.reduce_prod(tf.shape(x)[:-1]))

    # ======================== Input image dims should be a multiple of 16
    x_shape = tf.shape(x)
    x_shape = tf.cast(tf.ceil(x_shape / 16) * 16, tf.int32)  # resize_images needs integer sizes.
    x = tf.image.resize_images(x, (x_shape[1], x_shape[2]))
    # ========================
    """ build model """
    _, eval_bpp, _, x_hat, y_hat, y, string, _, seg_logits, seg_loss = \
        build_model(x, depth, l_args.lmbda, num_classes, mode = 'testing')

    # Bring both images back to 0..255 range.
    x *= 255
    img_file_name = tf.placeholder(tf.string)
    noReconstuction = False
    if x_hat is None:
        noReconstuction = True
        save_reconstructed_op = None
    else:
        x_hat_to_save = tf.identity(x_hat[0, :, :, :])
        x_hat = tf.clip_by_value(x_hat, 0, 1)
        x_hat = tf.round(x_hat * 255)

        mse = tf.reduce_mean(tf.squared_difference(x, x_hat))
        psnr = tf.squeeze(tf.image.psnr(x_hat, x, 255))
        msssim = tf.squeeze(tf.image.ssim_multiscale(x_hat, x, 255))
        # Write reconstructed image out as a PNG file.
        save_reconstructed_op = save_image(img_file_name, x_hat_to_save)

    logger.info('Testing the model on ' + str(val_split_size) +
                ' images and save the reconstructed images')
    msel, psnrl, msssiml, msssim_dbl, eval_bppl, bppl = [], [], [], [], [], []

    with tf.Session() as sess:
        # Load the latest model checkpoint, get the compressed string and the tensor
        # shapes.
        if best_chekpnt is None:
            latest = tf.train.latest_checkpoint(
                checkpoint_dir=l_args.checkpoint_dir)
            best_chekpnt = latest
        tf.train.Saver().restore(sess, save_path=best_chekpnt)

        for i in range(val_split_size):
            test_file_name = str(i)
            compressed_im_path = os.path.join(
                compressed_reconstructed_dir,
                test_file_name + '_compressed' + '.bin')
            reconstucted_im_path = os.path.join(
                compressed_reconstructed_dir,
                test_file_name + '_reconstructed' + '.png')
            im_metrics_path = os.path.join(
                compressed_reconstructed_dir,
                test_file_name + '_metrics' + '.pkl')
            l_args.output = reconstucted_im_path

            if (i < 50) and not (noReconstuction):
                eval_bpp_, mse_, psnr_, msssim_, num_pixels_, string_, x_shape, y_shape, _ = \
                    sess.run( [eval_bpp, mse, psnr, msssim, num_pixels, string,
                                tf.shape(x), tf.shape(y), save_reconstructed_op],
                                feed_dict={img_file_name:reconstucted_im_path})
            else:
                if eval_bpp is not None:
                    if noReconstuction:
                        eval_bpp_, num_pixels_, string_, x_shape, y_shape = \
                            sess.run( [eval_bpp, num_pixels, string, tf.shape(x), tf.shape(y)],
                                        feed_dict={img_file_name:reconstucted_im_path})
                        mse_ = 0
                        psnr_ = 0
                        msssim_ = 0
                    else:
                        eval_bpp_, mse_, psnr_, msssim_, num_pixels_, string_, x_shape, y_shape = \
                            sess.run( [eval_bpp, mse, psnr, msssim, num_pixels, string,tf.shape(x), tf.shape(y)],
                                        feed_dict={img_file_name:reconstucted_im_path})
                else:
                    mse_ = 0
                    psnr_ = 0
                    msssim_ = 0
                    eval_bpp_ = 0
                    num_pixels_ = None
                    string_ = None
                    x_shape = None
                    y_shape = None

            # Save only the first 50 test samples.
            if i < 50 and (string_ is not None):
                # Write a binary file with the shape information and the compressed string.
                with open(compressed_im_path, "wb") as f:
                    f.write(np.array(x_shape[1:-1], dtype=np.uint16).tobytes())
                    f.write(np.array(y_shape[1:-1], dtype=np.uint16).tobytes())
                    f.write(string_)

            if string_ is not None:
                # The actual bits per pixel including overhead.
                bpp_ = (8 + len(string_)) * 8 / num_pixels_
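                # The 8-byte overhead is the header written above: the x and y
                # spatial shapes as four uint16 values (4 x 2 bytes).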
            else:
                bpp_ = 0

            print("Mean squared error: {:0.4f}".format(mse_))
            print("PSNR (dB): {:0.2f}".format(psnr_))
            print("Multiscale SSIM: {:0.4f}".format(msssim_))
            print("Multiscale SSIM (dB): {:0.2f}".format(
                -10 * np.log10(1 - msssim_)))
            print("Information content in bpp: {:0.4f}".format(eval_bpp_))
            print("Actual bits per pixel: {:0.4f}".format(bpp_))
            msssim_db_ = (-10 * np.log10(1 - msssim_))

            im_metrics = {
                'mse': mse_,
                'psnr': psnr_,
                'msssim': msssim_,
                'msssim_db': msssim_db_,
                'eval_bpp': eval_bpp_,
                'bpp': bpp_
            }
            with open(im_metrics_path, "wb") as fp:
                pickle.dump(im_metrics, fp)

            msel.append(mse_)
            psnrl.append(psnr_)
            msssiml.append(msssim_)
            msssim_dbl.append(msssim_db_)
            eval_bppl.append(eval_bpp_)
            bppl.append(bpp_)

    logger.info(
        'Averaging metrics and saving them with exp_args in the pickle file metrics_args.pkl'
    )
    mse_ = np.mean(msel)
    psnr_ = np.mean(psnrl)
    msssim_ = np.mean(msssiml)
    eval_bpp_ = np.mean(eval_bppl)
    bpp_ = np.mean(bppl)
    msssim_db_ = np.mean(msssim_dbl)

    logger.info('MSE        = ' + str(mse_))
    logger.info('PSNR       = ' + str(psnr_))
    logger.info('MS-SSIM    = ' + str(msssim_))
    logger.info('MS-SSIM db = ' + str(msssim_db_))
    logger.info('Eval_bpp   = ' + str(eval_bpp_))
    logger.info('bpp        = ' + str(bpp_))
    logger.info('mIOU       = ' + str(val_miou))
    exp_avg_metrics = {
        'mse': mse_,
        'psnr': psnr_,
        'msssim': msssim_,
        'msssim_db': msssim_db_,
        'eval_bpp': eval_bpp_,
        'bpp': bpp_,
        'mIOU': val_miou,
        'chk_pnt': best_chekpnt
    }

    with open(metrics_path, "wb") as fp:
        pickle.dump({
            'exp_avg_metrics': exp_avg_metrics,
            'exp_args': l_args
        }, fp)
Exemple #19
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    tf.gfile.MakeDirs(FLAGS.train_logdir)
    tf.logging.info('Training on %s set', FLAGS.train_split)

    graph = tf.Graph()
    with graph.as_default():
        with tf.device(
                tf.train.replica_device_setter(ps_tasks=FLAGS.num_ps_tasks)):
            assert FLAGS.train_batch_size % FLAGS.num_clones == 0, (
                'Training batch size not divisible by number of clones (GPUs).')
            clone_batch_size = FLAGS.train_batch_size // FLAGS.num_clones

            dataset = data_generator.Dataset(
                dataset_name=FLAGS.dataset,
                split_name=FLAGS.train_split,
                dataset_dir=FLAGS.dataset_dir,
                batch_size=clone_batch_size,
                crop_size=FLAGS.train_crop_size,
                min_resize_value=FLAGS.min_resize_value,
                max_resize_value=FLAGS.max_resize_value,
                resize_factor=FLAGS.resize_factor,
                min_scale_factor=FLAGS.min_scale_factor,
                max_scale_factor=FLAGS.max_scale_factor,
                scale_factor_step_size=FLAGS.scale_factor_step_size,
                model_variant=FLAGS.model_variant,
                num_readers=2,
                is_training=True,
                should_shuffle=True,
                should_repeat=True)

            train_tensor, summary_op = _train_deeplab_model(
                dataset.get_one_shot_iterator(), dataset.num_of_classes,
                dataset.ignore_label)

            # Soft placement allows placing on CPU ops without GPU implementation.
            session_config = tf.ConfigProto(allow_soft_placement=True,
                                            log_device_placement=False)

            last_layers = model.get_extra_layer_scopes(
                FLAGS.last_layers_contain_logits_only)
            init_fn = None
            if FLAGS.tf_initial_checkpoint:
                init_fn = train_utils.get_model_init_fn(
                    FLAGS.train_logdir,
                    FLAGS.tf_initial_checkpoint,
                    FLAGS.initialize_last_layer,
                    last_layers,
                    ignore_missing_vars=True)

            scaffold = tf.train.Scaffold(
                init_fn=init_fn,
                summary_op=summary_op,
            )

            stop_hook = tf.train.StopAtStepHook(FLAGS.training_number_of_steps)

            profile_dir = FLAGS.profile_logdir
            if profile_dir is not None:
                tf.gfile.MakeDirs(profile_dir)

            with tf.contrib.tfprof.ProfileContext(enabled=profile_dir
                                                  is not None,
                                                  profile_dir=profile_dir):
                with tf.train.MonitoredTrainingSession(
                        master=FLAGS.master,
                        is_chief=(FLAGS.task == 0),
                        config=session_config,
                        scaffold=scaffold,
                        checkpoint_dir=FLAGS.train_logdir,
                        log_step_count_steps=FLAGS.log_steps,
                        save_summaries_steps=FLAGS.save_summaries_secs,
                        save_checkpoint_secs=FLAGS.save_interval_secs,
                        hooks=[stop_hook]) as sess:
                    while not sess.should_stop():
                        sess.run([train_tensor])
Exemple #20
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    # Get dataset-dependent information.
    dataset = data_generator.Dataset(dataset_name=FLAGS.dataset,
                                     split_name=FLAGS.eval_split,
                                     dataset_dir=FLAGS.dataset_dir,
                                     batch_size=FLAGS.eval_batch_size,
                                     crop_size=FLAGS.eval_crop_size,
                                     min_resize_value=FLAGS.min_resize_value,
                                     max_resize_value=FLAGS.max_resize_value,
                                     resize_factor=FLAGS.resize_factor,
                                     model_variant=FLAGS.model_variant,
                                     num_readers=2,
                                     is_training=False,
                                     should_shuffle=False,
                                     should_repeat=False)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = dataset.get_one_shot_iterator().get_next()

        model_options = common.ModelOptions(outputs_to_num_classes={
            common.OUTPUT_TYPE:
            dataset.num_of_classes
        },
                                            crop_size=FLAGS.eval_crop_size,
                                            atrous_rates=FLAGS.atrous_rates,
                                            output_stride=FLAGS.output_stride)

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales,
                add_flipped_images=FLAGS.add_flipped_images)
        predictions = predictions[common.OUTPUT_TYPE]
        predictions = tf.reshape(predictions, shape=[-1])
        labels = tf.reshape(samples[common.LABEL], shape=[-1])
        weights = tf.to_float(tf.not_equal(labels, dataset.ignore_label))

        # Set ignore_label regions to label 0, because metrics.mean_iou requires
        # range of labels = [0, dataset.num_of_classes). Note the ignore_label regions
        # are not evaluated since the corresponding regions contain weights = 0.
        labels = tf.where(tf.equal(labels, dataset.ignore_label),
                          tf.zeros_like(labels), labels)

        predictions_tag = 'miou'
        for eval_scale in FLAGS.eval_scales:
            predictions_tag += '_' + str(eval_scale)
        if FLAGS.add_flipped_images:
            predictions_tag += '_flipped'

        # Define the evaluation metric.
        metric_map = {}
        # ============ Added by B.A.D. =====================
        indices = tf.squeeze(
            tf.where(tf.less_equal(labels, dataset.num_of_classes - 1)), 1)
        labels = tf.cast(tf.gather(labels, indices), tf.int32)
        predictions = tf.gather(predictions, indices)
        weights = tf.gather(weights, indices)  # Keep weights aligned with the gathered pixels.
        # ==============================================
        metric_map[predictions_tag] = tf.metrics.mean_iou(
            predictions, labels, dataset.num_of_classes, weights=weights)

        metrics_to_values, metrics_to_updates = (
            tf.contrib.metrics.aggregate_metric_map(metric_map))

        for metric_name, metric_value in six.iteritems(metrics_to_values):
            slim.summaries.add_scalar_summary(metric_value,
                                              metric_name,
                                              print_summary=True)

        num_batches = int(
            math.ceil(dataset.num_samples / float(FLAGS.eval_batch_size)))

        tf.logging.info('Eval num images %d', dataset.num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.eval_batch_size, num_batches)

        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations
        slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=num_batches,
            eval_op=list(metrics_to_updates.values()),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs)