Example #1
  def test_streaming_metric_on_single_image(self):
    offset = 256 * 256

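    # Pixel values in the ground-truth instance PNG mapped to semantic class
    # ids; class 0 is the ignored label in this test.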
    instance_class_map = {
        0: 0,
        47: 1,
        97: 1,
        133: 1,
        150: 1,
        174: 1,
        198: 2,
        215: 1,
        244: 1,
        255: 1,
    }
    gt_instances, gt_classes = test_utils.panoptic_segmentation_with_class_map(
        'team_gt_instance.png', instance_class_map)

    pred_classes = test_utils.read_segmentation_with_rgb_color_map(
        'team_pred_class.png', _CLASS_COLOR_MAP)
    pred_instances = test_utils.read_test_image(
        'team_pred_instance.png', mode='L')

    gt_class_tensor = tf.placeholder(tf.uint16)
    gt_instance_tensor = tf.placeholder(tf.uint16)
    pred_class_tensor = tf.placeholder(tf.uint16)
    pred_instance_tensor = tf.placeholder(tf.uint16)
    qualities, update_pq = streaming_metrics.streaming_panoptic_quality(
        gt_class_tensor,
        gt_instance_tensor,
        pred_class_tensor,
        pred_instance_tensor,
        num_classes=3,
        max_instances_per_category=256,
        ignored_label=0,
        offset=offset)
    pq, sq, rq, total_tp, total_fn, total_fp = tf.unstack(qualities, 6, axis=0)
    feed_dict = {
        gt_class_tensor: gt_classes,
        gt_instance_tensor: gt_instances,
        pred_class_tensor: pred_classes,
        pred_instance_tensor: pred_instances
    }

    with self.session() as sess:
      sess.run(tf.local_variables_initializer())
      sess.run(update_pq, feed_dict=feed_dict)
      (result_pq, result_sq, result_rq, result_total_tp, result_total_fn,
       result_total_fp) = sess.run([pq, sq, rq, total_tp, total_fn, total_fp],
                                   feed_dict=feed_dict)
    np.testing.assert_array_almost_equal(
        result_pq, [2.06104, 0.7024, 0.54069], decimal=4)
    np.testing.assert_array_almost_equal(
        result_sq, [2.06104, 0.7526, 0.54069], decimal=4)
    np.testing.assert_array_almost_equal(result_rq, [1., 0.9333, 1.], decimal=4)
    np.testing.assert_array_almost_equal(
        result_total_tp, [1., 7., 1.], decimal=4)
    np.testing.assert_array_almost_equal(
        result_total_fn, [0., 1., 0.], decimal=4)
    np.testing.assert_array_almost_equal(
        result_total_fp, [0., 0., 0.], decimal=4)
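
By definition, per-class panoptic quality is the product of segmentation quality and recognition quality (PQ = SQ * RQ). A minimal NumPy sanity check of that identity against the expected arrays above (not part of the original test):

import numpy as np

expected_sq = np.array([2.06104, 0.7526, 0.54069])
expected_rq = np.array([1., 0.9333, 1.])
expected_pq = np.array([2.06104, 0.7024, 0.54069])
# PQ should equal SQ * RQ class by class.
np.testing.assert_array_almost_equal(expected_pq, expected_sq * expected_rq,
                                     decimal=4)
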
Example #2
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)

    dataset = data_generator.Dataset(
        dataset_name=FLAGS.dataset,
        split_name=FLAGS.eval_split,
        dataset_dir=FLAGS.dataset_dir,
        batch_size=FLAGS.eval_batch_size,
        crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
        min_resize_value=FLAGS.min_resize_value,
        max_resize_value=FLAGS.max_resize_value,
        resize_factor=FLAGS.resize_factor,
        model_variant=FLAGS.model_variant,
        num_readers=2,
        is_training=False,
        should_shuffle=False,
        should_repeat=False)

    tf.gfile.MakeDirs(FLAGS.eval_logdir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

    with tf.Graph().as_default():
        samples = dataset.get_one_shot_iterator().get_next()

        model_options = common.ModelOptions(
            outputs_to_num_classes={
                common.OUTPUT_TYPE: dataset.num_of_classes,
                common.INSTANCE: 1,
                common.OFFSET: 2
            },
            crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
            atrous_rates=FLAGS.atrous_rates,
            output_stride=FLAGS.output_stride)

        # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
        samples[common.IMAGE].set_shape([
            FLAGS.eval_batch_size,
            int(FLAGS.eval_crop_size[0]),
            int(FLAGS.eval_crop_size[1]), 3
        ])

        if tuple(FLAGS.eval_scales) == (1.0, ):
            tf.logging.info('Performing single-scale test.')
            predictions = model.predict_labels(
                samples[common.IMAGE],
                model_options,
                image_pyramid=FLAGS.image_pyramid)
        else:
            tf.logging.info('Performing multi-scale test.')
            if FLAGS.quantize_delay_step >= 0:
                raise ValueError(
                    'Quantize mode is not supported with multi-scale test.')
            # Multi-scale inference; without this call, `predictions` would be
            # undefined in this branch.
            predictions = model.predict_labels_multi_scale(
                samples[common.IMAGE],
                model_options=model_options,
                eval_scales=FLAGS.eval_scales)

        predictions_semantic = predictions[common.OUTPUT_TYPE]
        predictions_center_points = predictions[common.INSTANCE]
        predictions_offset_vectors = predictions[common.OFFSET]

        # Pooling-based non-maximum suppression over the predicted instance
        # centers; center predictions with scores below 0.1 are filtered out.
        instance_prediction = generate_instance_segmentation(
            predictions_semantic, predictions_center_points,
            predictions_offset_vectors)

        category_prediction = tf.squeeze(predictions_semantic)

        category_label = tf.squeeze(samples[common.LABEL][0])
        not_ignore_mask = tf.not_equal(category_label, 255)
        category_label = tf.cast(
            category_label * tf.cast(not_ignore_mask, tf.int32), tf.int32)
        instance_label = tf.squeeze(samples[common.LABEL_INSTANCE_IDS][0])
        category_prediction = category_prediction * tf.cast(
            not_ignore_mask, tf.int64)
        instance_prediction = instance_prediction * tf.cast(
            not_ignore_mask, tf.int64)

        # Define the evaluation metric.
        metric_map = {}
        metric_map[
            'panoptic_quality'] = streaming_metrics.streaming_panoptic_quality(
                category_label,
                instance_label,
                category_prediction,
                instance_prediction,
                num_classes=19,
                max_instances_per_category=256,
                ignored_label=255,
                offset=256 * 256)
        metric_map[
            'parsing_covering'] = streaming_metrics.streaming_parsing_covering(
                category_label,
                instance_label,
                category_prediction,
                instance_prediction,
                num_classes=19,
                max_instances_per_category=256,
                ignored_label=255,
                offset=256 * 256,
                normalize_by_image_size=True)
        metrics_to_values, metrics_to_updates = slim.metrics.aggregate_metric_map(
            metric_map)

        summary_ops = []
        for metric_name, metric_value in metrics_to_values.items():
            if metric_name == 'panoptic_quality':
                [pq, sq, rq, total_tp, total_fn,
                 total_fp] = tf.unstack(metric_value, 6, axis=0)
                panoptic_metrics = {
                    # Panoptic quality.
                    'pq': pq,
                    # Segmentation quality.
                    'sq': sq,
                    # Recognition quality.
                    'rq': rq,
                    # Total true positives.
                    'total_tp': total_tp,
                    # Total false negatives.
                    'total_fn': total_fn,
                    # Total false positives.
                    'total_fp': total_fp,
                }
                # Find the valid classes that will be used for evaluation. We will
                # ignore the `ignore_label` class and other classes which have (tp + fn
                # + fp) equal to 0.
                valid_classes = tf.logical_and(
                    tf.not_equal(tf.range(0, dataset.num_of_classes),
                                 dataset.ignore_label),
                    tf.not_equal(total_tp + total_fn + total_fp, 0))
                for target_metric, target_value in panoptic_metrics.items():
                    output_metric_name = '{}_{}'.format(
                        metric_name, target_metric)
                    op = tf.summary.scalar(
                        output_metric_name,
                        tf.reduce_mean(
                            tf.boolean_mask(target_value, valid_classes)))
                    op = tf.Print(op, [target_value],
                                  output_metric_name + '_classwise: ',
                                  summarize=dataset.num_of_classes)
                    op = tf.Print(op, [
                        tf.reduce_mean(
                            tf.boolean_mask(target_value, valid_classes))
                    ],
                                  output_metric_name + '_mean: ',
                                  summarize=1)
                    summary_ops.append(op)
            elif metric_name == 'parsing_covering':
                [
                    per_class_covering, total_per_class_weighted_ious,
                    total_per_class_gt_areas
                ] = tf.unstack(metric_value, 3, axis=0)
                # Find the valid classes that will be used for evaluation. We will
                # ignore the `void_label` class and other classes which have
                # total_per_class_weighted_ious + total_per_class_gt_areas equal to 0.
                valid_classes = tf.logical_and(
                    tf.not_equal(tf.range(0, dataset.num_of_classes),
                                 dataset.ignore_label),
                    tf.not_equal(
                        total_per_class_weighted_ious +
                        total_per_class_gt_areas, 0))
                op = tf.summary.scalar(
                    metric_name,
                    tf.reduce_mean(
                        tf.boolean_mask(per_class_covering, valid_classes)))
                op = tf.Print(op, [per_class_covering],
                              metric_name + '_classwise: ',
                              summarize=dataset.num_of_classes)
                op = tf.Print(op, [
                    tf.reduce_mean(
                        tf.boolean_mask(per_class_covering, valid_classes))
                ],
                              metric_name + '_mean: ',
                              summarize=1)
                summary_ops.append(op)
            else:
                raise ValueError('The metric_name "%s" is not supported.' %
                                 metric_name)

        num_eval_iters = None
        if FLAGS.max_number_of_evaluations > 0:
            num_eval_iters = FLAGS.max_number_of_evaluations

        if FLAGS.quantize_delay_step >= 0:
            contrib_quantize.create_eval_graph()

        contrib_tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=contrib_tfprof.model_analyzer.
            TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
        contrib_tfprof.model_analyzer.print_model_analysis(
            tf.get_default_graph(),
            tfprof_options=contrib_tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

        metric_values = slim.evaluation.evaluation_loop(
            master=FLAGS.master,
            checkpoint_dir=FLAGS.checkpoint_dir,
            logdir=FLAGS.eval_logdir,
            num_evals=20,
            eval_op=list(metrics_to_updates.values()),
            final_op=list(metrics_to_values.values()),
            summary_op=tf.summary.merge(summary_ops),
            max_number_of_evaluations=num_eval_iters,
            eval_interval_secs=FLAGS.eval_interval_secs)
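
The helper `generate_instance_segmentation` used above is not shown in this example. Below is a rough, hypothetical sketch of the pooling-based NMS and offset grouping its comments describe; the 0.1 threshold comes from the comment, while the kernel size, top-k limit, and the treatment of label 0 as background are assumptions, not the original implementation.

import tensorflow as tf


def generate_instance_segmentation_sketch(semantic, centers, offsets,
                                          nms_kernel=7, threshold=0.1,
                                          top_k=200):
  """Hypothetical sketch: group pixels into instances from center/offset maps.

  semantic: [1, H, W] semantic labels, centers: [1, H, W, 1] center heatmap,
  offsets: [1, H, W, 2] per-pixel (dy, dx) offsets to the instance center.
  """
  # Pooling-based NMS: keep only local maxima of the center heatmap.
  pooled = tf.nn.max_pool(centers, ksize=[1, nms_kernel, nms_kernel, 1],
                          strides=[1, 1, 1, 1], padding='SAME')
  peaks = tf.where(tf.equal(centers, pooled), centers, tf.zeros_like(centers))
  # Filter out center predictions with scores below the threshold.
  peaks = tf.where(peaks > threshold, peaks, tf.zeros_like(peaks))

  height = tf.shape(centers)[1]
  width = tf.shape(centers)[2]
  scores, indices = tf.nn.top_k(tf.reshape(peaks, [-1]), k=top_k)
  center_y = tf.cast(indices // width, tf.float32)
  center_x = tf.cast(indices % width, tf.float32)

  # Every pixel votes for a center location via its predicted offset.
  y_grid, x_grid = tf.meshgrid(tf.range(height), tf.range(width), indexing='ij')
  voted_y = tf.cast(y_grid, tf.float32) + offsets[0, :, :, 0]
  voted_x = tf.cast(x_grid, tf.float32) + offsets[0, :, :, 1]

  # Squared distance from each pixel's vote to every kept center; centers that
  # were filtered out (score 0) are pushed far away so they are never chosen.
  dist = (tf.square(voted_y[..., tf.newaxis] - center_y) +
          tf.square(voted_x[..., tf.newaxis] - center_x))
  dist += tf.cast(scores <= 0, tf.float32) * 1e10
  instance_ids = tf.argmin(dist, axis=-1) + 1  # Instance ids start at 1.

  # Assumption: label 0 is background/"stuff"; everything else is a "thing".
  things_mask = tf.cast(tf.squeeze(semantic) > 0, tf.int64)
  return instance_ids * things_mask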
Example #3
  def test_streaming_metric_on_multiple_images(self):
    num_classes = 7
    offset = 256 * 256

    bird_gt_instance_class_map = {
        92: 5,
        176: 3,
        255: 4,
    }
    cat_gt_instance_class_map = {
        0: 0,
        255: 6,
    }
    team_gt_instance_class_map = {
        0: 0,
        47: 1,
        97: 1,
        133: 1,
        150: 1,
        174: 1,
        198: 2,
        215: 1,
        244: 1,
        255: 1,
    }
    test_image = collections.namedtuple(
        'TestImage',
        ['gt_class_map', 'gt_path', 'pred_inst_path', 'pred_class_path'])
    test_images = [
        test_image(bird_gt_instance_class_map, 'bird_gt.png',
                   'bird_pred_instance.png', 'bird_pred_class.png'),
        test_image(cat_gt_instance_class_map, 'cat_gt.png',
                   'cat_pred_instance.png', 'cat_pred_class.png'),
        test_image(team_gt_instance_class_map, 'team_gt_instance.png',
                   'team_pred_instance.png', 'team_pred_class.png'),
    ]

    gt_classes = []
    gt_instances = []
    pred_classes = []
    pred_instances = []
    for test_image in test_images:
      (image_gt_instances,
       image_gt_classes) = test_utils.panoptic_segmentation_with_class_map(
           test_image.gt_path, test_image.gt_class_map)
      gt_classes.append(image_gt_classes)
      gt_instances.append(image_gt_instances)

      pred_classes.append(
          test_utils.read_segmentation_with_rgb_color_map(
              test_image.pred_class_path, _CLASS_COLOR_MAP))
      pred_instances.append(
          test_utils.read_test_image(test_image.pred_inst_path, mode='L'))

    gt_class_tensor = tf.placeholder(tf.uint16)
    gt_instance_tensor = tf.placeholder(tf.uint16)
    pred_class_tensor = tf.placeholder(tf.uint16)
    pred_instance_tensor = tf.placeholder(tf.uint16)
    qualities, update_pq = streaming_metrics.streaming_panoptic_quality(
        gt_class_tensor,
        gt_instance_tensor,
        pred_class_tensor,
        pred_instance_tensor,
        num_classes=num_classes,
        max_instances_per_category=256,
        ignored_label=0,
        offset=offset)
    pq, sq, rq, total_tp, total_fn, total_fp = tf.unstack(qualities, 6, axis=0)
    with self.session() as sess:
      sess.run(tf.local_variables_initializer())
      for pred_class, pred_instance, gt_class, gt_instance in six.moves.zip(
          pred_classes, pred_instances, gt_classes, gt_instances):
        sess.run(
            update_pq,
            feed_dict={
                gt_class_tensor: gt_class,
                gt_instance_tensor: gt_instance,
                pred_class_tensor: pred_class,
                pred_instance_tensor: pred_instance
            })
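      # Only the accumulated value tensors are fetched below (no update op is
      # run), so the placeholders are fed dummy zeros.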
      (result_pq, result_sq, result_rq, result_total_tp, result_total_fn,
       result_total_fp) = sess.run(
           [pq, sq, rq, total_tp, total_fn, total_fp],
           feed_dict={
               gt_class_tensor: 0,
               gt_instance_tensor: 0,
               pred_class_tensor: 0,
               pred_instance_tensor: 0
           })
    np.testing.assert_array_almost_equal(
        result_pq,
        [4.3107, 0.7024, 0.54069, 0.745353, 0.85768, 0.99107, 0.77410],
        decimal=4)
    np.testing.assert_array_almost_equal(
        result_sq, [5.3883, 0.7526, 0.5407, 0.7454, 0.8577, 0.9911, 0.7741],
        decimal=4)
    np.testing.assert_array_almost_equal(
        result_rq, [0.8, 0.9333, 1., 1., 1., 1., 1.], decimal=4)
    np.testing.assert_array_almost_equal(
        result_total_tp, [2., 7., 1., 1., 1., 1., 1.], decimal=4)
    np.testing.assert_array_almost_equal(
        result_total_fn, [0., 1., 0., 0., 0., 0., 0.], decimal=4)
    np.testing.assert_array_almost_equal(
        result_total_fp, [1., 0., 0., 0., 0., 0., 0.], decimal=4)