def test_streaming_metric_on_single_image(self):
  offset = 256 * 256

  # Map instance IDs in the ground-truth PNG to semantic class IDs.
  instance_class_map = {
      0: 0,
      47: 1,
      97: 1,
      133: 1,
      150: 1,
      174: 1,
      198: 2,
      215: 1,
      244: 1,
      255: 1,
  }
  gt_instances, gt_classes = test_utils.panoptic_segmentation_with_class_map(
      'team_gt_instance.png', instance_class_map)

  pred_classes = test_utils.read_segmentation_with_rgb_color_map(
      'team_pred_class.png', _CLASS_COLOR_MAP)
  pred_instances = test_utils.read_test_image(
      'team_pred_instance.png', mode='L')

  gt_class_tensor = tf.placeholder(tf.uint16)
  gt_instance_tensor = tf.placeholder(tf.uint16)
  pred_class_tensor = tf.placeholder(tf.uint16)
  pred_instance_tensor = tf.placeholder(tf.uint16)
  qualities, update_pq = streaming_metrics.streaming_panoptic_quality(
      gt_class_tensor,
      gt_instance_tensor,
      pred_class_tensor,
      pred_instance_tensor,
      num_classes=3,
      max_instances_per_category=256,
      ignored_label=0,
      offset=offset)
  pq, sq, rq, total_tp, total_fn, total_fp = tf.unstack(qualities, 6, axis=0)
  feed_dict = {
      gt_class_tensor: gt_classes,
      gt_instance_tensor: gt_instances,
      pred_class_tensor: pred_classes,
      pred_instance_tensor: pred_instances,
  }

  with self.session() as sess:
    sess.run(tf.local_variables_initializer())
    # One update call accumulates the single image; then read back the
    # per-class results.
    sess.run(update_pq, feed_dict=feed_dict)
    (result_pq, result_sq, result_rq, result_total_tp, result_total_fn,
     result_total_fp) = sess.run(
         [pq, sq, rq, total_tp, total_fn, total_fp], feed_dict=feed_dict)

  np.testing.assert_array_almost_equal(
      result_pq, [2.06104, 0.7024, 0.54069], decimal=4)
  np.testing.assert_array_almost_equal(
      result_sq, [2.06104, 0.7526, 0.54069], decimal=4)
  np.testing.assert_array_almost_equal(result_rq, [1., 0.9333, 1.], decimal=4)
  np.testing.assert_array_almost_equal(
      result_total_tp, [1., 7., 1.], decimal=4)
  np.testing.assert_array_almost_equal(
      result_total_fn, [0., 1., 0.], decimal=4)
  np.testing.assert_array_almost_equal(
      result_total_fp, [0., 0., 0.], decimal=4)
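
# For reference, the arrays checked above follow the standard panoptic-quality
# decomposition (Kirillov et al., "Panoptic Segmentation"): RQ is an F1-style
# detection score, SQ is the mean IoU over matched segments, and PQ = SQ * RQ.
# A minimal NumPy sketch of that relationship; this helper is illustrative and
# not part of streaming_metrics:
def reference_panoptic_quality(sum_matched_iou, tp, fp, fn):
  """Per-class PQ/SQ/RQ from accumulated totals (illustrative only)."""
  sq = sum_matched_iou / np.maximum(tp, 1e-12)
  rq = tp / np.maximum(tp + 0.5 * fp + 0.5 * fn, 1e-12)
  return sq * rq, sq, rq

# Consistency check against the middle class above (tp=7, fn=1, fp=0,
# implying a summed matched IoU of about 5.268):
# rq = 7 / 7.5 = 0.9333, sq = 5.268 / 7 = 0.7526, pq = 0.7526 * 0.9333
# = 0.7024, matching result_rq, result_sq and result_pq for that class.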
def main(unused_argv):
  tf.logging.set_verbosity(tf.logging.INFO)

  dataset = data_generator.Dataset(
      dataset_name=FLAGS.dataset,
      split_name=FLAGS.eval_split,
      dataset_dir=FLAGS.dataset_dir,
      batch_size=FLAGS.eval_batch_size,
      crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
      min_resize_value=FLAGS.min_resize_value,
      max_resize_value=FLAGS.max_resize_value,
      resize_factor=FLAGS.resize_factor,
      model_variant=FLAGS.model_variant,
      num_readers=2,
      is_training=False,
      should_shuffle=False,
      should_repeat=False)

  tf.gfile.MakeDirs(FLAGS.eval_logdir)
  tf.logging.info('Evaluating on %s set', FLAGS.eval_split)

  with tf.Graph().as_default():
    samples = dataset.get_one_shot_iterator().get_next()

    model_options = common.ModelOptions(
        outputs_to_num_classes={
            common.OUTPUT_TYPE: dataset.num_of_classes,
            common.INSTANCE: 1,
            common.OFFSET: 2
        },
        crop_size=[int(sz) for sz in FLAGS.eval_crop_size],
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    # Set shape in order for tf.contrib.tfprof.model_analyzer to work properly.
    samples[common.IMAGE].set_shape([
        FLAGS.eval_batch_size,
        int(FLAGS.eval_crop_size[0]),
        int(FLAGS.eval_crop_size[1]),
        3
    ])

    if tuple(FLAGS.eval_scales) == (1.0,):
      tf.logging.info('Performing single-scale test.')
      predictions = model.predict_labels(
          samples[common.IMAGE],
          model_options,
          image_pyramid=FLAGS.image_pyramid)
    else:
      tf.logging.info('Performing multi-scale test.')
      if FLAGS.quantize_delay_step >= 0:
        raise ValueError(
            'Quantize mode is not supported with multi-scale test.')
      # `predictions` must also be defined on this branch; following deeplab's
      # eval.py, use the multi-scale helper (this assumes the
      # add_flipped_images flag is defined as in that script).
      predictions = model.predict_labels_multi_scale(
          samples[common.IMAGE],
          model_options=model_options,
          eval_scales=FLAGS.eval_scales,
          add_flipped_images=FLAGS.add_flipped_images)

    predictions_semantic = predictions[common.OUTPUT_TYPE]
    predictions_center_points = predictions[common.INSTANCE]
    predictions_offset_vectors = predictions[common.OFFSET]

    # Group pixels into instances: pooling-based non-maxima suppression picks
    # instance centers from the center heatmap (responses below 0.1 are
    # discarded), and the offset vectors assign each pixel to a surviving
    # center. A sketch of the NMS step follows main() below.
    instance_prediction = generate_instance_segmentation(
        predictions_semantic, predictions_center_points,
        predictions_offset_vectors)

    category_prediction = tf.squeeze(predictions_semantic)
    category_label = tf.squeeze(samples[common.LABEL][0])
    not_ignore_mask = tf.not_equal(category_label, 255)
    category_label = tf.cast(
        category_label * tf.cast(not_ignore_mask, tf.int32), tf.int32)
    instance_label = tf.squeeze(samples[common.LABEL_INSTANCE_IDS][0])
    category_prediction = category_prediction * tf.cast(
        not_ignore_mask, tf.int64)
    instance_prediction = instance_prediction * tf.cast(
        not_ignore_mask, tf.int64)

    # Define the evaluation metrics (Cityscapes: 19 classes, ignore label 255).
    metric_map = {}
    metric_map['panoptic_quality'] = (
        streaming_metrics.streaming_panoptic_quality(
            category_label,
            instance_label,
            category_prediction,
            instance_prediction,
            num_classes=19,
            max_instances_per_category=256,
            ignored_label=255,
            offset=256 * 256))
    metric_map['parsing_covering'] = (
        streaming_metrics.streaming_parsing_covering(
            category_label,
            instance_label,
            category_prediction,
            instance_prediction,
            num_classes=19,
            max_instances_per_category=256,
            ignored_label=255,
            offset=256 * 256,
            normalize_by_image_size=True))
    metrics_to_values, metrics_to_updates = slim.metrics.aggregate_metric_map(
        metric_map)

    summary_ops = []
    for metric_name, metric_value in six.iteritems(metrics_to_values):
      if metric_name == 'panoptic_quality':
        [pq, sq, rq, total_tp, total_fn, total_fp] = tf.unstack(
            metric_value, 6, axis=0)
        panoptic_metrics = {
            # Panoptic quality.
            'pq': pq,
            # Segmentation quality.
            'sq': sq,
            # Recognition quality.
            'rq': rq,
            # Total true positives.
            'total_tp': total_tp,
            # Total false negatives.
            'total_fn': total_fn,
            # Total false positives.
            'total_fp': total_fp,
        }
        # Find the valid classes that will be used for evaluation. We ignore
        # the `ignore_label` class and any class whose (tp + fn + fp) is 0.
        valid_classes = tf.logical_and(
            tf.not_equal(
                tf.range(0, dataset.num_of_classes), dataset.ignore_label),
            tf.not_equal(total_tp + total_fn + total_fp, 0))
        for target_metric, target_value in six.iteritems(panoptic_metrics):
          output_metric_name = '{}_{}'.format(metric_name, target_metric)
          op = tf.summary.scalar(
              output_metric_name,
              tf.reduce_mean(tf.boolean_mask(target_value, valid_classes)))
          op = tf.Print(
              op, [target_value],
              output_metric_name + '_classwise: ',
              summarize=dataset.num_of_classes)
          op = tf.Print(
              op,
              [tf.reduce_mean(tf.boolean_mask(target_value, valid_classes))],
              output_metric_name + '_mean: ',
              summarize=1)
          summary_ops.append(op)
      elif metric_name == 'parsing_covering':
        [per_class_covering, total_per_class_weighted_ious,
         total_per_class_gt_areas] = tf.unstack(metric_value, 3, axis=0)
        # Find the valid classes that will be used for evaluation. We ignore
        # the `ignore_label` class and any class whose
        # (total_per_class_weighted_ious + total_per_class_gt_areas) is 0.
        valid_classes = tf.logical_and(
            tf.not_equal(
                tf.range(0, dataset.num_of_classes), dataset.ignore_label),
            tf.not_equal(
                total_per_class_weighted_ious + total_per_class_gt_areas, 0))
        op = tf.summary.scalar(
            metric_name,
            tf.reduce_mean(tf.boolean_mask(per_class_covering, valid_classes)))
        op = tf.Print(
            op, [per_class_covering],
            metric_name + '_classwise: ',
            summarize=dataset.num_of_classes)
        op = tf.Print(
            op,
            [tf.reduce_mean(
                tf.boolean_mask(per_class_covering, valid_classes))],
            metric_name + '_mean: ',
            summarize=1)
        summary_ops.append(op)
      else:
        raise ValueError('The metric_name "%s" is not supported.' %
                         metric_name)

    num_eval_iters = None
    if FLAGS.max_number_of_evaluations > 0:
      num_eval_iters = FLAGS.max_number_of_evaluations

    if FLAGS.quantize_delay_step >= 0:
      contrib_quantize.create_eval_graph()

    contrib_tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=contrib_tfprof.model_analyzer
        .TRAINABLE_VARS_PARAMS_STAT_OPTIONS)
    contrib_tfprof.model_analyzer.print_model_analysis(
        tf.get_default_graph(),
        tfprof_options=contrib_tfprof.model_analyzer.FLOAT_OPS_OPTIONS)

    metric_values = slim.evaluation.evaluation_loop(
        master=FLAGS.master,
        checkpoint_dir=FLAGS.checkpoint_dir,
        logdir=FLAGS.eval_logdir,
        # Number of mini-batches aggregated per evaluation run.
        num_evals=20,
        eval_op=list(metrics_to_updates.values()),
        final_op=list(metrics_to_values.values()),
        summary_op=tf.summary.merge(summary_ops),
        # Use the computed limit; None means evaluate indefinitely.
        max_number_of_evaluations=num_eval_iters,
        eval_interval_secs=FLAGS.eval_interval_secs)
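
# The body of generate_instance_segmentation is not shown here; per the
# comments in main(), it selects instance centers via pooling-based NMS with a
# 0.1 confidence cutoff before grouping pixels by their offset vectors. Below
# is a sketch of how that NMS step is commonly implemented; the kernel size,
# threshold default and function name are assumptions, not taken from this
# code:
def keep_center_candidates(center_heatmap, kernel_size=7, threshold=0.1):
  """Keeps local maxima of a [batch, height, width, 1] center heatmap."""
  # A pixel survives only if it equals the max of its neighborhood (i.e. it
  # is a local maximum) and its score clears the confidence threshold.
  pooled = tf.nn.max_pool(
      center_heatmap,
      ksize=[1, kernel_size, kernel_size, 1],
      strides=[1, 1, 1, 1],
      padding='SAME')
  keep = tf.logical_and(
      tf.equal(center_heatmap, pooled),
      tf.greater_equal(center_heatmap, threshold))
  return tf.where(keep, center_heatmap, tf.zeros_like(center_heatmap))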
def test_streaming_metric_on_multiple_images(self):
  num_classes = 7
  offset = 256 * 256

  bird_gt_instance_class_map = {
      92: 5,
      176: 3,
      255: 4,
  }
  cat_gt_instance_class_map = {
      0: 0,
      255: 6,
  }
  team_gt_instance_class_map = {
      0: 0,
      47: 1,
      97: 1,
      133: 1,
      150: 1,
      174: 1,
      198: 2,
      215: 1,
      244: 1,
      255: 1,
  }
  test_image = collections.namedtuple(
      'TestImage',
      ['gt_class_map', 'gt_path', 'pred_inst_path', 'pred_class_path'])
  test_images = [
      test_image(bird_gt_instance_class_map, 'bird_gt.png',
                 'bird_pred_instance.png', 'bird_pred_class.png'),
      test_image(cat_gt_instance_class_map, 'cat_gt.png',
                 'cat_pred_instance.png', 'cat_pred_class.png'),
      test_image(team_gt_instance_class_map, 'team_gt_instance.png',
                 'team_pred_instance.png', 'team_pred_class.png'),
  ]

  gt_classes = []
  gt_instances = []
  pred_classes = []
  pred_instances = []
  for test_image in test_images:
    (image_gt_instances,
     image_gt_classes) = test_utils.panoptic_segmentation_with_class_map(
         test_image.gt_path, test_image.gt_class_map)
    gt_classes.append(image_gt_classes)
    gt_instances.append(image_gt_instances)
    pred_classes.append(
        test_utils.read_segmentation_with_rgb_color_map(
            test_image.pred_class_path, _CLASS_COLOR_MAP))
    pred_instances.append(
        test_utils.read_test_image(test_image.pred_inst_path, mode='L'))

  gt_class_tensor = tf.placeholder(tf.uint16)
  gt_instance_tensor = tf.placeholder(tf.uint16)
  pred_class_tensor = tf.placeholder(tf.uint16)
  pred_instance_tensor = tf.placeholder(tf.uint16)
  qualities, update_pq = streaming_metrics.streaming_panoptic_quality(
      gt_class_tensor,
      gt_instance_tensor,
      pred_class_tensor,
      pred_instance_tensor,
      num_classes=num_classes,
      max_instances_per_category=256,
      ignored_label=0,
      offset=offset)
  pq, sq, rq, total_tp, total_fn, total_fp = tf.unstack(qualities, 6, axis=0)

  with self.session() as sess:
    sess.run(tf.local_variables_initializer())
    # Accumulate the metric over all test images with repeated update calls.
    for pred_class, pred_instance, gt_class, gt_instance in six.moves.zip(
        pred_classes, pred_instances, gt_classes, gt_instances):
      sess.run(
          update_pq,
          feed_dict={
              gt_class_tensor: gt_class,
              gt_instance_tensor: gt_instance,
              pred_class_tensor: pred_class,
              pred_instance_tensor: pred_instance
          })
    # The value tensors only read the accumulated totals, so the placeholders
    # can be fed dummy scalars here.
    (result_pq, result_sq, result_rq, result_total_tp, result_total_fn,
     result_total_fp) = sess.run(
         [pq, sq, rq, total_tp, total_fn, total_fp],
         feed_dict={
             gt_class_tensor: 0,
             gt_instance_tensor: 0,
             pred_class_tensor: 0,
             pred_instance_tensor: 0
         })

  np.testing.assert_array_almost_equal(
      result_pq,
      [4.3107, 0.7024, 0.54069, 0.745353, 0.85768, 0.99107, 0.77410],
      decimal=4)
  np.testing.assert_array_almost_equal(
      result_sq, [5.3883, 0.7526, 0.5407, 0.7454, 0.8577, 0.9911, 0.7741],
      decimal=4)
  np.testing.assert_array_almost_equal(
      result_rq, [0.8, 0.9333, 1., 1., 1., 1., 1.], decimal=4)
  np.testing.assert_array_almost_equal(
      result_total_tp, [2., 7., 1., 1., 1., 1., 1.], decimal=4)
  np.testing.assert_array_almost_equal(
      result_total_fn, [0., 1., 0., 0., 0., 0., 0.], decimal=4)
  np.testing.assert_array_almost_equal(
      result_total_fp, [1., 0., 0., 0., 0., 0., 0.], decimal=4)
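
# Note on the offset/max_instances_per_category arguments used throughout:
# implementations of this metric typically flatten each (class, instance) pair
# into a single segment ID and then key every (ground truth, prediction) pixel
# pairing by one integer, so `offset` must exceed the largest possible segment
# ID, i.e. num_classes * max_instances_per_category. A sketch of that encoding
# under those assumptions; the helper names are illustrative, not from
# streaming_metrics:
def _combine_labels(category, instance, max_instances_per_category=256):
  """Encodes a (class, instance) pair as one segment ID (illustrative)."""
  return (category.astype(np.uint32) * max_instances_per_category +
          instance.astype(np.uint32))

def _intersection_ids(gt_category, gt_instance, pred_category, pred_instance,
                      offset=256 * 256):
  """One ID per (gt segment, pred segment) pixel pairing (illustrative)."""
  gt_id = _combine_labels(gt_category, gt_instance)
  pred_id = _combine_labels(pred_category, pred_instance)
  # offset = 256 * 256 = 65536 comfortably exceeds the 7 * 256 possible
  # segment IDs above, so ground-truth and predicted IDs never collide.
  return gt_id * offset + pred_id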