def testForwardpassDeepLabv3plus(self):
    """Runs one forward pass of the model and sanity-checks the logits.

    Builds `model.multi_scale_logits` on a random (1, 33, 33, 3) input with a
    single image-pyramid scale, evaluates the result in a session, and asserts
    that every output type yields exactly one logits entry for which
    `logits.any()` is truthy.
    """
    crop_size = [33, 33]
    outputs_to_num_classes = {'semantic': 3}

    # Employ MobileNetv2 for fast test.
    model_options = common.ModelOptions(
        outputs_to_num_classes,
        crop_size,
        output_stride=16,
    )._replace(
        add_image_level_feature=True,
        aspp_with_batch_norm=True,
        logits_kernel_size=1,
        model_variant='mobilenet_v2')

    g = tf.Graph()
    with g.as_default():
        with self.test_session(graph=g) as sess:
            inputs = tf.random_uniform(
                (1, crop_size[0], crop_size[1], 3))
            outputs_to_scales_to_logits = model.multi_scale_logits(
                inputs,
                model_options,
                image_pyramid=[1.0])

            sess.run(tf.global_variables_initializer())
            outputs_to_scales_to_logits = sess.run(outputs_to_scales_to_logits)

            # Check computed results for each output type.
            for output in outputs_to_num_classes:
                scales_to_logits = outputs_to_scales_to_logits[output]
                # Expect only one output. `assertEqual` is used because the
                # `assertEquals` alias is deprecated and removed in Python 3.12.
                self.assertEqual(len(scales_to_logits), 1)
                for logits in scales_to_logits.values():
                    self.assertTrue(logits.any())
def testBuildDeepLabWithDensePredictionCell(self):
    """Checks the endpoints produced when a dense prediction cell is configured.

    Builds `model.multi_scale_logits` with a two-branch
    `dense_prediction_cell_config` and a single image-pyramid scale, then
    asserts that each output type exposes only the 'merged_logits' endpoint.
    """
    num_images = 1
    height, width = 33, 33
    outputs_to_num_classes = {'semantic': 2}
    expected_endpoints = ['merged_logits']

    first_branch = {'kernel': 3, 'rate': [1, 6], 'op': 'conv', 'input': -1}
    second_branch = {'kernel': 3, 'rate': [18, 15], 'op': 'conv', 'input': 0}
    cell_config = [first_branch, second_branch]

    base_options = common.ModelOptions(
        outputs_to_num_classes, [height, width], output_stride=16)
    model_options = base_options._replace(
        aspp_with_batch_norm=True,
        model_variant='mobilenet_v2',
        dense_prediction_cell_config=cell_config)

    graph = tf.Graph()
    with graph.as_default(), self.test_session(graph=graph):
        images = tf.random_uniform((num_images, height, width, 3))
        results_by_output = model.multi_scale_logits(
            images, model_options, image_pyramid=[1.0])

        for output_name in outputs_to_num_classes:
            results_by_scale = results_by_output[output_name]
            # Only the merged endpoint is expected for a single scale.
            self.assertListEqual(list(results_by_scale), expected_endpoints)
            self.assertEqual(len(results_by_scale), 1)
def _build_deeplab(samples, outputs_to_num_classes, ignore_label):
    """Builds a clone of DeepLab.

    Args:
      samples: Feature map from input pipeline.
      outputs_to_num_classes: A map from output type to the number of classes.
        For example, for the task of semantic segmentation with 21 semantic
        classes, we would have outputs_to_num_classes['semantic'] = 21.
      ignore_label: Ignore label.

    Returns:
      A map of maps from output_type (e.g., semantic prediction) to a
        dictionary of multi-scale logits names to logits. For each output_type,
        the dictionary has keys which correspond to the scales and values which
        correspond to the logits. For example, if `scales` equals [1.0, 1.5],
        then the keys would include 'merged_logits', 'logits_1.00' and
        'logits_1.50'.
    """
    tf.summary.image('image', samples['image'], 4)
    if 'label' in samples:
        label_summary(samples['label'])
    if FLAGS.use_ref_exp:
        tf.summary.text('ref', samples[model_input.REF_EXP_ID])

    outputs_to_scales_to_logits = model.multi_scale_logits(
        samples['image'],
        samples,
        FLAGS,
        outputs_to_num_classes=outputs_to_num_classes,
        image_pyramid=FLAGS.image_pyramid,
        merge_method=FLAGS.merge_method,
        atrous_rates=FLAGS.atrous_rates,
        add_image_level_feature=FLAGS.add_image_level_feature,
        aspp_with_batch_norm=FLAGS.aspp_with_batch_norm,
        aspp_with_separable_conv=FLAGS.aspp_with_separable_conv,
        multi_grid=FLAGS.multi_grid,
        depth_multiplier=FLAGS.depth_multiplier,
        output_stride=FLAGS.output_stride,
        decoder_output_stride=FLAGS.decoder_output_stride,
        decoder_use_separable_conv=FLAGS.decoder_use_separable_conv,
        logits_kernel_size=FLAGS.logits_kernel_size,
        crop_size=[FLAGS.image_size, FLAGS.image_size],
        model_variant=FLAGS.model_variant,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    # `dict.items()` works on both Python 2 and Python 3; `iteritems()` only
    # exists on Python 2 dictionaries.
    # NOTE(review): the label summary above is guarded by `'label' in samples`,
    # but the loss below reads samples['label'] unconditionally.
    for output, num_classes in outputs_to_num_classes.items():
        logits_summary(outputs_to_scales_to_logits[output]['merged_logits'])
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            outputs_to_scales_to_logits[output],
            samples['label'],
            num_classes,
            ignore_label,
            loss_weight=1.0,
            upsample_logits=FLAGS.upsample_logits,
            scope=output)

    return outputs_to_scales_to_logits
def _build_deeplab(iterator, outputs_to_num_classes, ignore_label):
    """Builds a clone of DeepLab.

    Pulls the next batch from `iterator`, builds the multi-scale logits, adds
    a softmax cross-entropy loss per output type and logs summaries. The
    function has no return statement.

    Args:
      iterator: An iterator of type tf.data.Iterator for images and labels.
      outputs_to_num_classes: A map from output type to the number of classes.
        For example, for the task of semantic segmentation with 21 semantic
        classes, we would have outputs_to_num_classes['semantic'] = 21.
      ignore_label: Ignore label.
    """
    samples = iterator.get_next()

    # Give the input and label nodes stable names so they can be added to the
    # summary.
    # Disabled: samples[common.IMAGE].set_shape([FLAGS.train_batch_size, FLAGS.train_crop_size[0], FLAGS.train_crop_size[1], 3])
    for key in (common.IMAGE, common.LABEL):
        samples[key] = tf.identity(samples[key], name=key)

    crop_size = list(map(int, FLAGS.train_crop_size))
    model_options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride,
    )

    nas_hyper_parameters = {
        'drop_path_keep_prob': FLAGS.drop_path_keep_prob,
        'total_training_steps': FLAGS.training_number_of_steps,
    }
    outputs_to_scales_to_logits = model.multi_scale_logits(
        samples[common.IMAGE],
        model_options=model_options,
        image_pyramid=FLAGS.image_pyramid,
        skips=FLAGS.skips,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm,
        nas_training_hyper_parameters=nas_hyper_parameters)

    # Give the merged logits node a stable name so it can be added to the
    # summary.
    merged_key = model.MERGED_LOGITS_SCOPE
    output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
    output_type_dict[merged_key] = tf.identity(
        output_type_dict[merged_key], name=common.OUTPUT_TYPE)

    for output, num_classes in six.iteritems(outputs_to_num_classes):
        scales_to_logits = outputs_to_scales_to_logits[output]
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            scales_to_logits,
            samples[common.LABEL],
            num_classes,
            ignore_label,
            loss_weight=1.0,
            upsample_logits=FLAGS.upsample_logits,
            hard_example_mining_step=FLAGS.hard_example_mining_step,
            top_k_percent_pixels=FLAGS.top_k_percent_pixels,
            scope=output)

        # Log the summary
        _log_summaries(
            samples[common.IMAGE],
            samples[common.LABEL],
            num_classes,
            output_type_dict[merged_key])
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
    """Builds a clone of DeepLab.

    The per-scale loss is chosen by `FLAGS.loss_function`: the Lovasz-softmax
    loss when the flag equals 'lovasz', softmax cross-entropy otherwise.

    Args:
      inputs_queue: A prefetch queue for images and labels.
      outputs_to_num_classes: A map from output type to the number of classes.
        For example, for the task of semantic segmentation with 21 semantic
        classes, we would have outputs_to_num_classes['semantic'] = 21.
      ignore_label: Ignore label.

    Returns:
      A map of maps from output_type (e.g., semantic prediction) to a
        dictionary of multi-scale logits names to logits. For each output_type,
        the dictionary has keys which correspond to the scales and values which
        correspond to the logits. For example, if `scales` equals [1.0, 1.5],
        then the keys would include 'merged_logits', 'logits_1.00' and
        'logits_1.50'.
    """
    samples = inputs_queue.dequeue()

    # Add name to input and label nodes so we can add to summary.
    samples[common.IMAGE] = tf.identity(
        samples[common.IMAGE], name=common.IMAGE)
    samples[common.LABEL] = tf.identity(
        samples[common.LABEL], name=common.LABEL)

    model_options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)
    outputs_to_scales_to_logits = model.multi_scale_logits(
        samples[common.IMAGE],
        model_options=model_options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    # Add name to graph node so we can add to summary.
    output_type_dict = outputs_to_scales_to_logits[common.OUTPUT_TYPE]
    output_type_dict[model.MERGED_LOGITS_SCOPE] = tf.identity(
        output_type_dict[model.MERGED_LOGITS_SCOPE],
        name=common.OUTPUT_TYPE)

    # Compare by equality: `x in 'lovasz'` is a substring test, so values such
    # as '' or 'l' would also have selected the Lovasz loss. The choice does
    # not depend on the loop variables, so it is made once up front.
    if FLAGS.loss_function == 'lovasz':
        loss_func = train_utils.add_lovasz_softmax_loss_for_each_scale
    else:
        loss_func = train_utils.add_softmax_cross_entropy_loss_for_each_scale

    for output, num_classes in six.iteritems(outputs_to_num_classes):
        loss_func(
            outputs_to_scales_to_logits[output],
            samples[common.LABEL],
            num_classes,
            ignore_label,
            loss_weight=1.0,
            upsample_logits=FLAGS.upsample_logits,
            scope=output)

    return outputs_to_scales_to_logits
def testBuildDeepLabv2(self):
    """Checks the endpoint names for each model variant and image pyramid.

    For every combination of the two model variants and the two image
    pyramids, builds `model.multi_scale_logits` in a fresh graph and asserts
    that each output type exposes the expected endpoint names and count.
    """
    batch_size = 2
    crop_size = [41, 41]
    input_shape = (batch_size, crop_size[0], crop_size[1], 3)

    # Test with two output_types.
    outputs_to_num_classes = {'semantic': 3, 'direction': 2}

    # Each case is (image_pyramid, expected endpoint names, expected count).
    # The single-scale pyramid exposes only the merged logits; the two-scale
    # pyramid exposes one entry per scale plus the merged logits.
    image_pyramids = [[1], [0.5, 1]]
    expected_endpoints = [['merged_logits'],
                          ['merged_logits', 'logits_0.50', 'logits_1.00']]
    expected_num_logits = [1, 3]
    cases = list(zip(image_pyramids, expected_endpoints, expected_num_logits))

    # Test two model variants.
    for model_variant in ['xception_65', 'mobilenet_v2']:
        model_options = common.ModelOptions(
            outputs_to_num_classes)._replace(
                add_image_level_feature=False,
                aspp_with_batch_norm=False,
                aspp_with_separable_conv=False,
                model_variant=model_variant)

        for image_pyramid, endpoints, num_logits in cases:
            wanted_keys = sorted(endpoints)
            g = tf.Graph()
            with g.as_default():
                with self.test_session(graph=g):
                    inputs = tf.random_uniform(input_shape)
                    outputs_to_scales_to_logits = model.multi_scale_logits(
                        inputs, model_options, image_pyramid=image_pyramid)

                    # Check computed results for each output type.
                    for output in outputs_to_num_classes:
                        scales_to_logits = outputs_to_scales_to_logits[output]
                        self.assertListEqual(
                            sorted(scales_to_logits.keys()), wanted_keys)
                        self.assertEqual(len(scales_to_logits), num_logits)
def testBuildDeepLabv2(self):
    """Verifies the logits endpoints built for DeepLabv2-style options.

    Iterates over two model variants and two image pyramids; for each pair a
    new graph is created, `model.multi_scale_logits` is built on a random
    input, and the returned per-output dictionaries are compared against the
    expected endpoint names and sizes.
    """
    num_images = 2
    side = 41

    # Two image pyramids, paired index-by-index with the expectations below.
    pyramids = [[1], [0.5, 1]]
    endpoints_per_pyramid = [
        ['merged_logits'],
        ['merged_logits', 'logits_0.50', 'logits_1.00'],
    ]
    # One merged entry alone for a single scale; one entry per scale plus the
    # merged entry for two scales.
    counts_per_pyramid = [1, 3]

    # Two model variants and two output types.
    variants = ['xception_65', 'mobilenet_v2']
    outputs_to_num_classes = {'semantic': 3, 'direction': 2}

    for variant in variants:
        default_options = common.ModelOptions(outputs_to_num_classes)
        model_options = default_options._replace(
            add_image_level_feature=False,
            aspp_with_batch_norm=False,
            aspp_with_separable_conv=False,
            model_variant=variant)

        for idx, pyramid in enumerate(pyramids):
            graph = tf.Graph()
            with graph.as_default(), self.test_session(graph=graph):
                images = tf.random_uniform((num_images, side, side, 3))
                logits_by_output = model.multi_scale_logits(
                    images, model_options, image_pyramid=pyramid)

                # Check computed results for each output type.
                for output_name in outputs_to_num_classes:
                    logits_by_scale = logits_by_output[output_name]
                    actual_names = sorted(logits_by_scale.keys())
                    expected_names = sorted(endpoints_per_pyramid[idx])
                    self.assertListEqual(actual_names, expected_names)
                    self.assertEqual(
                        len(logits_by_scale), counts_per_pyramid[idx])
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
    """Builds a clone of DeepLab.

    Dequeues one batch, builds the multi-scale logits for it and adds a
    softmax cross-entropy loss for every output type.

    Args:
      inputs_queue: A prefetch queue for images and labels.
      outputs_to_num_classes: A map from output type to the number of classes.
        For example, for the task of semantic segmentation with 21 semantic
        classes, we would have outputs_to_num_classes['semantic'] = 21.
      ignore_label: Ignore label.

    Returns:
      A map of maps from output_type (e.g., semantic prediction) to a
        dictionary of multi-scale logits names to logits. For each output_type,
        the dictionary has keys which correspond to the scales and values which
        correspond to the logits. For example, if `scales` equals [1.0, 1.5],
        then the keys would include 'merged_logits', 'logits_1.00' and
        'logits_1.50'.
    """
    batch = inputs_queue.dequeue()

    options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    logits_by_output = model.multi_scale_logits(
        batch[common.IMAGE],
        model_options=options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    # One loss call per output type, scoped by the output name.
    for output_name, class_count in outputs_to_num_classes.items():
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            logits_by_output[output_name],
            batch[common.LABEL],
            class_count,
            ignore_label,
            loss_weight=1.0,
            upsample_logits=FLAGS.upsample_logits,
            scope=output_name)

    return logits_by_output
def _build_deeplab(inputs_queue, outputs_to_num_classes, ignore_label):
    """Builds a clone of DeepLab.

    Dequeues one batch, re-binds the image, label and merged-logits tensors
    through `tf.identity` under fixed names, and adds a softmax cross-entropy
    loss for every output type.

    Args:
      inputs_queue: Object whose `dequeue()` returns a mapping holding the
        `common.IMAGE` and `common.LABEL` entries.
      outputs_to_num_classes: A map from output type to the number of classes.
      ignore_label: Passed through to the loss function.

    Returns:
      The value returned by `model.multi_scale_logits`, with the merged-logits
      entry of the `common.OUTPUT_TYPE` dictionary replaced by its named
      identity tensor.
    """
    batch = inputs_queue.dequeue()

    # Name the input nodes.
    named_image = tf.identity(batch[common.IMAGE], name=common.IMAGE)
    batch[common.IMAGE] = named_image
    named_label = tf.identity(batch[common.LABEL], name=common.LABEL)
    batch[common.LABEL] = named_label

    options = common.ModelOptions(
        outputs_to_num_classes=outputs_to_num_classes,
        crop_size=FLAGS.train_crop_size,
        atrous_rates=FLAGS.atrous_rates,
        output_stride=FLAGS.output_stride)

    logits_by_output = model.multi_scale_logits(
        batch[common.IMAGE],
        model_options=options,
        image_pyramid=FLAGS.image_pyramid,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)

    # Name the merged logits node of the primary output type.
    primary_logits = logits_by_output[common.OUTPUT_TYPE]
    named_merged = tf.identity(
        primary_logits[model.get_merged_logits_scope()],
        name=common.OUTPUT_TYPE)
    primary_logits[model.get_merged_logits_scope()] = named_merged

    for output_name, class_count in six.iteritems(outputs_to_num_classes):
        train_utils.add_softmax_cross_entropy_loss_for_each_scale(
            logits_by_output[output_name],
            batch[common.LABEL],
            class_count,
            ignore_label,
            loss_weight=1.0,
            upsample_logits=FLAGS.upsample_logits,
            scope=output_name)

    return logits_by_output