def build_inputs(
    self,
    params: exp_cfg.DataConfig,
    input_context: Optional[tf.distribute.InputContext] = None):
  """Build input dataset."""
  if params.tfds_name:
    if params.tfds_name in tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP:
      decoder = tfds_detection_decoders.TFDS_ID_TO_DECODER_MAP[
          params.tfds_name]()
    else:
      raise ValueError('TFDS {} is not supported'.format(params.tfds_name))
  else:
    decoder_cfg = params.decoder.get()
    if params.decoder.type == 'simple_decoder':
      decoder = tf_example_decoder.TfExampleDecoder(
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    elif params.decoder.type == 'label_map_decoder':
      decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
          label_map=decoder_cfg.label_map,
          regenerate_source_id=decoder_cfg.regenerate_source_id)
    else:
      raise ValueError('Unknown decoder type: {}!'.format(
          params.decoder.type))

  parser = retinanet_input.Parser(
      output_size=self.task_config.model.input_size[:2],
      min_level=self.task_config.model.min_level,
      max_level=self.task_config.model.max_level,
      num_scales=self.task_config.model.anchor.num_scales,
      aspect_ratios=self.task_config.model.anchor.aspect_ratios,
      anchor_size=self.task_config.model.anchor.anchor_size,
      dtype=params.dtype,
      match_threshold=params.parser.match_threshold,
      unmatched_threshold=params.parser.unmatched_threshold,
      aug_rand_hflip=params.parser.aug_rand_hflip,
      aug_scale_min=params.parser.aug_scale_min,
      aug_scale_max=params.parser.aug_scale_max,
      skip_crowd_during_training=params.parser.skip_crowd_during_training,
      max_num_instances=params.parser.max_num_instances)

  reader = input_reader_factory.input_reader_generator(
      params,
      dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  dataset = reader.read(input_context=input_context)

  return dataset
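# --- Usage sketch (not part of the original source). ---
# A minimal, hedged example of how `build_inputs` above might be driven under
# a tf.distribute strategy. `task` is assumed to be an instance of the class
# that defines `build_inputs`, and `params` an exp_cfg.DataConfig like the one
# constructed in the test further below.
import tensorflow as tf


def make_distributed_dataset(task, params):
  """Builds a per-replica dataset by calling `build_inputs` per input context."""
  strategy = tf.distribute.get_strategy()
  return strategy.distribute_datasets_from_function(
      lambda input_context: task.build_inputs(params, input_context))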
def build_inputs(self, params, input_context=None):
  """Build input dataset."""
  decoder_cfg = params.decoder.get()
  if params.decoder.type == 'simple_decoder':
    decoder = tf_example_decoder.TfExampleDecoder(
        regenerate_source_id=decoder_cfg.regenerate_source_id)
  elif params.decoder.type == 'label_map_decoder':
    decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
        label_map=decoder_cfg.label_map,
        regenerate_source_id=decoder_cfg.regenerate_source_id)
  else:
    raise ValueError('Unknown decoder type: {}!'.format(params.decoder.type))

  parser = retinanet_input.Parser(
      output_size=self.task_config.model.input_size[:2],
      min_level=self.task_config.model.min_level,
      max_level=self.task_config.model.max_level,
      num_scales=self.task_config.model.anchor.num_scales,
      aspect_ratios=self.task_config.model.anchor.aspect_ratios,
      anchor_size=self.task_config.model.anchor.anchor_size,
      dtype=params.dtype,
      match_threshold=params.parser.match_threshold,
      unmatched_threshold=params.parser.unmatched_threshold,
      aug_rand_hflip=params.parser.aug_rand_hflip,
      aug_scale_min=params.parser.aug_scale_min,
      aug_scale_max=params.parser.aug_scale_max,
      skip_crowd_during_training=params.parser.skip_crowd_during_training,
      max_num_instances=params.parser.max_num_instances)

  reader = input_reader.InputReader(
      params,
      dataset_fn=tf.data.TFRecordDataset,
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  dataset = reader.read(input_context=input_context)

  return dataset
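# --- Pipeline sketch (not part of the original source). ---
# Rough illustration of the decode -> parse chain that the InputReader above
# wires together. `file_pattern` is a hypothetical placeholder, and the
# shuffling/batching the real reader adds is omitted; this is a sketch of the
# composition, not the actual InputReader implementation.
import tensorflow as tf


def build_manual_pipeline(file_pattern, decoder, parser, is_training=True):
  """Maps decoder.decode then parser.parse_fn over raw TFRecord examples."""
  files = tf.data.Dataset.list_files(file_pattern)
  raw_dataset = tf.data.TFRecordDataset(files)
  parse = parser.parse_fn(is_training)
  return raw_dataset.map(
      lambda serialized: parse(decoder.decode(serialized)),
      num_parallel_calls=tf.data.AUTOTUNE)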
def testRetinanetInputReader(self, output_size, skip_crowd_during_training,
                             use_autoaugment, is_training):
  batch_size = 2
  min_level = 3
  max_level = 7
  num_scales = 3
  aspect_ratios = [0.5, 1.0, 2.0]
  anchor_size = 3
  max_num_instances = 100

  params = cfg.DataConfig(
      input_path='/placer/prod/home/snaggletooth/test/data/coco/val*',
      global_batch_size=batch_size,
      is_training=is_training)

  decoder = tf_example_decoder.TfExampleDecoder()
  parser = retinanet_input.Parser(
      output_size=output_size,
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=anchor_size,
      skip_crowd_during_training=skip_crowd_during_training,
      use_autoaugment=use_autoaugment,
      max_num_instances=max_num_instances,
      dtype='bfloat16')

  reader = input_reader.InputReader(
      params,
      dataset_fn=tf.data.TFRecordDataset,
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  dataset = reader.read()
  iterator = iter(dataset)
  image, labels = next(iterator)
  np_image = image.numpy()
  np_labels = tf.nest.map_structure(lambda x: x.numpy(), labels)

  # Checks image shape.
  self.assertEqual(list(np_image.shape),
                   [batch_size, output_size[0], output_size[1], 3])

  # Checks keys in labels.
  if is_training:
    self.assertCountEqual(np_labels.keys(), [
        'cls_targets', 'box_targets', 'anchor_boxes', 'cls_weights',
        'box_weights', 'image_info'
    ])
  else:
    self.assertCountEqual(np_labels.keys(), [
        'cls_targets', 'box_targets', 'anchor_boxes', 'cls_weights',
        'box_weights', 'groundtruths', 'image_info'
    ])

  # Checks shapes of `image_info` and `anchor_boxes`.
  self.assertEqual(np_labels['image_info'].shape, (batch_size, 4, 2))
  n_anchors = 0
  for level in range(min_level, max_level + 1):
    stride = 2**level
    output_size_l = [output_size[0] / stride, output_size[1] / stride]
    anchors_per_location = num_scales * len(aspect_ratios)
    self.assertEqual(list(np_labels['anchor_boxes'][level].shape), [
        batch_size, output_size_l[0], output_size_l[1],
        4 * anchors_per_location
    ])
    n_anchors += output_size_l[0] * output_size_l[1] * anchors_per_location

  # Checks shapes of training objectives.
  self.assertEqual(np_labels['cls_weights'].shape, (batch_size, n_anchors))
  for level in range(min_level, max_level + 1):
    stride = 2**level
    output_size_l = [output_size[0] / stride, output_size[1] / stride]
    anchors_per_location = num_scales * len(aspect_ratios)
    self.assertEqual(list(np_labels['cls_targets'][level].shape), [
        batch_size, output_size_l[0], output_size_l[1], anchors_per_location
    ])
    self.assertEqual(list(np_labels['box_targets'][level].shape), [
        batch_size, output_size_l[0], output_size_l[1],
        4 * anchors_per_location
    ])

  # Checks shape of groundtruths for eval.
  if not is_training:
    self.assertEqual(np_labels['groundtruths']['source_id'].shape,
                     (batch_size,))
    self.assertEqual(np_labels['groundtruths']['classes'].shape,
                     (batch_size, max_num_instances))
    self.assertEqual(np_labels['groundtruths']['boxes'].shape,
                     (batch_size, max_num_instances, 4))
    self.assertEqual(np_labels['groundtruths']['areas'].shape,
                     (batch_size, max_num_instances))
    self.assertEqual(np_labels['groundtruths']['is_crowds'].shape,
                     (batch_size, max_num_instances))
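# --- Worked example (not part of the original test). ---
# Concrete instance of the n_anchors bookkeeping checked above, assuming the
# parameterization output_size = (640, 640); the remaining values match the
# constants set in the test body.
output_size = (640, 640)
min_level, max_level = 3, 7
num_scales, aspect_ratios = 3, [0.5, 1.0, 2.0]
anchors_per_location = num_scales * len(aspect_ratios)  # 3 * 3 = 9
n_anchors = sum(
    (output_size[0] // 2**level) * (output_size[1] // 2**level) *
    anchors_per_location
    for level in range(min_level, max_level + 1))
# 9 * (80*80 + 40*40 + 20*20 + 10*10 + 5*5) = 76725
assert n_anchors == 76725  # cls_weights would then have shape (batch_size, 76725).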