def build_inputs(
    self,
    params: exp_cfg.DataConfig,
    input_context: Optional[tf.distribute.InputContext] = None):
  """Builds the input dataset for Mask R-CNN training/evaluation.

  Args:
    params: The data configuration, including the decoder type, parser
      settings, file type and input paths.
    input_context: Optional `tf.distribute.InputContext` used to shard the
      dataset across replicas when running under a distribution strategy.

  Returns:
    A `tf.data.Dataset` of parsed (images, labels) batches.

  Raises:
    ValueError: If `params.decoder.type` is not one of the supported
      decoder types ('simple_decoder' or 'label_map_decoder').
  """
  decoder_cfg = params.decoder.get()
  # NOTE(review): unified on the public `self.task_config` accessor; the
  # original mixed `self._task_config` and `self.task_config` for the same
  # underlying config.
  if params.decoder.type == 'simple_decoder':
    decoder = tf_example_decoder.TfExampleDecoder(
        include_mask=self.task_config.model.include_mask,
        regenerate_source_id=decoder_cfg.regenerate_source_id,
        mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
  elif params.decoder.type == 'label_map_decoder':
    decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
        label_map=decoder_cfg.label_map,
        include_mask=self.task_config.model.include_mask,
        regenerate_source_id=decoder_cfg.regenerate_source_id,
        mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
  else:
    raise ValueError('Unknown decoder type: {}!'.format(
        params.decoder.type))

  parser = maskrcnn_input.Parser(
      output_size=self.task_config.model.input_size[:2],
      min_level=self.task_config.model.min_level,
      max_level=self.task_config.model.max_level,
      num_scales=self.task_config.model.anchor.num_scales,
      aspect_ratios=self.task_config.model.anchor.aspect_ratios,
      anchor_size=self.task_config.model.anchor.anchor_size,
      dtype=params.dtype,
      rpn_match_threshold=params.parser.rpn_match_threshold,
      rpn_unmatched_threshold=params.parser.rpn_unmatched_threshold,
      rpn_batch_size_per_im=params.parser.rpn_batch_size_per_im,
      rpn_fg_fraction=params.parser.rpn_fg_fraction,
      aug_rand_hflip=params.parser.aug_rand_hflip,
      aug_scale_min=params.parser.aug_scale_min,
      aug_scale_max=params.parser.aug_scale_max,
      skip_crowd_during_training=params.parser.skip_crowd_during_training,
      max_num_instances=params.parser.max_num_instances,
      include_mask=self.task_config.model.include_mask,
      mask_crop_size=params.parser.mask_crop_size)

  reader = input_reader_factory.input_reader_generator(
      params,
      dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  dataset = reader.read(input_context=input_context)

  return dataset
def testMaskRCNNInputReader(self, output_size, skip_crowd_during_training,
                            include_mask, is_training):
  """Reads one batch end-to-end and verifies all output tensor shapes.

  Builds a `maskrcnn_input.Parser`, decodes COCO validation shards through
  `input_reader.InputReader`, pulls a single batch, and asserts the shapes
  of the image tensor and every label tensor for both training and eval
  configurations.
  """
  min_level = 3
  max_level = 7
  num_scales = 3
  aspect_ratios = [1.0, 2.0, 0.5]
  max_num_instances = 100
  batch_size = 2
  mask_crop_size = 112
  anchor_size = 4.0
  params = cfg.DataConfig(
      input_path='/placer/prod/home/snaggletooth/test/data/coco/val*',
      global_batch_size=batch_size,
      is_training=is_training)

  parser = maskrcnn_input.Parser(
      output_size=output_size,
      min_level=min_level,
      max_level=max_level,
      num_scales=num_scales,
      aspect_ratios=aspect_ratios,
      anchor_size=anchor_size,
      rpn_match_threshold=0.7,
      rpn_unmatched_threshold=0.3,
      rpn_batch_size_per_im=256,
      rpn_fg_fraction=0.5,
      aug_rand_hflip=True,
      aug_scale_min=0.8,
      aug_scale_max=1.2,
      skip_crowd_during_training=skip_crowd_during_training,
      max_num_instances=max_num_instances,
      include_mask=include_mask,
      mask_crop_size=mask_crop_size,
      dtype='bfloat16')

  decoder = tf_example_decoder.TfExampleDecoder(include_mask=include_mask)
  reader = input_reader.InputReader(
      params,
      dataset_fn=tf.data.TFRecordDataset,
      decoder_fn=decoder.decode,
      parser_fn=parser.parse_fn(params.is_training))
  dataset = reader.read()
  iterator = iter(dataset)
  images, labels = next(iterator)
  np_images = images.numpy()
  np_labels = tf.nest.map_structure(lambda x: x.numpy(), labels)

  # Image and image-info shapes are asserted identically in train and eval
  # modes, so the checks are hoisted out of the branch.
  self.assertAllEqual(
      np_images.shape, [batch_size, output_size[0], output_size[1], 3])
  self.assertAllEqual(np_labels['image_info'].shape, [batch_size, 4, 2])

  if is_training:
    self.assertAllEqual(np_labels['gt_boxes'].shape,
                        [batch_size, max_num_instances, 4])
    self.assertAllEqual(np_labels['gt_classes'].shape,
                        [batch_size, max_num_instances])
    if include_mask:
      self.assertAllEqual(np_labels['gt_masks'].shape, [
          batch_size, max_num_instances, mask_crop_size, mask_crop_size
      ])

    # Invariant across levels: anchors per spatial location.
    anchors_per_location = num_scales * len(aspect_ratios)
    for level in range(min_level, max_level + 1):
      stride = 2**level
      # Use integer division: feature-map dimensions are integers. The
      # original used `/`, producing float expected shapes that relied on
      # float==int comparison inside assertAllEqual. (Assumes output_size
      # is divisible by 2**max_level, as these tests configure.)
      output_size_l = [output_size[0] // stride, output_size[1] // stride]
      self.assertAllEqual(
          np_labels['rpn_score_targets'][level].shape, [
              batch_size, output_size_l[0], output_size_l[1],
              anchors_per_location
          ])
      self.assertAllEqual(
          np_labels['rpn_box_targets'][level].shape, [
              batch_size, output_size_l[0], output_size_l[1],
              4 * anchors_per_location
          ])
      self.assertAllEqual(np_labels['anchor_boxes'][level].shape, [
          batch_size, output_size_l[0], output_size_l[1],
          4 * anchors_per_location
      ])