Example 1
    def build_inputs(
            self,
            params: exp_cfg.DataConfig,
            input_context: Optional[tf.distribute.InputContext] = None):
        """Build input dataset."""
        # Pick the TF Example decoder that matches the configured decoder type.
        decoder_cfg = params.decoder.get()
        if params.decoder.type == 'simple_decoder':
            decoder = tf_example_decoder.TfExampleDecoder(
                include_mask=self.task_config.model.include_mask,
                regenerate_source_id=decoder_cfg.regenerate_source_id,
                mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
        elif params.decoder.type == 'label_map_decoder':
            decoder = tf_example_label_map_decoder.TfExampleDecoderLabelMap(
                label_map=decoder_cfg.label_map,
                include_mask=self.task_config.model.include_mask,
                regenerate_source_id=decoder_cfg.regenerate_source_id,
                mask_binarize_threshold=decoder_cfg.mask_binarize_threshold)
        else:
            raise ValueError('Unknown decoder type: {}!'.format(
                params.decoder.type))

        # The parser turns decoded examples into (images, labels) tensors,
        # including per-level RPN score/box targets and anchor boxes.
        parser = maskrcnn_input.Parser(
            output_size=self.task_config.model.input_size[:2],
            min_level=self.task_config.model.min_level,
            max_level=self.task_config.model.max_level,
            num_scales=self.task_config.model.anchor.num_scales,
            aspect_ratios=self.task_config.model.anchor.aspect_ratios,
            anchor_size=self.task_config.model.anchor.anchor_size,
            dtype=params.dtype,
            rpn_match_threshold=params.parser.rpn_match_threshold,
            rpn_unmatched_threshold=params.parser.rpn_unmatched_threshold,
            rpn_batch_size_per_im=params.parser.rpn_batch_size_per_im,
            rpn_fg_fraction=params.parser.rpn_fg_fraction,
            aug_rand_hflip=params.parser.aug_rand_hflip,
            aug_scale_min=params.parser.aug_scale_min,
            aug_scale_max=params.parser.aug_scale_max,
            skip_crowd_during_training=(
                params.parser.skip_crowd_during_training),
            max_num_instances=params.parser.max_num_instances,
            include_mask=self.task_config.model.include_mask,
            mask_crop_size=params.parser.mask_crop_size)

        # Combine the file reader, decoder, and parser into a tf.data pipeline.
        reader = input_reader_factory.input_reader_generator(
            params,
            dataset_fn=dataset_fn.pick_dataset_fn(params.file_type),
            decoder_fn=decoder.decode,
            parser_fn=parser.parse_fn(params.is_training))
        dataset = reader.read(input_context=input_context)

        return dataset
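
For reference, a minimal sketch of the imports this method assumes, following the TensorFlow Model Garden module layout (exact paths may differ between releases):

from typing import Optional

import tensorflow as tf

from official.common import dataset_fn
from official.vision.configs import maskrcnn as exp_cfg
from official.vision.dataloaders import input_reader_factory
from official.vision.dataloaders import maskrcnn_input
from official.vision.dataloaders import tf_example_decoder
from official.vision.dataloaders import tf_example_label_map_decoder

With these in place, the method is typically reached through a task object, e.g. dataset = task.build_inputs(params) with params set to the experiment's train or validation DataConfig (this call site is illustrative, not taken from the snippet).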
Example 2
    def testMaskRCNNInputReader(self, output_size, skip_crowd_during_training,
                                include_mask, is_training):
        min_level = 3
        max_level = 7
        num_scales = 3
        aspect_ratios = [1.0, 2.0, 0.5]
        max_num_instances = 100
        batch_size = 2
        mask_crop_size = 112
        anchor_size = 4.0

        params = cfg.DataConfig(
            input_path='/placer/prod/home/snaggletooth/test/data/coco/val*',
            global_batch_size=batch_size,
            is_training=is_training)

        parser = maskrcnn_input.Parser(
            output_size=output_size,
            min_level=min_level,
            max_level=max_level,
            num_scales=num_scales,
            aspect_ratios=aspect_ratios,
            anchor_size=anchor_size,
            rpn_match_threshold=0.7,
            rpn_unmatched_threshold=0.3,
            rpn_batch_size_per_im=256,
            rpn_fg_fraction=0.5,
            aug_rand_hflip=True,
            aug_scale_min=0.8,
            aug_scale_max=1.2,
            skip_crowd_during_training=skip_crowd_during_training,
            max_num_instances=max_num_instances,
            include_mask=include_mask,
            mask_crop_size=mask_crop_size,
            dtype='bfloat16')

        decoder = tf_example_decoder.TfExampleDecoder(
            include_mask=include_mask)
        reader = input_reader.InputReader(params,
                                          dataset_fn=tf.data.TFRecordDataset,
                                          decoder_fn=decoder.decode,
                                          parser_fn=parser.parse_fn(
                                              params.is_training))

        dataset = reader.read()
        iterator = iter(dataset)

        images, labels = next(iterator)

        np_images = images.numpy()
        np_labels = tf.nest.map_structure(lambda x: x.numpy(), labels)

        # Training labels carry padded ground-truth boxes/classes (and masks
        # when requested) plus per-level RPN targets and anchor boxes.
        if is_training:
            self.assertAllEqual(
                np_images.shape,
                [batch_size, output_size[0], output_size[1], 3])
            self.assertAllEqual(np_labels['image_info'].shape,
                                [batch_size, 4, 2])
            self.assertAllEqual(np_labels['gt_boxes'].shape,
                                [batch_size, max_num_instances, 4])
            self.assertAllEqual(np_labels['gt_classes'].shape,
                                [batch_size, max_num_instances])
            if include_mask:
                self.assertAllEqual(np_labels['gt_masks'].shape, [
                    batch_size, max_num_instances, mask_crop_size,
                    mask_crop_size
                ])
            for level in range(min_level, max_level + 1):
                stride = 2**level
                # Floor division keeps the expected feature-map shape integral.
                output_size_l = [
                    output_size[0] // stride, output_size[1] // stride
                ]
                anchors_per_location = num_scales * len(aspect_ratios)
                self.assertAllEqual(
                    np_labels['rpn_score_targets'][level].shape, [
                        batch_size, output_size_l[0], output_size_l[1],
                        anchors_per_location
                    ])
                self.assertAllEqual(
                    np_labels['rpn_box_targets'][level].shape, [
                        batch_size, output_size_l[0], output_size_l[1],
                        4 * anchors_per_location
                    ])
                self.assertAllEqual(np_labels['anchor_boxes'][level].shape, [
                    batch_size, output_size_l[0], output_size_l[1],
                    4 * anchors_per_location
                ])
        else:
            self.assertAllEqual(
                np_images.shape,
                [batch_size, output_size[0], output_size[1], 3])
            self.assertAllEqual(np_labels['image_info'].shape,
                                [batch_size, 4, 2])
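
The four extra arguments in the test's signature indicate a parameterized test. A minimal sketch of the scaffolding this snippet assumes, using absl's parameterized test support (import paths follow the TensorFlow Model Garden layout and may vary by release; the parameter tuples below are illustrative, not the original ones):

from absl.testing import parameterized
import tensorflow as tf

from official.core import config_definitions as cfg
from official.core import input_reader
from official.vision.dataloaders import maskrcnn_input
from official.vision.dataloaders import tf_example_decoder


class MaskRCNNInputTest(parameterized.TestCase, tf.test.TestCase):

    # Each tuple supplies (output_size, skip_crowd_during_training,
    # include_mask, is_training).
    @parameterized.parameters(
        ([1024, 1024], True, True, True),
        ([1024, 1024], True, False, False),
    )
    def testMaskRCNNInputReader(self, output_size, skip_crowd_during_training,
                                include_mask, is_training):
        ...  # body as shown above


if __name__ == '__main__':
    tf.test.main()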