Example #1
    def __call__(self, params, input_context=None, batch_size=None):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder(
            include_mask='segmentation' in params['heads'],
            regenerate_source_id=params['regenerate_source_id'])

        batch_size = batch_size or params['batch_size']
        seed = params.get('tf_random_seed', None)
        dataset = tf.data.Dataset.list_files(self._file_pattern,
                                             shuffle=self._is_training,
                                             seed=seed)
        if input_context:
            dataset = dataset.shard(input_context.num_input_pipelines,
                                    input_context.input_pipeline_id)
        # Prefetch data from files.
        def _prefetch_dataset(filename):
            if params.get('dataset_type', None) == 'sstable':
                # SSTable input is not implemented in this snippet; fail
                # explicitly rather than returning an unbound `dataset`.
                raise NotImplementedError('sstable dataset_type is unsupported.')
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.interleave(_prefetch_dataset,
                                     num_parallel_calls=tf.data.AUTOTUNE,
                                     deterministic=bool(seed))
        dataset = dataset.with_options(self.dataset_options)
        if self._is_training:
            dataset = dataset.shuffle(64, seed=seed)

        # Parse the fetched records to input tensors for model function.
        # pylint: disable=g-long-lambda
        if params.get('dataset_type', None) == 'sstable':
            map_fn = lambda key, value: self.dataset_parser(
                value, example_decoder, anchor_labeler, params)
        else:
            map_fn = lambda value: self.dataset_parser(value, example_decoder,
                                                       anchor_labeler, params)
        # pylint: enable=g-long-lambda
        dataset = dataset.map(map_fn, num_parallel_calls=tf.data.AUTOTUNE)
        dataset = dataset.prefetch(batch_size)
        dataset = dataset.batch(batch_size,
                                drop_remainder=params['drop_remainder'])
        dataset = dataset.map(
            lambda *args: self.process_example(params, batch_size, *args))
        dataset = dataset.prefetch(tf.data.AUTOTUNE)
        if self._is_training:
            dataset = dataset.repeat()
        if self._use_fake_data:
            # Turn this dataset into a semi-fake dataset that always loops over
            # the first batch. This reduces variance in performance and is
            # useful for testing.
            dataset = dataset.take(1).cache().repeat()
        return dataset
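
A minimal, self-contained sketch of the pipeline shape used above (list_files -> interleave -> shuffle -> map -> batch -> prefetch), using an in-memory stand-in for TFRecord files; all names and parameters below are illustrative, not part of the project:

import tensorflow as tf

def make_pipeline(is_training, batch_size=4, seed=42):
    # Stand-in for tf.data.Dataset.list_files(file_pattern).
    files = tf.data.Dataset.from_tensor_slices(['f0', 'f1', 'f2', 'f3'])

    # Stand-in for tf.data.TFRecordDataset(filename): each "file" yields
    # ten fake records.
    def records_for(_):
        return tf.data.Dataset.range(10).prefetch(1)

    ds = files.interleave(records_for,
                          num_parallel_calls=tf.data.AUTOTUNE,
                          deterministic=bool(seed))
    if is_training:
        ds = ds.shuffle(64, seed=seed)
    ds = ds.map(lambda x: tf.cast(x, tf.float32) / 10.0,
                num_parallel_calls=tf.data.AUTOTUNE)
    ds = ds.batch(batch_size, drop_remainder=True)
    ds = ds.prefetch(tf.data.AUTOTUNE)
    if is_training:
        ds = ds.repeat()
    return ds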
Example #2
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder(
            include_mask='segmentation' in params['heads'],
            regenerate_source_id=params['regenerate_source_id'])

        batch_size = params['batch_size']
        dataset = tf.data.Dataset.list_files(self._file_pattern,
                                             shuffle=self._is_training)
        if self._is_training:
            dataset = dataset.repeat()

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.interleave(
            _prefetch_dataset,
            num_parallel_calls=tf.data.experimental.AUTOTUNE)
        options = tf.data.Options()
        options.experimental_deterministic = not self._is_training
        dataset = dataset.with_options(options)
        if self._is_training:
            dataset = dataset.shuffle(64)

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(
            lambda value: self.dataset_parser(  # pylint: disable=g-long-lambda
                value, example_decoder, anchor_labeler, params),
            num_parallel_calls=tf.data.experimental.AUTOTUNE)
        dataset = dataset.prefetch(batch_size)
        dataset = dataset.batch(batch_size, drop_remainder=True)
        dataset = dataset.map(
            lambda *args: self.process_example(params, batch_size, *args))
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
        if self._use_fake_data:
            # Turn this dataset into a semi-fake dataset that always loops over
            # the first batch. This reduces variance in performance and is
            # useful for testing.
            dataset = dataset.take(1).cache().repeat()
        return dataset
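
This example relies on tf.data.Options to relax element ordering during training. A short sketch of the same knob; note that `experimental_deterministic`, used above, is the older name for what newer TF releases expose as `options.deterministic`:

import tensorflow as tf

options = tf.data.Options()
options.deterministic = False  # allow element reordering for throughput
ds = tf.data.Dataset.range(8).with_options(options)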
Example #3
    def test_parser(self):
        tf.random.set_seed(111111)
        params = hparams_config.get_detection_config(
            'efficientdet-d0').as_dict()
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder(
            regenerate_source_id=params['regenerate_source_id'])
        tfrecord_path = self._make_fake_tfrecord()
        dataset = tf.data.TFRecordDataset([tfrecord_path])
        value = next(iter(dataset))
        reader = dataloader.InputReader(tfrecord_path, True)
        result = reader.dataset_parser(value, example_decoder, anchor_labeler,
                                       params)
        self.assertEqual(len(result), 11)
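
The test above depends on a `_make_fake_tfrecord` helper that is not shown on this page. A hedged sketch of what such a helper might do, writing one tf.train.Example in the standard 'image/...' object-detection feature format that TfExampleDecoder consumes; the feature set the real helper writes may differ:

import tensorflow as tf

def make_fake_tfrecord(path):
    """Writes a single fake detection example to `path`."""
    image = tf.io.encode_jpeg(tf.zeros([8, 8, 3], dtype=tf.uint8))

    def _bytes(v):
        return tf.train.Feature(bytes_list=tf.train.BytesList(value=[v]))

    def _floats(v):
        return tf.train.Feature(float_list=tf.train.FloatList(value=v))

    def _ints(v):
        return tf.train.Feature(int64_list=tf.train.Int64List(value=v))

    example = tf.train.Example(features=tf.train.Features(feature={
        'image/encoded': _bytes(image.numpy()),
        'image/source_id': _bytes(b'123'),
        'image/height': _ints([8]),
        'image/width': _ints([8]),
        'image/object/bbox/xmin': _floats([0.1]),
        'image/object/bbox/xmax': _floats([0.5]),
        'image/object/bbox/ymin': _floats([0.1]),
        'image/object/bbox/ymax': _floats([0.5]),
        'image/object/class/label': _ints([1]),
        'image/object/area': _floats([0.16]),
        'image/object/is_crowd': _ints([0]),
    }))
    with tf.io.TFRecordWriter(path) as writer:
        writer.write(example.SerializeToString())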
Example #4
    def _create_example_decoder(self):
        return tf_example_decoder.TfExampleDecoder(
            use_instance_mask=self._use_instance_mask)
Example #5
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder(
            regenerate_source_id=params['regenerate_source_id'])

        @tf.autograph.experimental.do_not_convert
        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preprocessed to have normalized value and
          fixed dimension [image_height, image_width, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the processed image to the original image.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
          dimension [self._max_instances_per_image, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tensor is
          padded with 0 to the fixed dimension [self._max_instances_per_image].
        areas: Groundtruth areas annotations. The tensor is padded with -1
          to the fixed dimension [self._max_instances_per_image].
        classes: Groundtruth classes annotations. The tensor is padded with -1
          to the fixed dimension [self._max_instances_per_image].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']

                if params['skip_crowd_during_training'] and self._is_training:
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                # NOTE: The autoaugment method works best when used alongside the
                # standard horizontal flipping of images along with size jittering
                # and normalization.
                if params.get('autoaugment_policy',
                              None) and self._is_training:
                    from aug import autoaugment  # pylint: disable=g-import-not-at-top
                    image, boxes = autoaugment.distort_image_with_autoaugment(
                        image, boxes, params['autoaugment_policy'],
                        params['use_augmix'], *params['augmix_params'])

                input_processor = DetectionInputProcessor(
                    image, params['image_size'], boxes, classes)
                input_processor.normalize_image()
                if self._is_training and params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()
                if self._is_training:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'],
                        params.get('target_size', None))
                else:
                    input_processor.set_scale_factors_to_output_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()

                # Assign anchors.
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.strings.to_number(source_id)

                # Pad groundtruth data for evaluation.
                image_scale = input_processor.image_scale_to_original
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_instances_per_image, 4])
                is_crowds = pad_to_fixed_size(
                    is_crowds, 0, [self._max_instances_per_image, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_instances_per_image, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_instances_per_image, 1])
                return (image, cls_targets, box_targets, num_positives,
                        source_id, image_scale, boxes, is_crowds, areas,
                        classes)

        batch_size = params['batch_size']
        dataset = tf.data.Dataset.list_files(self._file_pattern,
                                             shuffle=self._is_training)
        if self._is_training:
            dataset = dataset.repeat()

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.data.experimental.parallel_interleave(_prefetch_dataset,
                                                     cycle_length=32,
                                                     sloppy=self._is_training))
        if self._is_training:
            dataset = dataset.shuffle(64)

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
        dataset = dataset.prefetch(batch_size)
        dataset = dataset.batch(batch_size, drop_remainder=True)

        @tf.autograph.experimental.do_not_convert
        def _process_example(images, cls_targets, box_targets, num_positives,
                             source_ids, image_scales, boxes, is_crowds, areas,
                             classes):
            """Processes one batch of data."""
            labels = {}
            # Count num_positives in a batch.
            num_positives_batch = tf.reduce_mean(num_positives)
            labels['mean_num_positives'] = tf.reshape(
                tf.tile(tf.expand_dims(num_positives_batch, 0), [
                    batch_size,
                ]), [batch_size, 1])

            if params['data_format'] == 'channels_first':
                images = tf.transpose(images, [0, 3, 1, 2])

            for level in range(params['min_level'], params['max_level'] + 1):
                labels['cls_targets_%d' % level] = cls_targets[level]
                labels['box_targets_%d' % level] = box_targets[level]
                if params['data_format'] == 'channels_first':
                    labels['cls_targets_%d' % level] = tf.transpose(
                        labels['cls_targets_%d' % level], [0, 3, 1, 2])
                    labels['box_targets_%d' % level] = tf.transpose(
                        labels['box_targets_%d' % level], [0, 3, 1, 2])
            # Concatenate groundtruth annotations to a tensor.
            groundtruth_data = tf.concat([boxes, is_crowds, areas, classes],
                                         axis=2)
            labels['source_ids'] = source_ids
            labels['groundtruth_data'] = groundtruth_data
            labels['image_scales'] = image_scales
            return images, labels

        dataset = dataset.map(_process_example)
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
        if self._use_fake_data:
            # Turn this dataset into a semi-fake dataset that always loops over
            # the first batch. This reduces variance in performance and is
            # useful for testing.
            dataset = dataset.take(1).cache().repeat()
        return dataset
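
The parser above calls a `pad_to_fixed_size` helper that is not shown on this page. A sketch of its likely behavior, inferred from the call sites (clip or pad the instance dimension to a fixed size, filling the tail with `pad_value`); the project's actual implementation may differ:

import tensorflow as tf

def pad_to_fixed_size(data, pad_value, output_shape):
    """Clips/pads `data` to shape [output_shape[0], output_shape[1]]."""
    max_instances, dim = output_shape
    data = tf.reshape(data, [-1, dim])[:max_instances]
    num_pad = max_instances - tf.shape(data)[0]
    padding = tf.fill([num_pad, dim], tf.cast(pad_value, data.dtype))
    return tf.reshape(tf.concat([data, padding], axis=0), output_shape)

boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]])
padded = pad_to_fixed_size(boxes, -1, [3, 4])  # shape [3, 4], tail rows of -1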
Example #6
File: dataloader.py  Project: hitlk/tpu
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets."""
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                # Handle crowd annotations: crowd regions label groups of
                # instances rather than single objects, so the model ignores
                # them in training.
                if params['skip_crowd']:
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                # the image normalization is identical to Cloud TPU ResNet-50
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                image = _normalize_image(image)

                if params['input_rand_hflip']:
                    image, boxes = preprocessor.random_horizontal_flip(
                        image, boxes=boxes)
                image_original_shape = tf.shape(image)
                image, _ = preprocessor.resize_to_range(
                    image,
                    min_dimension=params['image_size'],
                    max_dimension=params['image_size'])
                image_scale = tf.to_float(
                    image_original_shape[0]) / tf.to_float(tf.shape(image)[0])
                image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
                    image, boxes, keypoints=None)

                image = tf.image.pad_to_bounding_box(image, 0, 0,
                                                     params['image_size'],
                                                     params['image_size'])
                (cls_targets, cls_weights, box_targets, box_weights,
                 num_positives, num_negatives,
                 num_ignored) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.string_to_number(source_id, out_type=tf.float32)
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                row = (image, cls_targets, cls_weights, box_targets,
                       box_weights, num_positives, num_negatives, num_ignored,
                       source_id, image_scale)
                return row

        # batch_size = params['batch_size']
        batch_size = self._batch_size

        dataset = tf.data.Dataset.list_files(self._file_pattern)

        dataset = dataset.shuffle(buffer_size=1024)
        if self._is_training:
            dataset = dataset.repeat()

        def prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename,
                                              buffer_size=8 * 1000 * 1000)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(prefetch_dataset,
                                                cycle_length=1,
                                                sloppy=True))
        dataset = dataset.shuffle(buffer_size=3072)

        dataset = dataset.map(_dataset_parser, num_parallel_calls=12)
        dataset = dataset.prefetch(32)
        dataset = dataset.apply(
            tf.contrib.data.batch_and_drop_remainder(batch_size))
        dataset = dataset.prefetch(2)

        (images, cls_targets, cls_weights, box_targets, box_weights,
         num_positives, num_negatives, num_ignored, source_ids,
         image_scales) = dataset.make_one_shot_iterator().get_next()
        labels = {}
        # count num_positives in a batch
        num_positives_batch = tf.reduce_mean(num_positives)
        labels['mean_num_positives'] = tf.reshape(
            tf.tile(tf.expand_dims(num_positives_batch, 0), [
                batch_size,
            ]), [batch_size, 1])

        num_negatives_batch = tf.reduce_mean(num_negatives)
        labels['mean_num_negatives'] = tf.reshape(
            tf.tile(tf.expand_dims(num_negatives_batch, 0), [
                batch_size,
            ]), [batch_size, 1])

        num_ignored_batch = tf.reduce_mean(num_ignored)
        labels['mean_num_ignored'] = tf.reshape(
            tf.tile(tf.expand_dims(num_ignored_batch, 0), [batch_size]),
            [batch_size, 1])

        for level in range(params['min_level'], params['max_level'] + 1):
            labels['cls_targets_%d' % level] = cls_targets[level]
            labels['cls_weights_%d' % level] = cls_weights[level]
            labels['box_targets_%d' % level] = box_targets[level]
            labels['box_weights_%d' % level] = box_weights[level]
        labels['source_ids'] = source_ids
        labels['image_scales'] = image_scales
        return images, labels
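
The `mean_num_positives` bookkeeping above averages a per-example count over the batch and broadcasts it back to shape [batch_size, 1] so it can travel with the other labels. An isolated, runnable illustration with made-up numbers:

import tensorflow as tf

batch_size = 4
num_positives = tf.constant([3.0, 5.0, 4.0, 4.0])    # per-example counts
num_positives_batch = tf.reduce_mean(num_positives)  # scalar, 4.0
mean_num_positives = tf.reshape(
    tf.tile(tf.expand_dims(num_positives_batch, 0), [batch_size]),
    [batch_size, 1])                                 # shape [4, 1], all 4.0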
Example #7
    def __call__(self, params):
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def _parse_example(data):
            with tf.name_scope('augmentation'):
                source_id = data['source_id']
                image = tf.image.convert_image_dtype(data['image'],
                                                     dtype=tf.float32)
                raw_shape = tf.shape(image)
                boxes = data['groundtruth_boxes']
                classes = tf.reshape(data['groundtruth_classes'], [-1, 1])

                # Only 80 of the 90 COCO classes are used.
                class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP)
                classes = tf.gather(class_map, classes)
                classes = tf.cast(classes, dtype=tf.float32)

                if self._is_training:
                    image, boxes, classes = ssd_crop(image, boxes, classes)

                    # random_horizontal_flip() is hard coded to flip with 50% chance.
                    mlperf_log.ssd_print(
                        key=mlperf_log.RANDOM_FLIP_PROBABILITY, value=0.5)
                    image, boxes = preprocessor.random_horizontal_flip(
                        image=image, boxes=boxes)

                    # TODO(shibow): Investigate the parameters for color jitter.
                    image = color_jitter(image,
                                         brightness=0.125,
                                         contrast=0.5,
                                         saturation=0.5,
                                         hue=0.05)
                    image = normalize_image(image)

                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    encoded_classes, encoded_boxes, num_matched_boxes = encode_labels(
                        boxes, classes)

                    # TODO(taylorrobie): Check that this cast is valid.
                    encoded_classes = tf.cast(encoded_classes, tf.int32)

                    labels = {
                        ssd_constants.NUM_MATCHED_BOXES: num_matched_boxes,
                        ssd_constants.BOXES: encoded_boxes,
                        ssd_constants.CLASSES: encoded_classes,
                    }
                    # This is for dataloader visualization; actual model doesn't use this.
                    if params['visualize_dataloader']:
                        box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
                            scale_factors=ssd_constants.BOX_CODER_SCALES)
                        decoded_boxes = tf.expand_dims(
                            box_coder.decode(
                                rel_codes=tf.squeeze(encoded_boxes),
                                anchors=box_list.BoxList(
                                    tf.convert_to_tensor(
                                        DefaultBoxes()('ltrb')))).get(),
                            axis=0)
                        labels['decoded_boxes'] = tf.squeeze(decoded_boxes)

                    return image, labels

                else:
                    mlperf_log.ssd_print(key=mlperf_log.INPUT_SIZE,
                                         value=ssd_constants.IMAGE_SIZE)
                    image = tf.image.resize_images(
                        image[tf.newaxis, :, :, :],
                        size=(ssd_constants.IMAGE_SIZE,
                              ssd_constants.IMAGE_SIZE))[0, :, :, :]

                    image = normalize_image(image)

                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    def trim_and_pad(inp_tensor, dim_1):
                        """Limit the number of boxes, and pad if necessary."""
                        inp_tensor = inp_tensor[:ssd_constants.MAX_NUM_EVAL_BOXES]
                        num_pad = (ssd_constants.MAX_NUM_EVAL_BOXES -
                                   tf.shape(inp_tensor)[0])
                        inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
                        return tf.reshape(
                            inp_tensor, [ssd_constants.MAX_NUM_EVAL_BOXES, dim_1])

                    boxes = trim_and_pad(boxes, 4)
                    classes = trim_and_pad(classes, 1)

                    return {
                        ssd_constants.IMAGE: image,
                        ssd_constants.BOXES: boxes,
                        ssd_constants.CLASSES: classes,
                        ssd_constants.SOURCE_ID:
                            tf.string_to_number(source_id, tf.int32),
                        ssd_constants.RAW_SHAPE: raw_shape,
                    }

        batch_size = params['batch_size']
        dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
        mlperf_log.ssd_print(key=mlperf_log.INPUT_ORDER)
        mlperf_log.ssd_print(key=mlperf_log.INPUT_BATCH_SIZE, value=batch_size)

        if self._is_training:
            dataset = dataset.shard(
                params['context'].num_hosts,
                params['context'].current_input_fn_deployment()[1])
            dataset = dataset.shuffle(
                tf.to_int64(256 / params['context'].num_hosts))

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(_prefetch_dataset,
                                                cycle_length=32,
                                                sloppy=self._is_training))

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(example_decoder.decode, num_parallel_calls=64)

        if self._is_training:
            dataset = dataset.map(
                # pylint: disable=g-long-lambda
                lambda data:
                (data, tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0)),
                num_parallel_calls=64)
            dataset = dataset.filter(lambda data, pred: pred)
            dataset = dataset.prefetch(batch_size * 64)
            dataset = dataset.cache().apply(
                tf.contrib.data.shuffle_and_repeat(64))
            dataset = dataset.prefetch(batch_size * 64)
            dataset = dataset.apply(
                tf.contrib.data.map_and_batch(
                    lambda data, _: _parse_example(data),
                    batch_size=batch_size,
                    drop_remainder=True,
                    num_parallel_calls=128))
        else:
            dataset = dataset.prefetch(batch_size * 64)
            dataset = dataset.apply(
                tf.contrib.data.map_and_batch(_parse_example,
                                              batch_size=batch_size,
                                              drop_remainder=True,
                                              num_parallel_calls=128))

        # Manually apply the double transpose trick for training data.
        def _transpose_dataset(image, labels):
            image = tf.transpose(image, [1, 2, 3, 0])
            labels[ssd_constants.BOXES] = tf.transpose(
                labels[ssd_constants.BOXES], [1, 2, 0])
            labels[ssd_constants.CLASSES] = tf.transpose(
                labels[ssd_constants.CLASSES], [1, 2, 0])
            return image, labels

        if self._transpose_input and self._is_training:
            dataset = dataset.map(_transpose_dataset, num_parallel_calls=128)

        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)

        return dataset
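
`_transpose_dataset` above applies the "double transpose trick": training batches are stored as (H, W, C, N) on the host so the TPU infeed is cheaper, and the model transposes them back before use. A toy demonstration of the two layouts (shapes are illustrative):

import tensorflow as tf

images = tf.zeros([8, 300, 300, 3])                     # (N, H, W, C)
host_layout = tf.transpose(images, [1, 2, 3, 0])        # (H, W, C, N) infeed
model_layout = tf.transpose(host_layout, [3, 0, 1, 2])  # back to (N, H, W, C)
assert model_layout.shape == images.shape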
Example #8
    def __call__(self, params):
        image_size = (params['image_size'], params['image_size'])
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'], image_size)
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'],
                                               params['rpn_positive_overlap'],
                                               params['rpn_negative_overlap'],
                                               params['rpn_batch_size_per_im'],
                                               params['rpn_fg_fraction'])

        example_decoder = tf_example_decoder.TfExampleDecoder(
            use_instance_mask=True)

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preproessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the proccessed image to the original image.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tennsor is padded with -1 to the fixed
          dimension [self._max_num_instances, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tennsor is
          padded with 0 to the fixed dimension [self._max_num_instances].
        areas: Groundtruth areas annotations. The tennsor is padded with -1
          to the fixed dimension [self._max_num_instances].
        classes: Groundtruth classes annotations. The tennsor is padded with -1
          to the fixed dimension [self._max_num_instances].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                instance_masks = data['groundtruth_instance_masks']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']
                if not params['use_category']:
                    classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

                if (params['skip_crowd_during_training']
                        and self._mode == tf.estimator.ModeKeys.TRAIN):
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)
                    instance_masks = tf.gather_nd(instance_masks, indices)

                input_processor = InstanceSegmentationInputProcessor(
                    image, image_size, boxes, classes, instance_masks)
                input_processor.normalize_image()
                if (self._mode == tf.estimator.ModeKeys.TRAIN
                        and params['input_rand_hflip']):
                    input_processor.random_horizontal_flip()
                if self._mode == tf.estimator.ModeKeys.TRAIN:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                else:
                    input_processor.set_scale_factors_to_output_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()
                instance_masks = input_processor.resize_and_crop_masks()
                cropped_gt_masks = input_processor.crop_gt_masks(
                    instance_masks, boxes, params['gt_mask_size'], image_size)

                # Assign anchors.
                score_targets, box_targets = anchor_labeler.label_anchors(
                    boxes, classes)

                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                image_scale = input_processor.image_scale_to_original
                scaled_height = input_processor.get_height_length()
                scaled_width = input_processor.get_width_length()
                image_info = tf.stack([
                    tf.to_float(scaled_height),
                    tf.to_float(scaled_width),
                    image_scale,
                    tf.to_float(input_processor.get_original_height),
                    tf.to_float(input_processor.get_original_width),
                ])
                # Pad groundtruth data for evaluation.
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                is_crowds = pad_to_fixed_size(is_crowds, 0,
                                              [self._max_num_instances, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_num_instances, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                # Pads cropped_gt_masks.
                cropped_gt_masks = tf.reshape(cropped_gt_masks,
                                              [self._max_num_instances, -1])
                cropped_gt_masks = pad_to_fixed_size(
                    cropped_gt_masks, -1,
                    [self._max_num_instances, (params['gt_mask_size'] + 4)**2])
                cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                    self._max_num_instances, params['gt_mask_size'] + 4,
                    params['gt_mask_size'] + 4
                ])
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                return (image, score_targets, box_targets, source_id,
                        image_info, boxes, is_crowds, areas, classes,
                        cropped_gt_masks)

        # batch_size = params['batch_size']
        batch_size = params['batch_size'] if 'batch_size' in params else 1
        dataset = tf.data.Dataset.list_files(
            self._file_pattern,
            shuffle=(self._mode == tf.estimator.ModeKeys.TRAIN))
        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.repeat()

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(
                _prefetch_dataset,
                cycle_length=32,
                sloppy=(self._mode == tf.estimator.ModeKeys.TRAIN)))
        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.shuffle(64)

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
        dataset = dataset.prefetch(batch_size)
        dataset = dataset.batch(batch_size, drop_remainder=True)

        def _process_example(images, score_targets, box_targets, source_ids,
                             image_info, boxes, is_crowds, areas, classes,
                             cropped_gt_masks):
            """Processes one batch of data."""
            # Transposes images for TPU performance.
            # Given the batch size, the batch dimension (N) goes to either the minor
            # ((H, W, C, N) when N > C) or the second-minor ((H, W, N, C) when N < C)
            # dimension. Here, we assume N is 4 or 8 and C is 3, so we use
            # (H, W, C, N).
            if (params['transpose_input']
                    and self._mode == tf.estimator.ModeKeys.TRAIN):
                images = tf.transpose(images, [1, 2, 3, 0])

            labels = {}
            for level in range(params['min_level'], params['max_level'] + 1):
                labels['score_targets_%d' % level] = score_targets[level]
                labels['box_targets_%d' % level] = box_targets[level]
            # Concatenate groundtruth annotations to a tensor.
            groundtruth_data = tf.concat([boxes, is_crowds, areas, classes],
                                         axis=2)
            labels['source_ids'] = source_ids
            labels['groundtruth_data'] = groundtruth_data
            labels['image_info'] = image_info
            labels['cropped_gt_masks'] = cropped_gt_masks
            if self._mode == tf.estimator.ModeKeys.PREDICT:
                features = dict(images=images,
                                image_info=image_info,
                                groundtruth_data=groundtruth_data,
                                source_ids=source_ids)
                return features
            else:
                return images, labels

        dataset = dataset.map(_process_example)
        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)

        if self._num_examples > 0:
            dataset = dataset.take(self._num_examples)
        if self._use_fake_data:
            # Turn this dataset into a semi-fake dataset that always loops over
            # the first batch. This reduces variance in performance and is
            # useful for testing.
            dataset = dataset.take(1).cache().repeat()
        return dataset
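
The `cropped_gt_masks` handling above flattens each (gt_mask_size + 4)^2 mask, pads the instance dimension out to a fixed length, and restores the square spatial shape. A self-contained sketch with illustrative sizes:

import tensorflow as tf

gt_mask_size, max_num_instances = 28, 5
side = gt_mask_size + 4
masks = tf.zeros([2, side, side])               # two real instances
flat = tf.reshape(masks, [2, side * side])
flat = tf.pad(flat, [[0, max_num_instances - 2], [0, 0]], constant_values=-1)
fixed = tf.reshape(flat, [max_num_instances, side, side])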
Example #9
    def __call__(self, params):
        image_size = (params['image_size'], params['image_size'])
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'], image_size)
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'],
                                               params['rpn_positive_overlap'],
                                               params['rpn_negative_overlap'],
                                               params['rpn_batch_size_per_im'],
                                               params['rpn_fg_fraction'])

        example_decoder = tf_example_decoder.TfExampleDecoder(
            use_instance_mask=self._use_instance_mask)

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        features: a dictionary that contains the image and auxiliary
          information. The following describes {key: value} pairs in the
          dictionary.
          image: Image tensor that is preproessed to have normalized value and
            fixed dimension [image_size, image_size, 3]
          image_info: image information that includes the original height and
            width, the scale of the proccessed image to the original image, and
            the scaled height and width.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
        labels: a dictionary that contains auxiliary information plus (optional)
          labels. The following describes {key: value} pairs in the dictionary.
          `labels` is only for training.
          score_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors]. The height_l and width_l
            represent the dimension of objectiveness score at l-th level.
          box_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors * 4]. The height_l and
            width_l represent the dimension of bounding box regression output at
            l-th level.
          gt_boxes: Groundtruth bounding box annotations. The box is represented
             in [y1, x1, y2, x2] format. The tennsor is padded with -1 to the
             fixed dimension [self._max_num_instances, 4].
          gt_classes: Groundtruth classes annotations. The tennsor is padded
            with -1 to the fixed dimension [self._max_num_instances].
          cropped_gt_masks: groundtrugh masks cropped by the bounding box and
            resized to a fixed size determined by params['gt_mask_size']
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                image = data['image']
                source_id = data['source_id']
                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                if self._mode == tf.estimator.ModeKeys.PREDICT:
                    input_processor = InstanceSegmentationInputProcessor(
                        image, image_size)
                    input_processor.normalize_image()
                    input_processor.set_scale_factors_to_output_size()
                    image = input_processor.resize_and_crop_image()
                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    image_info = input_processor.get_image_info()
                    return {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id
                    }

                elif self._mode == tf.estimator.ModeKeys.TRAIN:
                    instance_masks = None
                    if self._use_instance_mask:
                        instance_masks = data['groundtruth_instance_masks']
                    boxes = data['groundtruth_boxes']
                    classes = data['groundtruth_classes']
                    classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                         [-1, 1])
                    if not params['use_category']:
                        classes = tf.cast(tf.greater(classes, 0),
                                          dtype=tf.float32)

                    if (params['skip_crowd_during_training']
                            and self._mode == tf.estimator.ModeKeys.TRAIN):
                        indices = tf.where(
                            tf.logical_not(data['groundtruth_is_crowd']))
                        classes = tf.gather_nd(classes, indices)
                        boxes = tf.gather_nd(boxes, indices)
                        if self._use_instance_mask:
                            instance_masks = tf.gather_nd(
                                instance_masks, indices)

                    input_processor = InstanceSegmentationInputProcessor(
                        image, image_size, boxes, classes, instance_masks)
                    input_processor.normalize_image()
                    if params['input_rand_hflip']:
                        input_processor.random_horizontal_flip()

                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                    image = input_processor.resize_and_crop_image()

                    boxes, classes = input_processor.resize_and_crop_boxes()
                    if self._use_instance_mask:
                        instance_masks = input_processor.resize_and_crop_masks()
                        cropped_gt_masks = input_processor.crop_gt_masks(
                            instance_masks, boxes, params['gt_mask_size'],
                            image_size)

                    # Assign anchors.
                    score_targets, box_targets = anchor_labeler.label_anchors(
                        boxes, classes)

                    # Pad groundtruth data.
                    image_info = input_processor.get_image_info()
                    boxes *= image_info[2]
                    boxes = pad_to_fixed_size(boxes, -1,
                                              [self._max_num_instances, 4])
                    classes = pad_to_fixed_size(classes, -1,
                                                [self._max_num_instances, 1])

                    # Pads cropped_gt_masks.
                    if self._use_instance_mask:
                        cropped_gt_masks = tf.reshape(
                            cropped_gt_masks, [self._max_num_instances, -1])
                        cropped_gt_masks = pad_to_fixed_size(
                            cropped_gt_masks, -1, [
                                self._max_num_instances,
                                (params['gt_mask_size'] + 4)**2
                            ])
                        cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                            self._max_num_instances, params['gt_mask_size'] +
                            4, params['gt_mask_size'] + 4
                        ])

                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    features = {}
                    features['images'] = image
                    features['image_info'] = image_info
                    features['source_ids'] = source_id
                    labels = {}
                    for level in range(params['min_level'],
                                       params['max_level'] + 1):
                        labels['score_targets_%d' % level] = score_targets[level]
                        labels['box_targets_%d' % level] = box_targets[level]
                    labels['gt_boxes'] = boxes
                    labels['gt_classes'] = classes
                    if self._use_instance_mask:
                        labels['cropped_gt_masks'] = cropped_gt_masks
                    return (features, labels)

        batch_size = params['batch_size'] if 'batch_size' in params else 1
        dataset = tf.data.Dataset.list_files(
            self._file_pattern,
            shuffle=(self._mode == tf.estimator.ModeKeys.TRAIN))
        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.repeat()

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(
                _prefetch_dataset,
                cycle_length=32,
                sloppy=(self._mode == tf.estimator.ModeKeys.TRAIN)))
        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.shuffle(64)

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.apply(
            tf.contrib.data.map_and_batch(_dataset_parser,
                                          batch_size=batch_size,
                                          num_parallel_batches=64,
                                          drop_remainder=True))

        # Transposes images for TPU performance.
        # Given the batch size, the batch dimension (N) goes to either the minor
        # ((H, W, C, N) when N > C) or the second-minor ((H, W, N, C) when N < C)
        # dimension. Here, we assume N is 4 or 8 and C is 3, so we use
        # (H, W, C, N).
        if (params['transpose_input']
                and self._mode == tf.estimator.ModeKeys.TRAIN):

            def _transpose_images(features, labels):
                features['images'] = tf.transpose(features['images'],
                                                  [1, 2, 3, 0])
                return features, labels

            dataset = dataset.map(_transpose_images, num_parallel_calls=64)

        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)

        if self._num_examples > 0:
            dataset = dataset.take(self._num_examples)
        if self._use_fake_data:
            # Turn this dataset into a semi-fake dataset that always loops over
            # the first batch. This reduces variance in performance and is
            # useful for testing.
            dataset = dataset.take(1).cache().repeat()
        return dataset
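
This example fuses parsing and batching with tf.contrib.data.map_and_batch, which is long deprecated. In current TensorFlow the equivalent pipeline is written as separate map and batch stages, which tf.data fuses automatically; a minimal sketch:

import tensorflow as tf

ds = tf.data.Dataset.range(100)
ds = ds.map(lambda x: x * 2, num_parallel_calls=tf.data.AUTOTUNE)
ds = ds.batch(8, drop_remainder=True)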
Example #10
    def __call__(self, params):
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def _parse_example(data):
            with tf.name_scope('augmentation'):
                source_id = data['source_id']
                image = data['image']  # dtype uint8
                raw_shape = tf.shape(image)
                boxes = data['groundtruth_boxes']
                classes = tf.reshape(data['groundtruth_classes'], [-1, 1])

                # Only 80 of the 90 COCO classes are used.
                class_map = tf.convert_to_tensor(ssd_constants.CLASS_MAP)
                classes = tf.gather(class_map, classes)
                classes = tf.cast(classes, dtype=tf.float32)

                if self._is_training:
                    image, boxes, classes = ssd_crop(image, boxes, classes)
                    # ssd_crop resizes and returns an image of dtype float32
                    # without changing its range (values stay in [0, 255]).
                    # Dividing by 255 converts it to the [0, 1] range. This is
                    # not done before cropping, to avoid a dtype cast (which
                    # incurs an additional memory copy).
                    image /= 255.0

                    # random_horizontal_flip() is hard coded to flip with 50% chance.
                    image, boxes = preprocessor.random_horizontal_flip(
                        image=image, boxes=boxes)

                    # TODO(shibow): Investigate the parameters for color jitter.
                    image = color_jitter(image,
                                         brightness=0.125,
                                         contrast=0.5,
                                         saturation=0.5,
                                         hue=0.05)

                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    encoded_classes, encoded_boxes, num_matched_boxes = encode_labels(
                        boxes, classes)

                    # TODO(taylorrobie): Check that this cast is valid.
                    encoded_classes = tf.cast(encoded_classes, tf.int32)

                    labels = {
                        ssd_constants.NUM_MATCHED_BOXES: num_matched_boxes,
                        ssd_constants.BOXES: encoded_boxes,
                        ssd_constants.CLASSES: tf.squeeze(encoded_classes,
                                                          axis=1),
                    }
                    # This is for dataloader visualization; actual model doesn't use this.
                    if params['visualize_dataloader']:
                        box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
                            scale_factors=ssd_constants.BOX_CODER_SCALES)
                        decoded_boxes = tf.expand_dims(
                            box_coder.decode(
                                rel_codes=tf.squeeze(encoded_boxes),
                                anchors=box_list.BoxList(
                                    tf.convert_to_tensor(
                                        DefaultBoxes()('ltrb')))).get(),
                            axis=0)
                        labels['decoded_boxes'] = tf.squeeze(decoded_boxes)

                    return image, labels

                else:
                    image = tf.image.resize_images(
                        image,
                        size=(ssd_constants.IMAGE_SIZE,
                              ssd_constants.IMAGE_SIZE))
                    # resize_images returns an image of dtype float32 and does
                    # not change its range. Divide by 255 to convert the image
                    # to the [0, 1] range.
                    image /= 255.

                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    def trim_and_pad(inp_tensor, dim_1):
                        """Limit the number of boxes, and pad if necessary."""
                        inp_tensor = inp_tensor[:ssd_constants.MAX_NUM_EVAL_BOXES]
                        num_pad = (ssd_constants.MAX_NUM_EVAL_BOXES -
                                   tf.shape(inp_tensor)[0])
                        inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
                        return tf.reshape(
                            inp_tensor, [ssd_constants.MAX_NUM_EVAL_BOXES, dim_1])

                    boxes = trim_and_pad(boxes, 4)
                    classes = trim_and_pad(classes, 1)

                    sample = {
                        ssd_constants.IMAGE: image,
                        ssd_constants.BOXES: boxes,
                        ssd_constants.CLASSES: classes,
                        ssd_constants.SOURCE_ID:
                            tf.string_to_number(source_id, tf.int32),
                        ssd_constants.RAW_SHAPE: raw_shape,
                    }

                    if not self._is_training and self._count > params[
                            'eval_samples']:
                        sample[ssd_constants.IS_PADDED] = data[
                            ssd_constants.IS_PADDED]
                    return sample

        batch_size = params['batch_size']
        dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)

        if self._is_training or self._distributed_eval:
            if 'context' in params:
                dataset = dataset.shard(
                    params['context'].num_hosts,
                    params['context'].current_input_fn_deployment()[1])
                if self._is_training:
                    dataset = dataset.shuffle(
                        tf.to_int64(256 / params['context'].num_hosts))
            else:
                dataset = dataset.shard(params['dataset_num_shards'],
                                        params['dataset_index'])
                if self._is_training:
                    dataset = dataset.shuffle(
                        tf.to_int64(256 / params['dataset_num_shards']))

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.data.experimental.parallel_interleave(_prefetch_dataset,
                                                     cycle_length=32,
                                                     sloppy=self._is_training))

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(example_decoder.decode, num_parallel_calls=64)

        def _mark_is_padded(data):
            sample = data
            sample[ssd_constants.IS_PADDED] = tf.constant(True, dtype=tf.bool)
            return sample

        def _mark_is_not_padded(data):
            sample = data
            sample[ssd_constants.IS_PADDED] = tf.constant(False, dtype=tf.bool)
            return sample

        # Pad the dataset to the desired size and mark whether the data is
        # padded. During eval/predict, if local_batch_size * num_shards > 5000,
        # the original dataset size won't fit the computation on that number of
        # shards. In this case, take (local_batch_size - 5000 / num_shards)
        # examples from the original dataset on each shard, mark the appended
        # data as `is_padded` and the original data as `not_padded`, and append
        # the padded data to the original dataset.
        if not self._is_training and self._count > params['eval_samples']:
            padded_dataset = dataset.map(_mark_is_padded)
            dataset = dataset.map(_mark_is_not_padded)
            dataset = dataset.concatenate(padded_dataset).take(
                self._count // params['dataset_num_shards'])

        if self._is_training:
            dataset = dataset.map(
                # pylint: disable=g-long-lambda
                lambda data:
                (data, tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0)),
                num_parallel_calls=64)
            dataset = dataset.filter(lambda data, pred: pred)
            # Prefetching and caching increases the memory usage, so disable when
            # using fake data.
            if not self._use_fake_data:
                dataset = dataset.cache().shuffle(64).repeat()
            dataset = dataset.map(lambda data, _: _parse_example(data),
                                  num_parallel_calls=64)
            dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)
        else:
            dataset = dataset.prefetch(batch_size * 64)
            dataset = dataset.map(_parse_example, num_parallel_calls=64)
            dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)

        if params['conv0_space_to_depth']:

            def _space_to_depth_training_fn(images, labels):
                images = fused_transpose_and_space_to_depth(
                    images,
                    block_size=ssd_constants.SPACE_TO_DEPTH_BLOCK_SIZE,
                    transpose_input=self._transpose_input)
                if self._transpose_input and batch_size > 8:
                    labels[ssd_constants.BOXES] = tf.transpose(
                        labels[ssd_constants.BOXES], [1, 2, 0])
                return images, labels

            def _space_to_depth_eval_fn(labels):
                images = labels[ssd_constants.IMAGE]
                labels[
                    ssd_constants.IMAGE] = fused_transpose_and_space_to_depth(
                        images,
                        block_size=ssd_constants.SPACE_TO_DEPTH_BLOCK_SIZE,
                        transpose_input=False)
                return labels

            if self._is_training:
                space_to_depth_fn = _space_to_depth_training_fn
            else:
                space_to_depth_fn = _space_to_depth_eval_fn
            dataset = dataset.map(space_to_depth_fn, num_parallel_calls=64)
        elif self._transpose_input and self._is_training:
            # Manually apply the double transpose trick for training data.
            def _transpose_dataset(image, labels):
                if batch_size > 8:
                    image = tf.transpose(image, [1, 2, 3, 0])
                    labels[ssd_constants.BOXES] = tf.transpose(
                        labels[ssd_constants.BOXES], [1, 2, 0])
                else:
                    image = tf.transpose(image, [1, 2, 0, 3])
                return image, labels

            dataset = dataset.map(_transpose_dataset, num_parallel_calls=64)

        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
        options = tf.data.Options()
        options.experimental_threading.max_intra_op_parallelism = 1
        options.experimental_threading.private_threadpool_size = 48
        dataset = dataset.with_options(options)

        if self._use_fake_data:
            dataset = dataset.take(1).cache().repeat()

        return dataset
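Note on the `is_padded` machinery above: every eval shard must yield the same number of batches, so the pipeline appends a marked copy of the data and truncates to a fixed per-shard count. A minimal standalone sketch of the same pad-and-mark pattern (the dataset and field names here are illustrative, not taken from the snippet):

import tensorflow as tf

def pad_eval_dataset(dataset, per_shard_count):
    """Appends a copy of the data marked as padding, then takes a fixed count."""
    def mark(flag):
        return lambda x: {'value': x['value'], 'is_padded': tf.constant(flag)}
    padded = dataset.map(mark(True))    # appended records count as padding
    dataset = dataset.map(mark(False))  # original records are real data
    return dataset.concatenate(padded).take(per_shard_count)

# Three real records padded out to five per shard; downstream eval code can
# discard results whose `is_padded` flag is True.
ds = pad_eval_dataset(
    tf.data.Dataset.from_tensor_slices({'value': [1, 2, 3]}), 5)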
Example #11
  def __call__(self, params):
    input_anchors = anchors.Anchors(params['min_level'], params['max_level'],
                                    params['num_scales'],
                                    params['aspect_ratios'],
                                    params['anchor_scale'],
                                    params['image_size'])
    anchor_labeler = anchors.AnchorLabeler(input_anchors, params['num_classes'])
    example_decoder = tf_example_decoder.TfExampleDecoder()

    def _dataset_parser(value):
      """Parse data to a fixed dimension input image and learning targets."""
      with tf.name_scope('parser'):
        data = example_decoder.decode(value)

        source_id = data['source_id']
        # For the xView dataset only: the original file name is e.g. 122.tif;
        # the commented-out lines below would strip the extension so the id can
        # be converted to the number 122 later on.
        # len = tf.size(tf.string_split([data['source_id']], ""))
        # source_id = tf.substr(data['source_id'], 0, len - 4)

        image = data['image']
        boxes = data['groundtruth_boxes']
        classes = data['groundtruth_classes']
        classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])
        # Handle crowd annotations. As crowd annotations are not large
        # instances, the model ignores them in training.
        if params['skip_crowd']:
          indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
          classes = tf.gather_nd(classes, indices)
          boxes = tf.gather_nd(boxes, indices)

        # the image normalization is identical to Cloud TPU ResNet-50
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        image = _normalize_image(image)

        if params['input_rand_hflip']:
          image, boxes = preprocessor.random_horizontal_flip(image, boxes=boxes)
        image_original_shape = tf.shape(image)
        image, _ = preprocessor.resize_to_range(
            image,
            min_dimension=params['image_size'],
            max_dimension=params['image_size'])
        image_scale = tf.to_float(image_original_shape[0]) / tf.to_float(
            tf.shape(image)[0])
        image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
            image, boxes, keypoints=None)

        image = tf.image.pad_to_bounding_box(image, 0, 0, params['image_size'],
                                             params['image_size'])
        (cls_targets, box_targets,
         num_positives) = anchor_labeler.label_anchors(boxes, classes)

        source_id = tf.string_to_number(source_id, out_type=tf.float32)

        if params['use_bfloat16']:
          image = tf.cast(image, dtype=tf.bfloat16)
        row = (image, cls_targets, box_targets, num_positives, source_id,
               image_scale)
        return row

    batch_size = params['batch_size']

    dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)

    dataset = dataset.shuffle(buffer_size=1024)
    if self._is_training:
      dataset = dataset.repeat()

    def prefetch_dataset(filename):
      dataset = tf.data.TFRecordDataset(filename).prefetch(1)
      return dataset

    dataset = dataset.apply(
        tf.contrib.data.parallel_interleave(
            prefetch_dataset, cycle_length=32, sloppy=True))
    dataset = dataset.shuffle(20)

    dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
    dataset = dataset.prefetch(batch_size)
    dataset = dataset.apply(
        tf.contrib.data.batch_and_drop_remainder(batch_size))
    dataset = dataset.prefetch(1)


    (images, cls_targets, box_targets, num_positives, source_ids,
     image_scales) = dataset.make_one_shot_iterator().get_next()
    labels = {}
    # count num_positives in a batch
    num_positives_batch = tf.reduce_mean(num_positives)
    labels['mean_num_positives'] = tf.reshape(
        tf.tile(tf.expand_dims(num_positives_batch, 0), [
            batch_size,
        ]), [batch_size, 1])

    for level in range(params['min_level'], params['max_level'] + 1):
      labels['cls_targets_%d' % level] = cls_targets[level]
      labels['box_targets_%d' % level] = box_targets[level]
    labels['source_ids'] = source_ids
    labels['image_scales'] = image_scales
    return images, labels
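The `mean_num_positives` label built at the end of this example broadcasts one scalar batch statistic to every example, so each example's loss is normalized by the same value. The tile/reshape pattern in isolation, with toy numbers:

import tensorflow as tf

batch_size = 4
num_positives = tf.constant([10., 12., 8., 14.])     # one count per example
num_positives_batch = tf.reduce_mean(num_positives)  # scalar mean: 11.0
# Expand the scalar to shape [4] via tile, then to [4, 1] so it lines up with
# the other per-example label tensors.
mean_num_positives = tf.reshape(
    tf.tile(tf.expand_dims(num_positives_batch, 0), [batch_size]),
    [batch_size, 1])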
Example #12
File: dataloader.py Project: yanyiting/tpu
    def _create_dataset_parser_fn(self, params):
        """Create parser for parsing input data (dictionary)."""
        example_decoder = tf_example_decoder.TfExampleDecoder(
            use_instance_mask=self._use_instance_mask)

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        features: a dictionary that contains the image and auxiliary
          information. The following describes {key: value} pairs in the
          dictionary.
          image: Image tensor that is preprocessed to have normalized value and
            fixed dimension [image_size, image_size, 3]
          image_info: image information that includes the original height and
            width, the scale of the processed image to the original image, and
            the scaled height and width.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
        labels: a dictionary that contains auxiliary information plus (optional)
          labels. The following describes {key: value} pairs in the dictionary.
          `labels` is only for training.
          score_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors]. The height_l and width_l
            represent the dimension of objectiveness score at l-th level.
          box_targets_dict: ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensor with
            shape [height_l, width_l, num_anchors * 4]. The height_l and
            width_l represent the dimension of bounding box regression output at
            l-th level.
          gt_boxes: Groundtruth bounding box annotations. The box is represented
             in [y1, x1, y2, x2] format. The tensor is padded with -1 to the
             fixed dimension [self._max_num_instances, 4].
          gt_classes: Groundtruth classes annotations. The tensor is padded
            with -1 to the fixed dimension [self._max_num_instances].
          cropped_gt_masks: groundtruth masks cropped by the bounding box and
            resized to a fixed size determined by params['gt_mask_size'].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                image = data['image']
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                source_id = data['source_id']
                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                if self._mode == tf.estimator.ModeKeys.PREDICT:
                    image = preprocess_ops.normalize_image(image)
                    image, image_info, _, _ = preprocess_ops.resize_and_pad(
                        image, params['image_size'], 2**params['max_level'])
                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    return {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id,
                    }

                elif self._mode == tf.estimator.ModeKeys.TRAIN:
                    instance_masks = None
                    if self._use_instance_mask:
                        instance_masks = data['groundtruth_instance_masks']
                    boxes = data['groundtruth_boxes']
                    classes = data['groundtruth_classes']
                    classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                         [-1, 1])
                    if not params['use_category']:
                        classes = tf.cast(tf.greater(classes, 0),
                                          dtype=tf.float32)

                    if (params['skip_crowd_during_training']
                            and self._mode == tf.estimator.ModeKeys.TRAIN):
                        indices = tf.where(
                            tf.logical_not(data['groundtruth_is_crowd']))
                        classes = tf.gather_nd(classes, indices)
                        boxes = tf.gather_nd(boxes, indices)
                        if self._use_instance_mask:
                            instance_masks = tf.gather_nd(
                                instance_masks, indices)

                    image = preprocess_ops.normalize_image(image)
                    # Random flipping.
                    if params['input_rand_hflip']:
                        flipped_results = (
                            preprocess_ops.random_horizontal_flip(
                                image, boxes=boxes, masks=instance_masks))
                        if self._use_instance_mask:
                            image, boxes, instance_masks = flipped_results
                        else:
                            image, boxes = flipped_results
                    # Scaling and padding.
                    image, image_info, boxes, instance_masks = (
                        preprocess_ops.resize_and_pad(image,
                                                      params['image_size'],
                                                      2**params['max_level'],
                                                      boxes=boxes,
                                                      masks=instance_masks))
                    padded_height, padded_width, _ = image.get_shape().as_list(
                    )
                    padded_image_size = (padded_height, padded_width)
                    if self._use_instance_mask:
                        cropped_gt_masks = preprocess_ops.crop_gt_masks(
                            instance_masks, boxes, params['gt_mask_size'],
                            padded_image_size)

                    input_anchors = anchors.Anchors(params['min_level'],
                                                    params['max_level'],
                                                    params['num_scales'],
                                                    params['aspect_ratios'],
                                                    params['anchor_scale'],
                                                    padded_image_size)
                    anchor_labeler = anchors.AnchorLabeler(
                        input_anchors, params['num_classes'],
                        params['rpn_positive_overlap'],
                        params['rpn_negative_overlap'],
                        params['rpn_batch_size_per_im'],
                        params['rpn_fg_fraction'])

                    # Assign anchors.
                    score_targets, box_targets = anchor_labeler.label_anchors(
                        boxes, classes)

                    # Pad groundtruth data.
                    boxes *= image_info[2]
                    boxes = preprocess_ops.pad_to_fixed_size(
                        boxes, -1, [self._max_num_instances, 4])
                    classes = preprocess_ops.pad_to_fixed_size(
                        classes, -1, [self._max_num_instances, 1])

                    # Pads cropped_gt_masks.
                    if self._use_instance_mask:
                        cropped_gt_masks = tf.reshape(
                            cropped_gt_masks, [self._max_num_instances, -1])
                        cropped_gt_masks = preprocess_ops.pad_to_fixed_size(
                            cropped_gt_masks, -1, [
                                self._max_num_instances,
                                (params['gt_mask_size'] + 4)**2
                            ])
                        cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                            self._max_num_instances, params['gt_mask_size'] +
                            4, params['gt_mask_size'] + 4
                        ])

                    if params['use_bfloat16']:
                        image = tf.cast(image, dtype=tf.bfloat16)

                    features = {
                        'images': image,
                        'image_info': image_info,
                        'source_ids': source_id,
                    }
                    labels = {}
                    for level in range(params['min_level'],
                                       params['max_level'] + 1):
                        labels['score_targets_%d' %
                               level] = score_targets[level]
                        labels['box_targets_%d' % level] = box_targets[level]
                    labels['gt_boxes'] = boxes
                    labels['gt_classes'] = classes
                    if self._use_instance_mask:
                        labels['cropped_gt_masks'] = cropped_gt_masks
                    return (features, labels)

        return _dataset_parser
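This parser, like several below, relies on `pad_to_fixed_size` to give groundtruth tensors static shapes, which TPU execution requires. A minimal sketch of what such a helper plausibly does; this is an assumption about its behavior, not the actual `preprocess_ops` implementation:

import tensorflow as tf

def pad_to_fixed_size(data, pad_value, output_shape):
    """Truncates or pads `data` along axis 0 to exactly output_shape[0] rows."""
    max_instances = output_shape[0]
    data = data[:max_instances]  # truncate if there are too many instances
    num_pad = max_instances - tf.shape(data)[0]
    padding = pad_value * tf.ones([num_pad, output_shape[1]],
                                  dtype=data.dtype)
    return tf.reshape(tf.concat([data, padding], axis=0), output_shape)

boxes = tf.constant([[0.1, 0.1, 0.5, 0.5]])     # a single groundtruth box
boxes = pad_to_fixed_size(boxes, -1, [100, 4])  # static shape [100, 4]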
Example #13
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preprocessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        image_scale: Scale of the processed image to the original image.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
          dimension [self._max_num_instances, 4].
        areas: Groundtruth areas annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
        classes: Groundtruth classes annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                # shape = [height, width, 3]
                image = data['image']
                # shape = [M, 4], values normalized to [0, 1]
                boxes = data['groundtruth_boxes']
                # shape = [M]
                classes = data['groundtruth_classes']
                # shape = [M, 1]
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                # NOTE: The autoaugment method works best when used alongside
                # the standard horizontal flipping of images along with size
                # jittering and normalization.
                # if params.get('autoaugment_policy', None) and self._is_training:
                #   from aug import autoaugment  # pylint: disable=g-import-not-at-top
                #   image, boxes = autoaugment.distort_image_with_autoaugment(
                #       image, boxes, params['autoaugment_policy'])

                input_processor = DetectionInputProcessor(
                    image, params['image_size'], boxes, classes)
                # Normalizes the image to [-1.0, 1.0]; could this be switched
                # to YOLO-style normalization by dividing by 255?
                input_processor.normalize_image()
                if self._is_training and params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()
                if self._is_training:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                else:
                    input_processor.set_scale_factors_to_output_size()

                image = input_processor.resize_and_crop_image()
                # boxes: shape [M', 4]; classes: shape [M', 1]
                boxes, classes = input_processor.resize_and_crop_boxes()

                # Assign anchors.
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                # Pad groundtruth data for evaluation.
                image_scale = input_processor.image_scale_to_original
                boxes *= image_scale
                # is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                # is_crowds = pad_to_fixed_size(is_crowds, 0,
                #                               [self._max_num_instances, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_num_instances, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                # if params['use_bfloat16']:
                #   image = tf.cast(image, dtype=tf.bfloat16)
                return (image, cls_targets, box_targets, num_positives,
                        image_scale, boxes, areas, classes)

        batch_size = params['batch_size']
        dataset = tf.data.Dataset.list_files(self._file_pattern,
                                             shuffle=self._is_training)
        if self._is_training:
            dataset = dataset.repeat()

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.data.experimental.parallel_interleave(_prefetch_dataset,
                                                     cycle_length=32,
                                                     sloppy=self._is_training))
        if self._is_training:
            dataset = dataset.shuffle(64)

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
        dataset = dataset.prefetch(batch_size)
        dataset = dataset.batch(batch_size, drop_remainder=True)

        def _process_example(images, cls_targets, box_targets, num_positives,
                             image_scales, boxes, areas, classes):
            """Processes one batch of data."""
            labels = {}
            # Count num_positives in a batch.
            num_positives_batch = tf.reduce_mean(num_positives)
            labels['mean_num_positives'] = tf.reshape(
                tf.tile(tf.expand_dims(num_positives_batch, 0), [
                    batch_size,
                ]), [batch_size, 1])

            for level in range(params['min_level'], params['max_level'] + 1):
                labels['cls_targets_%d' % level] = cls_targets[level]
                labels['box_targets_%d' % level] = box_targets[level]
            # Concatenate groundtruth annotations to a tensor.
            groundtruth_data = tf.concat([boxes, areas, classes], axis=2)
            # labels['source_ids'] = source_ids
            labels['groundtruth_data'] = groundtruth_data
            labels['image_scales'] = image_scales
            return images, labels

        dataset = dataset.map(_process_example)
        dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
        # if self._use_fake_data:
        #   # Turn this dataset into a semi-fake dataset which always loops at
        #   # the first batch. This reduces variance in performance and is
        #   # useful in testing.
        #   dataset = dataset.take(1).cache().repeat()
        return dataset
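The `_process_example` step above packs boxes, areas, and classes into a single `groundtruth_data` tensor so the model function receives one padded label tensor per batch. A toy illustration of the resulting layout, with shapes following the padding code above:

import tensorflow as tf

batch, max_instances = 2, 3
boxes = tf.zeros([batch, max_instances, 4])    # [y1, x1, y2, x2] per row
areas = tf.zeros([batch, max_instances, 1])
classes = tf.zeros([batch, max_instances, 1])
groundtruth_data = tf.concat([boxes, areas, classes], axis=2)
# groundtruth_data.shape == [2, 3, 6]: four box coordinates, then the area,
# then the class id for each (possibly padded) instance slot.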
Example #14
  def __call__(self, params):
    example_decoder = tf_example_decoder.TfExampleDecoder()

    def _parse_example(data):
      with tf.name_scope('augmentation'):
        source_id = data['source_id']
        image = data['image']  # dtype uint8
        raw_shape = tf.shape(image)
        boxes = data['groundtruth_boxes']
        classes = tf.reshape(data['groundtruth_classes'], [-1, 1])

        # Only 80 of the 90 COCO classes are used.
        class_map = tf.convert_to_tensor(constants.CLASS_MAP)
        classes = tf.gather(class_map, classes)
        classes = tf.cast(classes, dtype=tf.float32)

        if self._is_training:
          image, boxes, classes = ssd_crop(image, boxes, classes)
          # ssd_crop resizes and returns image of dtype float32 and does not
          # change its range (i.e., value in between 0--255). Divide by 255.
          # converts it to [0, 1] range. Not doing this before cropping to
          # avoid dtype cast (which incurs additional memory copy).
          image /= 255.0

          # random_horizontal_flip() is hard coded to flip with 50% chance.
          image, boxes = preprocessor.random_horizontal_flip(
              image=image, boxes=boxes)

          # TODO(shibow): Investigate the parameters for color jitter.
          image = color_jitter(
              image, brightness=0.125, contrast=0.5, saturation=0.5, hue=0.05)

          if params['dtype'] == 'bf16':
            image = tf.cast(image, dtype=tf.bfloat16)

          encoded_classes, encoded_boxes, num_matched_boxes = encode_labels(
              boxes, classes)

          # We transpose in the dataloader instead of in the topology to save
          # time.
          encoded_classes, encoded_boxes = transpose_labels(
              encoded_classes, encoded_boxes)

          encoded_classes = tf.cast(encoded_classes, tf.int32)

          labels = {
              constants.NUM_MATCHED_BOXES: num_matched_boxes,
              constants.BOXES: encoded_boxes,
              constants.CLASSES: tf.squeeze(encoded_classes, axis=1),
          }
          # This is for dataloader visualization; actual model doesn't use this.
          if params['visualize_dataloader']:
            box_coder = faster_rcnn_box_coder.FasterRcnnBoxCoder(
                scale_factors=constants.BOX_CODER_SCALES)
            decoded_boxes = tf.expand_dims(box_coder.decode(
                rel_codes=tf.squeeze(encoded_boxes),
                anchors=box_list.BoxList(
                    tf.convert_to_tensor(DefaultBoxes()('ltrb')))
            ).get(), axis=0)
            labels['decoded_boxes'] = tf.squeeze(decoded_boxes)

          return image, labels

        else:
          image = tf.image.resize_images(
              image, size=(constants.IMAGE_SIZE, constants.IMAGE_SIZE))
          # resize_image returns image of dtype float32 and does not change its
          # range. Divide by 255 to convert image to [0, 1] range.
          image /= 255.

          if params['dtype'] == 'bf16':
            image = tf.cast(image, dtype=tf.bfloat16)

          def trim_and_pad(inp_tensor, dim_1):
            """Limit the number of boxes, and pad if necessary."""
            inp_tensor = inp_tensor[:constants.MAX_NUM_EVAL_BOXES]
            num_pad = constants.MAX_NUM_EVAL_BOXES - tf.shape(inp_tensor)[0]
            inp_tensor = tf.pad(inp_tensor, [[0, num_pad], [0, 0]])
            return tf.reshape(
                inp_tensor, [constants.MAX_NUM_EVAL_BOXES, dim_1])

          boxes, classes = trim_and_pad(boxes, 4), trim_and_pad(classes, 1)

          sample = {
              constants.IMAGE: image,
              constants.BOXES: boxes,
              constants.CLASSES: classes,
              constants.SOURCE_ID: tf.string_to_number(source_id, tf.int32),
              constants.RAW_SHAPE: raw_shape,
          }

          if not self._is_training and self._count > params['eval_samples']:
            sample[constants.IS_PADDED] = data[constants.IS_PADDED]
          return sample

    batch_size = params['batch_size']
    dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
    tf.logging.info("Dataset file pattern '%s': found %d files.", self._file_pattern, len(glob.glob(self._file_pattern)))

    if self._is_training:
      dataset_num_shards = params['num_shards']
      dataset_shard_index = params['shard_index']

      dataset = dataset.shard(dataset_num_shards, dataset_shard_index)
      dataset = dataset.shuffle(
          tf.cast(256 / dataset_num_shards, tf.int64))

    # Prefetch data from files.
    def _prefetch_dataset(filename):
      dataset = tf.data.TFRecordDataset(filename).prefetch(1)
      return dataset

    options = tf.data.Options()
    options.experimental_deterministic = not self._is_training
    dataset = dataset.interleave(
      map_func=_prefetch_dataset,
      cycle_length=32,
      block_length=1,
      num_parallel_calls=tf.data.experimental.AUTOTUNE).with_options(options)

    # Parse the fetched records to input tensors for model function.
    dataset = dataset.map(example_decoder.decode, num_parallel_calls=64)

    def _mark_is_padded(data):
      sample = data
      sample[constants.IS_PADDED] = tf.constant(True, dtype=tf.bool)
      return sample

    def _mark_is_not_padded(data):
      sample = data
      sample[constants.IS_PADDED] = tf.constant(False, dtype=tf.bool)
      return sample

    # Pad the dataset to the desired size and mark whether the data is padded.
    # During eval/predict, if local_batch_size * num_shards > 5000, the
    # original dataset size won't fit the computation on that number of shards.
    # In this case, take (local_batch_size - 5000 / num_shards) examples from
    # the original dataset on each shard, mark the appended data as `is_padded`
    # and the original data as `not_padded`, and append the padded data to the
    # original dataset.
    if not self._is_training and self._count > params['eval_samples']:
      padded_dataset = dataset.map(_mark_is_padded)
      dataset = dataset.map(_mark_is_not_padded)
      dataset = dataset.concatenate(padded_dataset).take(
          self._count)

    if self._is_training:
      dataset = dataset.map(
          # pylint: disable=g-long-lambda
          lambda data: (data,
                        tf.greater(tf.shape(data['groundtruth_boxes'])[0], 0)),
          num_parallel_calls=64)
      dataset = dataset.filter(lambda data, pred: pred)
      # Prefetching and caching increases the memory usage, so disable when
      # using fake data.
      meminfo = dict((line.split()[0].rstrip(':'), int(line.split()[1]))
                     for line in open('/proc/meminfo').readlines())
      mem_kib = meminfo['MemTotal']

      # Rough approximation: 1 GiB (1,000,000 KiB) per TFRecord file.
      caching_mem_kib = len(glob.glob(self._file_pattern)) * 1000000

      if not self._use_fake_data:
        if caching_mem_kib > mem_kib:
          dataset = dataset.shuffle(64).repeat()
          tf.logging.info("Dataset cache OFF because MemTotal = %d KiB; this "
                          "may decrease performance.", mem_kib)
        elif dataset_num_shards < 8:
          dataset = dataset.shuffle(64).repeat()
          tf.logging.info("Dataset cache OFF because this is not an 8-node "
                          "training run; this may decrease performance.")
        else:
          dataset = dataset.cache().shuffle(64).repeat()

      dataset = dataset.map(lambda data, _: _parse_example(data), num_parallel_calls=64)
      dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)

    else:
      dataset = dataset.prefetch(batch_size * 64)
      dataset = dataset.map(_parse_example, num_parallel_calls=64)
      dataset = dataset.batch(batch_size=batch_size, drop_remainder=True)

    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)
    options = tf.data.Options()
    options.experimental_threading.max_intra_op_parallelism = 1
    options.experimental_threading.private_threadpool_size = 48
    dataset = dataset.with_options(options)

    if self._use_fake_data:
      dataset = dataset.take(1).cache().repeat()

    return dataset
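The caching branch in this example decides whether `dataset.cache()` is safe by comparing total host RAM against a rough estimate of the record files' footprint. The same heuristic as a standalone helper (the 1 GiB-per-file figure is the snippet's own rough approximation):

import glob

def should_cache(file_pattern, kib_per_file=1000000):
    """Returns True if the estimated dataset size fits in host memory."""
    with open('/proc/meminfo') as f:
        meminfo = dict((line.split()[0].rstrip(':'), int(line.split()[1]))
                       for line in f)
    estimated_kib = len(glob.glob(file_pattern)) * kib_per_file
    return estimated_kib <= meminfo['MemTotal']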
Example #15
File: dataloader.py Project: thuyen/tpu
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preprocessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the processed image to the original image.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
          dimension [self._max_num_instances, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tensor is
          padded with 0 to the fixed dimension [self._max_num_instances].
        areas: Groundtruth areas annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
        classes: Groundtruth classes annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']

                if params['skip_crowd_during_training'] and self._is_training:
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)

                input_processor = DetectionInputProcessor(
                    image, params['image_size'], boxes, classes)
                input_processor.normalize_image()
                if self._is_training and params['input_rand_hflip']:
                    input_processor.random_horizontal_flip()
                if self._is_training:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                else:
                    input_processor.set_scale_factors_to_output_size()
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()

                # Assign anchors.
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                # Pad groundtruth data for evaluation.
                image_scale = input_processor.image_scale_to_original
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                is_crowds = pad_to_fixed_size(is_crowds, 0,
                                              [self._max_num_instances, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_num_instances, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                return (image, cls_targets, box_targets, num_positives,
                        source_id, image_scale, boxes, is_crowds, areas,
                        classes)

        batch_size = params['batch_size']
        # Seed with wall-clock time so each run lists files in a new order.
        dataset = tf.data.Dataset.list_files(self._file_pattern,
                                             shuffle=self._is_training,
                                             seed=int(time.time() * 1e9))
        if self._is_training:
            dataset = dataset.repeat()

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(_prefetch_dataset,
                                                cycle_length=32,
                                                sloppy=self._is_training))
        if self._is_training:
            dataset = dataset.shuffle(64)

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
        dataset = dataset.batch(batch_size, drop_remainder=True)

        def _process_example(images, cls_targets, box_targets, num_positives,
                             source_ids, image_scales, boxes, is_crowds, areas,
                             classes):
            """Processes one batch of data."""
            labels = {}
            # Count num_positives in a batch.
            num_positives_batch = tf.reduce_mean(num_positives)
            labels['mean_num_positives'] = tf.reshape(
                tf.tile(tf.expand_dims(num_positives_batch, 0), [
                    batch_size,
                ]), [batch_size, 1])

            for level in range(params['min_level'], params['max_level'] + 1):
                labels['cls_targets_%d' % level] = cls_targets[level]
                labels['box_targets_%d' % level] = box_targets[level]
            # Concatenate groundtruth annotations to a tensor.
            groundtruth_data = tf.concat([boxes, is_crowds, areas, classes],
                                         axis=2)
            labels['source_ids'] = source_ids
            labels['groundtruth_data'] = groundtruth_data
            labels['image_scales'] = image_scales
            return images, labels

        dataset = dataset.map(_process_example)
        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
        return dataset
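Several of these parsers drop crowd annotations with the same two-step pattern: `tf.where` over the boolean crowd flags yields the indices of non-crowd rows, and `tf.gather_nd` keeps only those rows. The pattern in isolation:

import tensorflow as tf

is_crowd = tf.constant([False, True, False])
boxes = tf.constant([[0.0, 0.0, 1.0, 1.0],
                     [0.0, 0.0, 0.5, 0.5],
                     [0.2, 0.2, 0.8, 0.8]])
indices = tf.where(tf.logical_not(is_crowd))  # shape [2, 1]: rows 0 and 2
boxes = tf.gather_nd(boxes, indices)          # keeps only the non-crowd boxes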
Example #16
    def __call__(self, params):
        image_size = params['dynamic_image_size'] if params[
            'dynamic_input_shapes'] else (params['image_size'],
                                          params['image_size'])
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'], image_size)
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'],
                                               params['rpn_positive_overlap'],
                                               params['rpn_negative_overlap'],
                                               params['rpn_batch_size_per_im'],
                                               params['rpn_fg_fraction'])

        if params['dynamic_input_shapes']:
            height_long_side_image_size = image_size[::-1]
            height_long_side_input_anchors = anchors.Anchors(
                params['min_level'], params['max_level'], params['num_scales'],
                params['aspect_ratios'], params['anchor_scale'],
                height_long_side_image_size)
            height_long_side_anchor_labeler = anchors.AnchorLabeler(
                height_long_side_input_anchors, params['num_classes'],
                params['rpn_positive_overlap'], params['rpn_negative_overlap'],
                params['rpn_batch_size_per_im'], params['rpn_fg_fraction'])

        example_decoder = tf_example_decoder.TfExampleDecoder(
            use_instance_mask=True)

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A dictionary contains an image and groundtruth annotations.

      Returns:
        image: Image tensor that is preprocessed to have normalized value and
          fixed dimension [image_size, image_size, 3]
        cls_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors]. The height_l and width_l
          represent the dimension of class logits at l-th level.
        box_targets_dict: ordered dictionary with keys
          [min_level, min_level+1, ..., max_level]. The values are tensor with
          shape [height_l, width_l, num_anchors * 4]. The height_l and
          width_l represent the dimension of bounding box regression output at
          l-th level.
        num_positives: Number of positive anchors in the image.
        source_id: Source image id. Default value -1 if the source id is empty
          in the groundtruth annotation.
        image_scale: Scale of the processed image to the original image.
        boxes: Groundtruth bounding box annotations. The box is represented in
          [y1, x1, y2, x2] format. The tensor is padded with -1 to the fixed
          dimension [self._max_num_instances, 4].
        is_crowds: Groundtruth annotations to indicate if an annotation
          represents a group of instances by value {0, 1}. The tensor is
          padded with 0 to the fixed dimension [self._max_num_instances].
        areas: Groundtruth areas annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
        classes: Groundtruth classes annotations. The tensor is padded with -1
          to the fixed dimension [self._max_num_instances].
      """
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                instance_masks = data['groundtruth_instance_masks']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])
                areas = data['groundtruth_area']
                is_crowds = data['groundtruth_is_crowd']
                if not params['use_category']:
                    classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

                if (params['skip_crowd_during_training']
                        and self._mode == tf.estimator.ModeKeys.TRAIN):
                    indices = tf.where(
                        tf.logical_not(data['groundtruth_is_crowd']))
                    classes = tf.gather_nd(classes, indices)
                    boxes = tf.gather_nd(boxes, indices)
                    instance_masks = tf.gather_nd(instance_masks, indices)

                input_processor = InstanceSegmentationInputProcessor(
                    image, image_size, params['short_side_image_size'],
                    params['long_side_max_image_size'], boxes, classes,
                    instance_masks)
                input_processor.normalize_image()
                if (self._mode == tf.estimator.ModeKeys.TRAIN
                        and params['input_rand_hflip']):
                    input_processor.random_horizontal_flip()
                if self._mode == tf.estimator.ModeKeys.TRAIN:
                    input_processor.set_training_random_scale_factors(
                        params['train_scale_min'], params['train_scale_max'])
                else:
                    input_processor.set_scale_factors_to_mlperf_reference_size(
                    )
                image = input_processor.resize_and_crop_image()
                boxes, classes = input_processor.resize_and_crop_boxes()
                instance_masks = input_processor.resize_and_crop_masks()
                cropped_gt_masks = input_processor.crop_gt_masks(
                    instance_masks, boxes, params['gt_mask_size'], image_size)

                # Assign anchors.
                if params['dynamic_input_shapes']:
                    is_height_short_side = tf.less(
                        input_processor._scaled_height,  # pylint: disable=protected-access
                        input_processor._scaled_width)  # pylint: disable=protected-access
                    score_targets, box_targets = tf.cond(
                        is_height_short_side,
                        lambda: anchor_labeler.label_anchors(boxes, classes),
                        lambda: height_long_side_anchor_labeler.label_anchors(boxes, classes))  # pylint: disable=line-too-long
                else:
                    score_targets, box_targets = anchor_labeler.label_anchors(
                        boxes, classes)

                source_id = tf.where(tf.equal(source_id, tf.constant('')),
                                     '-1', source_id)
                source_id = tf.string_to_number(source_id)

                image_scale = input_processor.image_scale_to_original
                scaled_height = input_processor.get_height_length()
                scaled_width = input_processor.get_width_length()
                image_info = tf.stack([
                    tf.to_float(scaled_height),
                    tf.to_float(scaled_width),
                    image_scale,
                    tf.to_float(input_processor.get_original_height),
                    tf.to_float(input_processor.get_original_width),
                ])
                # Pad groundtruth data for evaluation.
                boxes *= image_scale
                is_crowds = tf.cast(is_crowds, dtype=tf.float32)
                boxes = pad_to_fixed_size(boxes, -1,
                                          [self._max_num_instances, 4])
                is_crowds = pad_to_fixed_size(is_crowds, 0,
                                              [self._max_num_instances, 1])
                areas = pad_to_fixed_size(areas, -1,
                                          [self._max_num_instances, 1])
                classes = pad_to_fixed_size(classes, -1,
                                            [self._max_num_instances, 1])
                # Pads cropped_gt_masks.
                cropped_gt_masks = tf.reshape(cropped_gt_masks,
                                              [self._max_num_instances, -1])
                cropped_gt_masks = pad_to_fixed_size(
                    cropped_gt_masks, -1,
                    [self._max_num_instances, (params['gt_mask_size'] + 4)**2])
                cropped_gt_masks = tf.reshape(cropped_gt_masks, [
                    self._max_num_instances, params['gt_mask_size'] + 4,
                    params['gt_mask_size'] + 4
                ])

                if params['use_bfloat16']:
                    image = tf.cast(image, dtype=tf.bfloat16)
                return (image, score_targets, box_targets, source_id,
                        image_info, boxes, is_crowds, areas, classes,
                        cropped_gt_masks)

        batch_size = params['batch_size'] if 'batch_size' in params else 1
        dataset = tf.data.Dataset.list_files(
            self._file_pattern,
            shuffle=(self._mode == tf.estimator.ModeKeys.TRAIN))
        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.repeat()

        # Prefetch data from files.
        def _prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(
                _prefetch_dataset,
                cycle_length=32,
                sloppy=(self._mode == tf.estimator.ModeKeys.TRAIN)))
        if self._mode == tf.estimator.ModeKeys.TRAIN:
            dataset = dataset.shuffle(64)

        # Parse the fetched records to input tensors for model function.
        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)

        if params['dynamic_input_shapes']:

            def key_func(image, *args):
                del args
                return tf.cast(tf.shape(image)[0], dtype=tf.int64)

            def reduce_func(unused_key, dataset):
                return dataset.batch(batch_size, drop_remainder=True)

            dataset = dataset.apply(
                tf.contrib.data.group_by_window(
                    key_func=key_func,
                    reduce_func=reduce_func,
                    window_size=params['global_batch_size']))
        else:
            dataset = dataset.prefetch(batch_size)
            dataset = dataset.batch(batch_size, drop_remainder=True)

        def _process_example(images, score_targets, box_targets, source_ids,
                             image_info, boxes, is_crowds, areas, classes,
                             cropped_gt_masks):
            """Processes one batch of data."""
            # Transposes images from (N, H, W, C) -> (H, W, N, C). As the batch
            # size is less than 8, the batch goes to the second minor dimension.
            if (params['transpose_input']
                    and self._mode == tf.estimator.ModeKeys.TRAIN):
                images = tf.transpose(images, [1, 2, 0, 3])

            labels = {}
            for level in range(params['min_level'], params['max_level'] + 1):
                labels['score_targets_%d' % level] = score_targets[level]
                labels['box_targets_%d' % level] = box_targets[level]
            # Concatenate groundtruth annotations to a tensor.
            groundtruth_data = tf.concat([boxes, is_crowds, areas, classes],
                                         axis=2)
            labels['source_ids'] = source_ids
            labels['groundtruth_data'] = groundtruth_data
            labels['image_info'] = image_info
            labels['cropped_gt_masks'] = cropped_gt_masks
            if self._mode == tf.estimator.ModeKeys.PREDICT:
                features = dict(images=images,
                                image_info=image_info,
                                groundtruth_data=groundtruth_data,
                                source_ids=source_ids)
                return features
            elif params['dynamic_input_shapes']:
                # For dynamic input shapes, we have two TPU programs. A tf.cond
                # op is run on the host side to decide which TPU program to
                # launch. As we prefetch data on the device side, the data for
                # evaluating the shape would need to be sent back from device
                # to host. Thus we return the `images` shape here explicitly to
                # avoid copying the entire `images` tensor back.
                return tf.shape(images), images, labels
            else:
                return images, labels

        dataset = dataset.map(_process_example)
        dataset = dataset.prefetch(tf.contrib.data.AUTOTUNE)
        return dataset
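The `group_by_window` transform used for dynamic input shapes keys each image by its leading dimension, so height-long and width-long images are batched separately and every batch keeps one static orientation. A minimal sketch of that bucketing pattern with toy tensors:

import tensorflow as tf

batch_size = 2
landscape = tf.data.Dataset.from_tensors(tf.zeros([3, 4])).repeat(3)
portrait = tf.data.Dataset.from_tensors(tf.zeros([4, 3])).repeat(3)
dataset = landscape.concatenate(portrait)

dataset = dataset.apply(
    tf.data.experimental.group_by_window(
        # Bucket each image by its leading (height) dimension.
        key_func=lambda image: tf.cast(tf.shape(image)[0], tf.int64),
        # Batch only within a bucket, so shapes stay uniform per batch.
        reduce_func=lambda key, window: window.batch(batch_size,
                                                     drop_remainder=True),
        window_size=batch_size))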
Example #17
File: dataloader.py Project: jhseu/tpu
    def __call__(self, params):
        input_anchors = anchors.Anchors(params['min_level'],
                                        params['max_level'],
                                        params['num_scales'],
                                        params['aspect_ratios'],
                                        params['anchor_scale'],
                                        params['image_size'])
        anchor_labeler = anchors.AnchorLabeler(input_anchors,
                                               params['num_classes'])
        example_decoder = tf_example_decoder.TfExampleDecoder()

        def get_dataset_for_mode(data_dir, is_training):
            """Return the location of input samples for a given mode."""
            if is_training:
                return '%s/coco_train2017_nocrowd-*' % data_dir
            return '%s/coco_val2017-*' % data_dir

        def _dataset_parser(value):
            """Parse data to a fixed dimension input image and learning targets."""
            with tf.name_scope('parser'):
                data = example_decoder.decode(value)
                source_id = data['source_id']
                image = data['image']
                boxes = data['groundtruth_boxes']
                classes = data['groundtruth_classes']
                classes = tf.reshape(tf.cast(classes, dtype=tf.float32),
                                     [-1, 1])

                # The image normalization is identical to Cloud TPU ResNet-50.
                image = tf.image.convert_image_dtype(image, dtype=tf.float32)
                image = _normalize_image(image)

                if params['input_rand_hflip']:
                    image, boxes = preprocessor.random_horizontal_flip(
                        image, boxes=boxes)
                image_original_shape = tf.shape(image)
                image, _ = preprocessor.resize_to_range(
                    image,
                    min_dimension=params['image_size'],
                    max_dimension=params['image_size'])
                image_scale = tf.to_float(
                    image_original_shape[0]) / tf.to_float(tf.shape(image)[0])
                image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
                    image, boxes, keypoints=None)

                image = tf.image.pad_to_bounding_box(image, 0, 0,
                                                     params['image_size'],
                                                     params['image_size'])
                (cls_targets, box_targets,
                 num_positives) = anchor_labeler.label_anchors(boxes, classes)

                source_id = tf.string_to_number(source_id, out_type=tf.float32)
                row = (image, cls_targets, box_targets, num_positives,
                       source_id, image_scale)
                return row

        batch_size = params['batch_size']

        data_file_pattern = get_dataset_for_mode(self._data_dir,
                                                 self._is_training)
        dataset = tf.data.Dataset.list_files(data_file_pattern)

        dataset = dataset.shuffle(buffer_size=1024)
        if self._is_training:
            dataset = dataset.repeat()

        def prefetch_dataset(filename):
            dataset = tf.data.TFRecordDataset(filename).prefetch(1)
            return dataset

        dataset = dataset.apply(
            tf.contrib.data.parallel_interleave(prefetch_dataset,
                                                cycle_length=32,
                                                sloppy=True))
        dataset = dataset.shuffle(20)

        dataset = dataset.map(_dataset_parser, num_parallel_calls=64)
        dataset = dataset.prefetch(batch_size)
        dataset = dataset.apply(
            tf.contrib.data.batch_and_drop_remainder(batch_size))
        dataset = dataset.prefetch(1)

        (images, cls_targets, box_targets, num_positives, source_ids,
         image_scales) = dataset.make_one_shot_iterator().get_next()
        labels = {}
        # Compute the mean number of positive anchors in the batch.
        num_positives_batch = tf.reduce_mean(num_positives)
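        # Broadcast the scalar batch mean to shape [batch_size, 1] so it can
        # ride along with the per-example labels.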
        labels['mean_num_positives'] = tf.reshape(
            tf.tile(tf.expand_dims(num_positives_batch, 0), [
                batch_size,
            ]), [batch_size, 1])

        for level in range(params['min_level'], params['max_level'] + 1):
            labels['cls_targets_%d' % level] = cls_targets[level]
            labels['box_targets_%d' % level] = box_targets[level]
        labels['source_ids'] = source_ids
        labels['image_scales'] = image_scales
        return images, labels
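
Example #17 relies on `tf.contrib.data`, which exists only in TensorFlow 1.x. As a hedged sketch (not the project's code), the closest TF 2.x equivalents of the three contrib calls used above are as follows; the file pattern is a hypothetical placeholder:

import tensorflow as tf

batch_size = 64
file_pattern = '/tmp/coco_train2017_nocrowd-*'  # hypothetical path

dataset = tf.data.Dataset.list_files(file_pattern)
# tf.contrib.data.parallel_interleave(..., sloppy=True) becomes
# Dataset.interleave with deterministic=False.
dataset = dataset.interleave(
    lambda filename: tf.data.TFRecordDataset(filename).prefetch(1),
    cycle_length=32,
    num_parallel_calls=tf.data.AUTOTUNE,
    deterministic=False)
# tf.contrib.data.batch_and_drop_remainder(batch_size) becomes
# Dataset.batch(batch_size, drop_remainder=True).
dataset = dataset.batch(batch_size, drop_remainder=True)
# make_one_shot_iterator().get_next() is replaced by plain Python
# iteration over the dataset in eager mode.
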
  def __call__(self, params, num_examples=0):
    image_size = params['image_size']
    input_anchors = anchors.Anchors(
        params['min_level'], params['max_level'], params['num_scales'],
        params['aspect_ratios'], params['anchor_scale'], image_size)
    anchor_labeler = anchors.AnchorLabeler(
        input_anchors, params['num_classes'], params['rpn_positive_overlap'],
        params['rpn_negative_overlap'], params['rpn_batch_size_per_im'],
        params['rpn_fg_fraction'])

    height_long_side_image_size = image_size[::-1]
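    # image_size is reversed above so that these anchors match images whose
    # height is the long side (selected via is_height_short_side below).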
    height_long_side_input_anchors = anchors.Anchors(
        params['min_level'], params['max_level'], params['num_scales'],
        params['aspect_ratios'], params['anchor_scale'],
        height_long_side_image_size)
    height_long_side_anchor_labeler = anchors.AnchorLabeler(
        height_long_side_input_anchors, params['num_classes'],
        params['rpn_positive_overlap'], params['rpn_negative_overlap'],
        params['rpn_batch_size_per_im'], params['rpn_fg_fraction'])

    example_decoder = tf_example_decoder.TfExampleDecoder(
        use_instance_mask=True)

    def _dataset_parser(value):
      """Parse data to a fixed dimension input image and learning targets.

      Args:
        value: A serialized tf.Example that contains an image and groundtruth
          annotations.

      Returns:
        features: A dictionary that contains the image and auxiliary
          information. The following describes {key: value} pairs in the
          dictionary.
          image: An image tensor that is preprocessed to have normalized value
            and fixed dimension [image_size, image_size, 3]
          image_info: Image information that includes the original height and
            width, the scale of the processed image to the original image, and
            the scaled height and width.
          source_ids: Source image id. Default value -1 if the source id is
            empty in the groundtruth annotation.
        labels: (only for training) A dictionary that contains groundtruth
          labels. The following describes {key: value} pairs in the dictionary.
          score_targets_dict: An ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensors
            with shape [height_l, width_l, num_anchors]. The height_l and
            width_l represent the spatial dimensions of the objectness score
            at the l-th level.
          box_targets_dict: An ordered dictionary with keys
            [min_level, min_level+1, ..., max_level]. The values are tensors
            with shape [height_l, width_l, num_anchors * 4]. The height_l and
            width_l represent the spatial dimensions of the bounding box
            regression output at the l-th level.
          gt_boxes: Groundtruth bounding box annotations. The box is represented
            in [y1, x1, y2, x2] format. The tensor is padded with -1 to the
            fixed dimension [self._max_num_instances, 4].
          gt_classes: Groundtruth classes annotations. The tensor is padded
            with -1 to the fixed dimension [self._max_num_instances].
          cropped_gt_masks: Groundtruth masks cropped by the bounding box and
            resized to a fixed size determined by params['gt_mask_size'].
      """
      with tf.name_scope('parser'):
        data = example_decoder.decode(value)

        image = data['image']
        source_id = data['source_id']
        source_id = tf.where(tf.equal(source_id, tf.constant('')), '-1',
                             source_id)
        source_id = tf.string_to_number(source_id)

        if self._mode == tf.estimator.ModeKeys.PREDICT:
          input_processor = InstanceSegmentationInputProcessor(
              image, image_size, params['short_side_image_size'],
              params['long_side_max_image_size'])
          input_processor.normalize_image()
          input_processor.set_scale_factors_to_mlperf_reference_size()
          image = input_processor.resize_and_crop_image()
          if params['use_bfloat16']:
            image = tf.cast(image, dtype=tf.bfloat16)

          image_info = input_processor.get_image_info()
          return {'images': image, 'image_info': image_info,
                  'source_ids': source_id}

        # The following part is for training.
        instance_masks = data['groundtruth_instance_masks']
        boxes = data['groundtruth_boxes']
        classes = data['groundtruth_classes']
        classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])
        if not params['use_category']:
          classes = tf.cast(tf.greater(classes, 0), dtype=tf.float32)

        if (params['skip_crowd_during_training'] and
            self._mode == tf.estimator.ModeKeys.TRAIN):
          indices = tf.where(tf.logical_not(data['groundtruth_is_crowd']))
          classes = tf.gather_nd(classes, indices)
          boxes = tf.gather_nd(boxes, indices)
          instance_masks = tf.gather_nd(instance_masks, indices)

        input_processor = InstanceSegmentationInputProcessor(
            image, image_size, params['short_side_image_size'],
            params['long_side_max_image_size'], boxes, classes,
            instance_masks)
        input_processor.normalize_image()
        if params['input_rand_hflip']:
          input_processor.random_horizontal_flip()

        input_processor.set_scale_factors_to_mlperf_reference_size()
        image = input_processor.resize_and_crop_image()

        boxes, classes = input_processor.resize_and_crop_boxes()
        cropped_gt_masks = input_processor.crop_gt_masks(
            params['gt_mask_size'])

        image_info = input_processor.get_image_info()
        # Assign anchors.
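        # image_info[3] and image_info[4] are the scaled height and width (see
        # the docstring above), so this picks the labeler that matches the
        # image orientation.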
        is_height_short_side = tf.less(image_info[3], image_info[4])
        score_targets, box_targets = tf.cond(
            is_height_short_side,
            lambda: anchor_labeler.label_anchors(boxes, classes),
            lambda: height_long_side_anchor_labeler.label_anchors(boxes, classes))  # pylint: disable=line-too-long

        # Pad groundtruth data.
        boxes *= image_info[2]
        boxes = pad_to_fixed_size(boxes, -1, [self._max_num_instances, 4])
        classes = pad_to_fixed_size(classes, -1, [self._max_num_instances, 1])
        # Pads cropped_gt_masks.
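        # Masks are flattened to [num_instances, (gt_mask_size + 4)^2], padded
        # with -1 rows up to max_num_instances, then reshaped back to 2-D masks.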
        cropped_gt_masks = tf.reshape(
            cropped_gt_masks, [-1, (params['gt_mask_size'] + 4) ** 2])
        cropped_gt_masks = pad_to_fixed_size(
            cropped_gt_masks, -1,
            [self._max_num_instances, (params['gt_mask_size'] + 4) ** 2])
        cropped_gt_masks = tf.reshape(
            cropped_gt_masks,
            [self._max_num_instances, params['gt_mask_size'] + 4,
             params['gt_mask_size'] + 4])
        if params['use_bfloat16']:
          image = tf.cast(image, dtype=tf.bfloat16)

        features = {}
        features['images'] = image
        features['image_info'] = image_info
        features['source_ids'] = source_id

        labels = {}
        for level in range(params['min_level'], params['max_level'] + 1):
          labels['score_targets_%d' % level] = score_targets[level]
          labels['box_targets_%d' % level] = box_targets[level]
        labels['gt_boxes'] = boxes
        labels['gt_classes'] = classes
        labels['cropped_gt_masks'] = cropped_gt_masks
        return features, labels

    batch_size = params['batch_size'] if 'batch_size' in params else 1
    dataset = tf.data.Dataset.list_files(self._file_pattern, shuffle=False)
    if self._mode == tf.estimator.ModeKeys.TRAIN:
      # Shard and shuffle the image files so each shard gets a distinctive
      # and random set of images.
      # To improve model convergence under a large number of hosts, multiple
      # hosts may share the same dataset shard. This allows a host to get
      # more training images.
      if 'dataset_num_shards' in params:
        train_actual_num_shards = int(params['dataset_num_shards'] //
                                      params['hosts_per_dataset_shard'])
        dataset = dataset.shard(
            train_actual_num_shards,
            int(params['dataset_shard_id'] //
                params['hosts_per_dataset_shard']))
        dataset = dataset.shuffle(tf.to_int64(256 // train_actual_num_shards))

    # Prefetch data from files.
    def _prefetch_dataset(filename):
      dataset = tf.data.TFRecordDataset(filename).prefetch(1)
      return dataset

    dataset = dataset.apply(
        tf.data.experimental.parallel_interleave(
            _prefetch_dataset, cycle_length=32,
            sloppy=(self._mode == tf.estimator.ModeKeys.TRAIN)))
    if self._mode == tf.estimator.ModeKeys.TRAIN:
      # Cache the raw images and shuffle them with a reasonably large buffer.
      dataset = dataset.cache().shuffle(params['shuffle_buffer_size']).repeat()

    if self._distributed_eval:
      dataset = dataset.shard(params['dataset_num_shards'],
                              params['dataset_shard_id'])

    # Parse the fetched records to input tensors for model function.
    dataset = dataset.map(_dataset_parser, num_parallel_calls=64)

    def horizontal_image(*args):
      image_info = args[0]['image_info']
      return tf.less(image_info[3], image_info[4])

    def vertical_image(*args):
      return tf.logical_not(horizontal_image(*args))

    # Pad the dataset to the desired size and mark whether each sample is
    # padding. During PREDICT, if batch_size_per_shard * num_shards > 5000
    # (the number of eval samples in the COCO dataset), the original dataset
    # size won't be evenly divisible by the number of shards. In this case,
    # the eval dataset takes (batch_size_per_shard * num_shards - 5000) extra
    # samples from the original dataset, marks them as `is_padding`, and
    # marks the original data as not padding. This ensures correctness of
    # evaluation on exactly 5000 samples. The padding is appended to the
    # original dataset (only in PREDICT).
    if (self._mode == tf.estimator.ModeKeys.PREDICT and
        num_examples > params['eval_samples']):
      def _mark_is_padding(features):
        features[mask_rcnn_params.IS_PADDING] = tf.constant(
            True, dtype=tf.bool, shape=[1])
        return features

      def _mark_is_not_padding(features):
        features[mask_rcnn_params.IS_PADDING] = tf.constant(
            False, dtype=tf.bool, shape=[1])
        return features
      dataset_padding = dataset
      # Pad an equal number of horizontal and vertical images and interleave them.
      pad_size = int(math.ceil(num_examples - params['eval_samples']))
      dataset_padding_hor = dataset_padding.filter(horizontal_image).map(
          _mark_is_padding).take(pad_size)
      dataset_padding_ver = dataset_padding.filter(vertical_image).map(
          _mark_is_padding).take(pad_size)
      interleaved_dataset_padding = tf.data.experimental.choose_from_datasets(
          [dataset_padding_hor, dataset_padding_ver],
          tf.data.Dataset.range(2).repeat(pad_size))
      if self._distributed_eval:
        dataset = dataset.map(_mark_is_not_padding).take(
            int(
                math.ceil(params['eval_samples'] /
                          params['dataset_num_shards'])))
      else:
        dataset = dataset.map(_mark_is_not_padding).take(params['eval_samples'])
      dataset = dataset.concatenate(interleaved_dataset_padding)

    def key_func(*args):
      return tf.cast(horizontal_image(*args), dtype=tf.int64)

    def reduce_func(unused_key, dataset):
      return dataset.batch(batch_size, drop_remainder=True)

    dataset = dataset.apply(
        tf.data.experimental.group_by_window(
            key_func=key_func,
            reduce_func=reduce_func,
            window_size=(params['batch_size'] *
                         params['replicas_per_worker'])))

    dataset = dataset.map(
        functools.partial(self._transform_images, params),
        num_parallel_calls=16)

    dataset = dataset.prefetch(tf.data.experimental.AUTOTUNE)

    if (self._mode == tf.estimator.ModeKeys.TRAIN and
        num_examples > 0):
      dataset = dataset.take(num_examples)
    # Make the eval dataset repeat to avoid re-initializing it every epoch.
    if self._distributed_eval:
      dataset = dataset.take(
          int(num_examples / params['dataset_num_shards'] /
              params['batch_size'])).cache().repeat()
    if self._use_fake_data:
      # Turn this dataset into a semi-fake dataset which always loops over the
      # first batch. This reduces variance in performance and is useful for
      # testing.
      dataset = dataset.take(1).cache().repeat()

    options = tf.data.Options()
    options.experimental_threading.max_intra_op_parallelism = 1
    dataset = dataset.with_options(options)

    return dataset
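
The padding logic above leans on `tf.data.experimental.choose_from_datasets`, which pulls the next element from one of several datasets according to an index dataset. A minimal sketch of that interleaving, with made-up names and values:

import tensorflow as tf

horizontal = tf.data.Dataset.from_tensor_slices(['h0', 'h1', 'h2'])
vertical = tf.data.Dataset.from_tensor_slices(['v0', 'v1', 'v2'])
# 0, 1, 0, 1, 0, 1: alternate between the two datasets, mirroring how the
# padding above alternates horizontal and vertical images so that each
# group_by_window bucket fills up evenly.
choice = tf.data.Dataset.range(2).repeat(3)
interleaved = tf.data.experimental.choose_from_datasets(
    [horizontal, vertical], choice)
print(list(interleaved.as_numpy_iterator()))
# [b'h0', b'v0', b'h1', b'v1', b'h2', b'v2']
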