Ejemplo n.º 1
0
    def _build_inputs(self, image):
        """Preprocesses one image and builds its anchors for serving.

        The image is normalized with `MEAN_RGB` / `STDDEV_RGB`, then resized
        and cropped to `self._input_image_size` without scale augmentation.
        The padded size is derived by `compute_padded_size` from the input
        size and `2**max_level`.

        Args:
          image: the image tensor to preprocess.

        Returns:
          A tuple `(image, anchor_boxes, image_shape)` where `image` is the
          preprocessed image, `anchor_boxes` is the anchor generator output
          for the configured input size, and `image_shape` is row 1 of the
          `image_info` tensor returned by `resize_and_crop_image`.
        """
        model_cfg = self._params.task.model
        anchor_cfg = model_cfg.anchor
        target_size = self._input_image_size

        # Normalize pixel values before any geometric transformation.
        normalized = preprocess_ops.normalize_image(
            image, offset=MEAN_RGB, scale=STDDEV_RGB)

        # Both augmentation scales are 1.0, so no random rescaling is applied.
        padded_size = preprocess_ops.compute_padded_size(
            target_size, 2**model_cfg.max_level)
        resized, image_info = preprocess_ops.resize_and_crop_image(
            normalized,
            target_size,
            padded_size=padded_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0)

        generator = anchor.build_anchor_generator(
            min_level=model_cfg.min_level,
            max_level=model_cfg.max_level,
            num_scales=anchor_cfg.num_scales,
            aspect_ratios=anchor_cfg.aspect_ratios,
            anchor_size=anchor_cfg.anchor_size)
        anchor_boxes = generator(image_size=(target_size[0], target_size[1]))

        return resized, anchor_boxes, image_info[1, :]
Ejemplo n.º 2
0
    def testLabelAnchors(self, min_level, max_level, num_scales, aspect_ratios,
                         anchor_size):
        """Checks which anchors get matched to a box copied from an anchor."""
        image_size = [512, 512]
        gt_class_id = 2
        # The matched anchors are the anchor used as ground truth and the
        # anchor at the next octave scale on the same location.
        expected_locations = [[0, 0, 0], [0, 0, 1]]

        generator = anchor.build_anchor_generator(
            min_level, max_level, num_scales, aspect_ratios, anchor_size)
        anchor_boxes = generator(image_size)
        labeler = anchor.AnchorLabeler()

        # The first anchor of level '3' doubles as the ground-truth box. It
        # should map to two anchors with two intermediate scales at the same
        # location.
        gt_boxes = anchor_boxes['3'][0:1, 0, 0:4]
        gt_classes = tf.constant([[gt_class_id]], dtype=tf.float32)
        labeled = labeler.label_anchors(anchor_boxes, gt_boxes, gt_classes)
        cls_targets, box_targets, _, box_weights = labeled

        # Materialize every target as a numpy array.
        cls_targets = {
            level: target.numpy() for level, target in cls_targets.items()
        }
        box_targets = {
            level: target.numpy() for level, target in box_targets.items()
        }
        box_weights = box_weights.numpy()

        matched = np.where(cls_targets[str(min_level)] > -1)
        anchor_locations = np.vstack(matched).T
        self.assertAllClose(expected_locations, anchor_locations)
        # Two anchor boxes on min_level got matched to the gt_boxes.
        self.assertAllClose(tf.reduce_sum(box_weights), 2)
Ejemplo n.º 3
0
 def _build_anchor_boxes(self):
     """Generates the anchor boxes for the configured input image size.

     Returns:
       The output of the anchor generator built from the task's model config
       (`min_level`, `max_level` and the `anchor` sub-config), called with
       `image_size=(self._input_image_size[0], self._input_image_size[1])`.
     """
     model_cfg = self.params.task.model
     anchor_cfg = model_cfg.anchor
     generator = anchor.build_anchor_generator(
         min_level=model_cfg.min_level,
         max_level=model_cfg.max_level,
         num_scales=anchor_cfg.num_scales,
         aspect_ratios=anchor_cfg.aspect_ratios,
         anchor_size=anchor_cfg.anchor_size)
     size = self._input_image_size
     return generator(image_size=(size[0], size[1]))
Ejemplo n.º 4
0
    def testEquivalentResult(self, min_level, max_level, aspect_ratios,
                             num_scales, anchor_size, image_size):
        """Checks the anchor generator agrees with `anchor.Anchor` per level."""
        generator = anchor.build_anchor_generator(
            min_level=min_level,
            max_level=max_level,
            num_scales=num_scales,
            aspect_ratios=aspect_ratios,
            anchor_size=anchor_size)
        actual = generator(image_size)

        reference = anchor.Anchor(min_level, max_level, num_scales,
                                  aspect_ratios, anchor_size, image_size)

        # Every level present in the reference must match the generator.
        for level, expected_boxes in reference.multilevel_boxes.items():
            self.assertAllClose(expected_boxes, actual[level])
Ejemplo n.º 5
0
    def test_forward(self, strategy, image_size, training, has_att_heads):
        """Runs a forward pass of an R50-FPN RetinaNet and checks its outputs.

        In training mode the per-level class, box and (optionally) attribute
        head outputs are checked for presence and shape. In inference mode
        the detection outputs are checked for presence and shape.
        """
        tf.keras.backend.set_image_data_format('channels_last')
        batch_size = 2
        num_classes = 3
        min_level, max_level = 3, 7
        num_scales = 3
        aspect_ratios = [1.0]
        num_anchors_per_location = num_scales * len(aspect_ratios)
        height, width = image_size[0], image_size[1]

        images = np.random.rand(batch_size, height, width, 3)
        image_shape = np.array([[height, width]] * batch_size)

        with strategy.scope():
            anchor_gen = anchor.build_anchor_generator(
                min_level=min_level,
                max_level=max_level,
                num_scales=num_scales,
                aspect_ratios=aspect_ratios,
                anchor_size=3)
            anchor_boxes = anchor_gen(image_size)
            # Add a leading batch axis and repeat the anchors for each image.
            for level, boxes in anchor_boxes.items():
                anchor_boxes[level] = tf.tile(
                    tf.expand_dims(boxes, axis=0), [batch_size, 1, 1, 1])

            backbone = resnet.ResNet(model_id=50)
            decoder = fpn.FPN(input_specs=backbone.output_specs,
                              min_level=min_level,
                              max_level=max_level)

            attribute_heads = ({'depth': ('regression', 1)}
                               if has_att_heads else None)
            head = dense_prediction_heads.RetinaNetHead(
                min_level=min_level,
                max_level=max_level,
                num_classes=num_classes,
                attribute_heads=attribute_heads,
                num_anchors_per_location=num_anchors_per_location)
            generator = detection_generator.MultilevelDetectionGenerator(
                max_num_detections=10)
            model = retinanet_model.RetinaNetModel(
                backbone=backbone,
                decoder=decoder,
                head=head,
                detection_generator=generator)

            model_outputs = model(images,
                                  image_shape,
                                  anchor_boxes,
                                  training=training)

        if not training:
            # Inference mode: post-processed detections are returned.
            for name in ('detection_boxes', 'detection_scores',
                         'detection_classes', 'detection_attributes',
                         'num_detections'):
                self.assertIn(name, model_outputs)
            self.assertAllEqual([batch_size, 10, 4],
                                model_outputs['detection_boxes'].numpy().shape)
            self.assertAllEqual(
                [batch_size, 10],
                model_outputs['detection_scores'].numpy().shape)
            self.assertAllEqual(
                [batch_size, 10],
                model_outputs['detection_classes'].numpy().shape)
            self.assertAllEqual(
                [batch_size], model_outputs['num_detections'].numpy().shape)
            if has_att_heads:
                depth = model_outputs['detection_attributes']['depth']
                self.assertAllEqual([batch_size, 10, 1], depth.numpy().shape)
            return

        # Training mode: raw head outputs are returned, keyed by level.
        cls_outputs = model_outputs['cls_outputs']
        box_outputs = model_outputs['box_outputs']
        att_outputs = model_outputs['att_outputs']
        for level in range(min_level, max_level + 1):
            key = str(level)
            feat_h = height // 2**level
            feat_w = width // 2**level
            self.assertIn(key, cls_outputs)
            self.assertIn(key, box_outputs)
            self.assertAllEqual(
                [batch_size, feat_h, feat_w,
                 num_classes * num_anchors_per_location],
                cls_outputs[key].numpy().shape)
            self.assertAllEqual(
                [batch_size, feat_h, feat_w, 4 * num_anchors_per_location],
                box_outputs[key].numpy().shape)
            if has_att_heads:
                for att in att_outputs.values():
                    self.assertAllEqual(
                        [batch_size, feat_h, feat_w,
                         1 * num_anchors_per_location],
                        att[key].numpy().shape)
Ejemplo n.º 6
0
    def _parse_train_data(self, data):
        """Parses data for training.

        Args:
          data: the decoded tensor dictionary from TfExampleDecoder.

        Returns:
          image: image tensor that is preprocessed to have normalized value
            and dimension [output_size[0], output_size[1], 3]
          labels: a dictionary of tensors used for training. The following
            describes {key: value} pairs in the dictionary.
            image_info: a 2D `Tensor` that encodes the information of the
              image and the applied preprocessing. It is in the format of
              [[original_height, original_width],
               [scaled_height, scaled_width],
               [y_scale, x_scale], [y_offset, x_offset]].
            anchor_boxes: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, 4] representing anchor boxes at
              each level.
            rpn_score_targets: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, anchors_per_location]. The
              height_l and width_l represent the dimension of class logits at
              l-th level.
            rpn_box_targets: ordered dictionary with keys
              [min_level, min_level+1, ..., max_level]. The values are tensor
              with shape [height_l, width_l, anchors_per_location * 4]. The
              height_l and width_l represent the dimension of bounding box
              regression output at l-th level.
            gt_boxes: Groundtruth bounding box annotations. The box is
              represented in [y1, x1, y2, x2] format. The coordinates are
              w.r.t the scaled image that is fed to the network. The tensor
              is padded with -1 to the fixed dimension
              [self._max_num_instances, 4].
            gt_classes: Groundtruth classes annotations. The tensor is padded
              with -1 to the fixed dimension [self._max_num_instances].
            gt_masks: groundtruth masks cropped by the bounding box and
              resized to a fixed size determined by mask_crop_size. Only
              present when `self._include_mask` is set.
        """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        if self._include_mask:
            masks = data['groundtruth_instance_masks']

        is_crowds = data['groundtruth_is_crowd']
        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                # When `is_crowds` is empty, keep every annotation.
                indices = tf.cond(
                    tf.greater(tf.size(is_crowds), 0),
                    lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    lambda: tf.cast(tf.range(num_groundtruths), tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            if self._include_mask:
                masks = tf.gather(masks, indices)

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            if self._include_mask:
                image, boxes, masks = preprocess_ops.random_horizontal_flip(
                    image, boxes, masks)
            else:
                image, boxes, _ = preprocess_ops.random_horizontal_flip(
                    image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        # Now the coordinates of boxes are w.r.t. the original image.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        # Now the coordinates of boxes are w.r.t the scaled image.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)

        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        if self._include_mask:
            masks = tf.gather(masks, indices)
            # Transfer boxes to the original image space and do normalization.
            # This undoes the crop offset and the scaling applied above so the
            # boxes can be used to crop the (unscaled) masks.
            cropped_boxes = boxes + tf.tile(tf.expand_dims(offset, axis=0),
                                            [1, 2])
            cropped_boxes /= tf.tile(tf.expand_dims(image_scale, axis=0),
                                     [1, 2])
            cropped_boxes = box_ops.normalize_boxes(cropped_boxes, image_shape)
            num_masks = tf.shape(masks)[0]
            # Each mask is cropped by its own box (box i -> mask i).
            masks = tf.image.crop_and_resize(
                tf.expand_dims(masks, axis=-1),
                cropped_boxes,
                box_indices=tf.range(num_masks, dtype=tf.int32),
                crop_size=[self._mask_crop_size, self._mask_crop_size],
                method='bilinear')
            masks = tf.squeeze(masks, axis=-1)

        # Assigns anchor targets.
        # Note that after the target assignment, box targets are absolute pixel
        # offsets w.r.t. the scaled image.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.RpnAnchorLabeler(self._rpn_match_threshold,
                                                 self._rpn_unmatched_threshold,
                                                 self._rpn_batch_size_per_im,
                                                 self._rpn_fg_fraction)
        rpn_score_targets, rpn_box_targets = anchor_labeler.label_anchors(
            anchor_boxes, boxes,
            tf.cast(tf.expand_dims(classes, axis=-1), dtype=tf.float32))

        # Casts input image to self._dtype
        image = tf.cast(image, dtype=self._dtype)

        # Packs labels for model_fn outputs.
        labels = {
            'anchor_boxes':
            anchor_boxes,
            'image_info':
            image_info,
            'rpn_score_targets':
            rpn_score_targets,
            'rpn_box_targets':
            rpn_box_targets,
            'gt_boxes':
            preprocess_ops.clip_or_pad_to_fixed_size(boxes,
                                                     self._max_num_instances,
                                                     -1),
            'gt_classes':
            preprocess_ops.clip_or_pad_to_fixed_size(classes,
                                                     self._max_num_instances,
                                                     -1),
        }
        if self._include_mask:
            labels['gt_masks'] = preprocess_ops.clip_or_pad_to_fixed_size(
                masks, self._max_num_instances, -1)

        return image, labels
Ejemplo n.º 7
0
    def _parse_eval_data(self, data):
        """Parses data for evaluation.

        Args:
          data: the decoded tensor dictionary from TfExampleDecoder.

        Returns:
          image: the normalized, resized image tensor, cast to `self._dtype`.
          labels: a dictionary with the following {key: value} pairs.
            image_info: the second value returned by
              `preprocess_ops.resize_and_crop_image` for this image.
            anchor_boxes: the anchor generator output for the resized image.
            groundtruths: a dictionary with keys `source_id`, `height`,
              `width`, `num_detections`, `boxes` (in pixel coordinates of the
              original image), `classes`, `areas` and `is_crowds`, passed
              through `utils.pad_groundtruths_to_fixed_size` with
              `self._max_num_instances`.
        """
        raw_image = data['image']
        # Height and width of the image before any preprocessing.
        original_hw = tf.shape(raw_image)[0:2]

        normalized = preprocess_ops.normalize_image(raw_image)

        # Scale augmentation is disabled (both scales are 1.0) for evaluation.
        padded_size = preprocess_ops.compute_padded_size(
            self._output_size, 2**self._max_level)
        image, image_info = preprocess_ops.resize_and_crop_image(
            normalized,
            self._output_size,
            padded_size=padded_size,
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image_height, image_width, _ = image.get_shape().as_list()
        image = tf.cast(image, dtype=self._dtype)

        # Groundtruth boxes are kept in pixels of the original image.
        gt_boxes = box_ops.denormalize_boxes(data['groundtruth_boxes'],
                                             original_hw)

        anchor_generator = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = anchor_generator(
            image_size=(image_height, image_width))

        gt_classes = data['groundtruth_classes']
        groundtruths = {
            'source_id': utils.process_source_id(data['source_id']),
            'height': data['height'],
            'width': data['width'],
            'num_detections': tf.shape(gt_classes)[0],
            'boxes': gt_boxes,
            'classes': gt_classes,
            'areas': data['groundtruth_area'],
            'is_crowds': tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        labels = {
            'image_info': image_info,
            'anchor_boxes': anchor_boxes,
            'groundtruths': groundtruths,
        }
        return image, labels
Ejemplo n.º 8
0
    def _parse_eval_data(self, data):
        """Parses data for evaluation.

        Besides the anchor targets, the returned labels carry a
        `groundtruths` dictionary built from the unfiltered annotations in
        `data`.

        Args:
          data: the decoded tensor dictionary. It is not modified; in
            particular the `groundtruth_attributes` dict, when present, is
            left untouched.

        Returns:
          image: the normalized, resized image tensor, cast to `self._dtype`.
          labels: a dictionary with keys `cls_targets`, `box_targets`,
            `anchor_boxes`, `cls_weights`, `box_weights`, `image_info` and
            `groundtruths`, plus `attribute_targets` when the anchor labeler
            returns non-empty attribute targets.
        """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # `ground_truth` of attributes is assumed in shape [N, attribute_size].
        # TODO(xianzhi): support parsing attributes weights.
        attributes = data.get('groundtruth_attributes', {})

        # Gets original image and its size.
        image = data['image']
        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=1.0,
            aug_scale_max=1.0)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        # Build a new dict instead of assigning into `attributes`: that object
        # is the caller's `data['groundtruth_attributes']`, which is stored
        # unfiltered in `groundtruths` below and must stay aligned with the
        # unfiltered groundtruth boxes and classes.
        attributes = {k: tf.gather(v, indices) for k, v in attributes.items()}

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Sets up groundtruth data for evaluation.
        groundtruths = {
            'source_id':
            data['source_id'],
            'height':
            data['height'],
            'width':
            data['width'],
            'num_detections':
            tf.shape(data['groundtruth_classes']),
            'image_info':
            image_info,
            'boxes':
            box_ops.denormalize_boxes(data['groundtruth_boxes'], image_shape),
            'classes':
            data['groundtruth_classes'],
            'areas':
            data['groundtruth_area'],
            'is_crowds':
            tf.cast(data['groundtruth_is_crowd'], tf.int32),
        }
        if 'groundtruth_attributes' in data:
            groundtruths['attributes'] = data['groundtruth_attributes']
        groundtruths['source_id'] = utils.process_source_id(
            groundtruths['source_id'])
        groundtruths = utils.pad_groundtruths_to_fixed_size(
            groundtruths, self._max_num_instances)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
            'groundtruths': groundtruths,
        }
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels
Ejemplo n.º 9
0
    def _parse_train_data(self, data):
        """Parses data for training.

        Args:
          data: the decoded tensor dictionary. It is not modified; in
            particular the `groundtruth_attributes` dict, when present, is
            left untouched.

        Returns:
          image: the augmented, normalized and resized image tensor, cast to
            `self._dtype`.
          labels: a dictionary with keys `cls_targets`, `box_targets`,
            `anchor_boxes`, `cls_weights`, `box_weights` and `image_info`,
            plus `attribute_targets` when the anchor labeler returns
            non-empty attribute targets.
        """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        # If not empty, `attributes` is a dict of (name, ground_truth) pairs.
        # `ground_truth` of attributes is assumed in shape [N, attribute_size].
        # TODO(xianzhi): support parsing attributes weights.
        attributes = data.get('groundtruth_attributes', {})
        is_crowds = data['groundtruth_is_crowd']

        # Skips annotations with `is_crowd` = True.
        if self._skip_crowd_during_training:
            num_groundtruths = tf.shape(input=classes)[0]
            with tf.control_dependencies([num_groundtruths, is_crowds]):
                # When `is_crowds` is empty, keep every annotation.
                indices = tf.cond(
                    pred=tf.greater(tf.size(input=is_crowds), 0),
                    true_fn=lambda: tf.where(tf.logical_not(is_crowds))[:, 0],
                    false_fn=lambda: tf.cast(tf.range(num_groundtruths),
                                             tf.int64))
            classes = tf.gather(classes, indices)
            boxes = tf.gather(boxes, indices)
            # Rebind to a new dict rather than assigning into `attributes`,
            # which is the caller's `data['groundtruth_attributes']` object.
            attributes = {
                k: tf.gather(v, indices) for k, v in attributes.items()
            }

        # Gets original image.
        image = data['image']

        # Apply autoaug or randaug.
        if self._augmenter is not None:
            image, boxes = self._augmenter.distort_with_boxes(image, boxes)

        image_shape = tf.shape(input=image)[0:2]

        # Normalizes image with mean and std pixel values.
        image = preprocess_ops.normalize_image(image)

        # Flips image randomly during training.
        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Converts boxes from normalized coordinates to pixel coordinates.
        boxes = box_ops.denormalize_boxes(boxes, image_shape)

        # Resizes and crops image.
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=preprocess_ops.compute_padded_size(
                self._output_size, 2**self._max_level),
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Resizes and crops boxes.
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     image_info[1, :], offset)
        # Filters out ground truth boxes that are all zeros.
        indices = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, indices)
        classes = tf.gather(classes, indices)
        # Again a new dict, so the caller's attributes dict is never mutated.
        attributes = {k: tf.gather(v, indices) for k, v in attributes.items()}

        # Assigns anchors.
        input_anchor = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = input_anchor(image_size=(image_height, image_width))
        anchor_labeler = anchor.AnchorLabeler(self._match_threshold,
                                              self._unmatched_threshold)
        (cls_targets, box_targets, att_targets, cls_weights,
         box_weights) = anchor_labeler.label_anchors(
             anchor_boxes, boxes, tf.expand_dims(classes, axis=1), attributes)

        # Casts input image to desired data type.
        image = tf.cast(image, dtype=self._dtype)

        # Packs labels for model_fn outputs.
        labels = {
            'cls_targets': cls_targets,
            'box_targets': box_targets,
            'anchor_boxes': anchor_boxes,
            'cls_weights': cls_weights,
            'box_weights': box_weights,
            'image_info': image_info,
        }
        if att_targets:
            labels['attribute_targets'] = att_targets
        return image, labels
Ejemplo n.º 10
0
    def _parse_train_data(self, data):
        """Parses one decoded example into a training image and its labels.

        Args:
          data: the decoded tensor dictionary; the keys read are
            `groundtruth_classes`, `groundtruth_boxes`,
            `groundtruth_is_crowd` and `image`.

        Returns:
          image: the normalized, optionally flipped, resized image tensor,
            cast to `tf.bfloat16` when `self._use_bfloat16` is set.
          labels: a dictionary with keys `cls_targets`, `box_targets`,
            `anchor_boxes`, `cls_weights`, `box_weights` and `image_info`.
        """
        classes = data['groundtruth_classes']
        boxes = data['groundtruth_boxes']
        is_crowds = data['groundtruth_is_crowd']

        if self._skip_crowd_during_training:
            # Drop annotations flagged as crowd. When `is_crowds` is empty,
            # every annotation is kept.
            num_groundtruths = tf.shape(input=classes)[0]

            def _non_crowd_indices():
                return tf.where(tf.logical_not(is_crowds))[:, 0]

            def _all_indices():
                return tf.cast(tf.range(num_groundtruths), tf.int64)

            with tf.control_dependencies([num_groundtruths, is_crowds]):
                keep = tf.cond(
                    pred=tf.greater(tf.size(input=is_crowds), 0),
                    true_fn=_non_crowd_indices,
                    false_fn=_all_indices)
            classes = tf.gather(classes, keep)
            boxes = tf.gather(boxes, keep)

        # Original image and its height/width before preprocessing.
        image = data['image']
        original_hw = tf.shape(input=image)[0:2]

        # Normalize pixel values, then optionally flip image and boxes.
        image = preprocess_ops.normalize_image(image)
        if self._aug_rand_hflip:
            image, boxes, _ = preprocess_ops.random_horizontal_flip(
                image, boxes)

        # Boxes go from normalized to pixel coordinates of the original image.
        boxes = box_ops.denormalize_boxes(boxes, original_hw)

        # Resize and crop the image with the configured scale augmentation.
        padded_size = preprocess_ops.compute_padded_size(
            self._output_size, 2**self._max_level)
        image, image_info = preprocess_ops.resize_and_crop_image(
            image,
            self._output_size,
            padded_size=padded_size,
            aug_scale_min=self._aug_scale_min,
            aug_scale_max=self._aug_scale_max)
        image_height, image_width, _ = image.get_shape().as_list()

        # Apply the same resize and crop to the boxes, then drop the boxes
        # that are all zeros.
        scaled_size = image_info[1, :]
        image_scale = image_info[2, :]
        offset = image_info[3, :]
        boxes = preprocess_ops.resize_and_crop_boxes(boxes, image_scale,
                                                     scaled_size, offset)
        non_empty = box_ops.get_non_empty_box_indices(boxes)
        boxes = tf.gather(boxes, non_empty)
        classes = tf.gather(classes, non_empty)

        # Generate anchors for the resized image and label them.
        anchor_generator = anchor.build_anchor_generator(
            min_level=self._min_level,
            max_level=self._max_level,
            num_scales=self._num_scales,
            aspect_ratios=self._aspect_ratios,
            anchor_size=self._anchor_size)
        anchor_boxes = anchor_generator(
            image_size=(image_height, image_width))
        labeler = anchor.AnchorLabeler(self._match_threshold,
                                       self._unmatched_threshold)
        cls_targets, box_targets, cls_weights, box_weights = (
            labeler.label_anchors(anchor_boxes, boxes,
                                  tf.expand_dims(classes, axis=1)))

        # If bfloat16 is used, casts input image to tf.bfloat16.
        if self._use_bfloat16:
            image = tf.cast(image, dtype=tf.bfloat16)

        # Packs labels for model_fn outputs.
        labels = dict(
            cls_targets=cls_targets,
            box_targets=box_targets,
            anchor_boxes=anchor_boxes,
            cls_weights=cls_weights,
            box_weights=box_weights,
            image_info=image_info,
        )
        return image, labels
Ejemplo n.º 11
0
    def testDetectionsOutputShape(self, use_batched_nms):
        """Checks the shapes of the tensors returned by the generator.

        Random class and box head outputs for pyramid levels 4 to 6 are fed
        to a `MultilevelDetectionGenerator` together with generated anchors,
        and every entry of the result is compared against the shape implied
        by `batch_size` and `max_num_detections`.

        Args:
            use_batched_nms: Value forwarded to the generator constructor as
                its `use_batched_nms` option.
        """
        batch_size = 1
        num_classes = 4
        max_num_detections = 100
        generator_options = {
            'apply_nms': True,
            'pre_nms_top_k': 5000,
            'pre_nms_score_threshold': 0.01,
            'nms_iou_threshold': 0.5,
            'max_num_detections': max_num_detections,
            'use_batched_nms': use_batched_nms,
        }

        # One entry per pyramid level: (key, first row, end row, map side).
        # The row ranges partition 8 * 8 + 4 * 4 + 2 * 2 = 84 rows.
        level_layout = (('4', 0, 64, 8), ('5', 64, 80, 4), ('6', 80, 84, 2))

        def split_by_level(flat_outputs, depth):
            """Maps each level key to its rows as a [1, side, side, depth] tensor."""
            per_level = {}
            for level, start, end, side in level_layout:
                rows = tf.convert_to_tensor(flat_outputs[start:end],
                                            dtype=tf.float32)
                per_level[level] = tf.reshape(rows, [1, side, side, depth])
            return per_level

        # Anchors for levels 4..6 of a 64x64 input.
        min_level, max_level = 4, 6
        num_scales = 2
        aspect_ratios = [1.0, 2.0]
        anchor_scale = 2.0
        anchor_generator = anchor.build_anchor_generator(
            min_level, max_level, num_scales, aspect_ratios, anchor_scale)
        anchor_boxes = anchor_generator([64, 64])

        # Class scores are drawn before boxes so that the random stream is
        # consumed in a fixed order: 84 x num_classes values in [-1.5, 1.5),
        # then 84 x 4 values in [0, 1).
        raw_class_scores = (np.random.rand(84, num_classes) - 0.5) * 3
        raw_boxes = np.random.rand(84, 4)
        class_outputs = split_by_level(raw_class_scores, num_classes)
        box_outputs = split_by_level(raw_boxes, 4)

        image_info = tf.constant(
            [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], dtype=tf.float32)
        generator = detection_generator.MultilevelDetectionGenerator(
            **generator_options)
        # Row 1 of `image_info` is passed as the image shape argument.
        results = generator(box_outputs, class_outputs, anchor_boxes,
                            image_info[:, 1, :])

        # Read every expected key up front so a missing one fails early.
        output_shapes = {
            key: results[key].numpy().shape
            for key in ('detection_boxes', 'detection_classes',
                        'detection_scores', 'num_detections')
        }

        per_detection = (batch_size, max_num_detections)
        self.assertEqual(output_shapes['detection_boxes'],
                         per_detection + (4, ))
        self.assertEqual(output_shapes['detection_scores'], per_detection)
        self.assertEqual(output_shapes['detection_classes'], per_detection)
        self.assertEqual(output_shapes['num_detections'], (batch_size, ))
Ejemplo n.º 12
0
    def testDetectionsOutputShape(self, nms_version, has_att_heads,
                                  use_cpu_nms, soft_nms_sigma):
        """Checks generator output shapes, optionally with attribute heads.

        Random class, box and (optionally) attribute head outputs for pyramid
        levels 4 to 6 are fed to a `MultilevelDetectionGenerator` together
        with generated anchors, and every entry of the result is compared
        against the shape implied by `batch_size` and `max_num_detections`.

        Args:
            nms_version: Value forwarded to the generator constructor as its
                `nms_version` option.
            has_att_heads: When true, a single-channel 'depth' attribute head
                is passed to the generator and the returned
                'detection_attributes' shapes are checked as well; otherwise
                `None` is passed for the attribute outputs.
            use_cpu_nms: Value forwarded to the generator constructor as its
                `use_cpu_nms` option.
            soft_nms_sigma: Value forwarded to the generator constructor as
                its `soft_nms_sigma` option.
        """
        batch_size = 1
        num_classes = 4
        max_num_detections = 10
        generator_options = {
            'apply_nms': True,
            'pre_nms_top_k': 5000,
            'pre_nms_score_threshold': 0.01,
            'nms_iou_threshold': 0.5,
            'max_num_detections': max_num_detections,
            'nms_version': nms_version,
            'use_cpu_nms': use_cpu_nms,
            'soft_nms_sigma': soft_nms_sigma,
        }

        # One entry per pyramid level: (key, first row, end row, map side).
        # The row ranges partition 8 * 8 + 4 * 4 + 2 * 2 = 84 rows.
        level_layout = (('4', 0, 64, 8), ('5', 64, 80, 4), ('6', 80, 84, 2))

        def split_by_level(flat_outputs, depth):
            """Maps each level key to its rows as a [1, side, side, depth] tensor."""
            per_level = {}
            for level, start, end, side in level_layout:
                rows = tf.convert_to_tensor(flat_outputs[start:end],
                                            dtype=tf.float32)
                per_level[level] = tf.reshape(rows, [1, side, side, depth])
            return per_level

        # Anchors for levels 4..6 of a 64x64 input.
        min_level, max_level = 4, 6
        num_scales = 2
        aspect_ratios = [1.0, 2.0]
        anchor_scale = 2.0
        anchor_generator = anchor.build_anchor_generator(
            min_level, max_level, num_scales, aspect_ratios, anchor_scale)
        anchor_boxes = anchor_generator([64, 64])

        # Class scores are drawn first, then boxes, then (only when enabled)
        # attributes, so the random stream is consumed in a fixed order.
        raw_class_scores = (np.random.rand(84, num_classes) - 0.5) * 3
        raw_boxes = np.random.rand(84, 4)
        class_outputs = split_by_level(raw_class_scores, num_classes)
        box_outputs = split_by_level(raw_boxes, 4)

        att_outputs = None
        if has_att_heads:
            raw_attributes = np.random.rand(84, 1)
            att_outputs = {'depth': split_by_level(raw_attributes, 1)}

        image_info = tf.constant(
            [[[1000, 1000], [100, 100], [0.1, 0.1], [0, 0]]], dtype=tf.float32)
        generator = detection_generator.MultilevelDetectionGenerator(
            **generator_options)
        # Row 1 of `image_info` is passed as the image shape argument.
        results = generator(box_outputs, class_outputs, anchor_boxes,
                            image_info[:, 1, :], att_outputs)

        # Read every expected key up front so a missing one fails early.
        output_shapes = {
            key: results[key].numpy().shape
            for key in ('detection_boxes', 'detection_classes',
                        'detection_scores', 'num_detections')
        }

        per_detection = (batch_size, max_num_detections)
        self.assertEqual(output_shapes['detection_boxes'],
                         per_detection + (4, ))
        self.assertEqual(output_shapes['detection_scores'], per_detection)
        self.assertEqual(output_shapes['detection_classes'], per_detection)
        self.assertEqual(output_shapes['num_detections'], (batch_size, ))

        if not has_att_heads:
            return
        # Each attribute head yields one value per detection.
        for attribute in results['detection_attributes'].values():
            self.assertEqual(attribute.numpy().shape, per_detection + (1, ))