def _parse_eval_data(self, data):
    """Builds the image and the label dictionary used for evaluation.

    The image is scaled by dividing by 255 and passed, together with the
    ground-truth boxes, through `fit_preserve_aspect_ratio`. The boxes are
    then converted with `yxyx_to_xcycwh`, matched against `self._anchors`,
    and every per-instance tensor is run through `pad_max_instances` with
    `self._max_num_instances`.

    Args:
        data: a dict of Tensors produced by the decoder. The keys read here
            are 'image', 'groundtruth_boxes', 'groundtruth_classes',
            'groundtruth_area', 'groundtruth_is_crowd' and 'source_id'.

    Returns:
        image: the preprocessed image tensor.
        labels: a dict of Tensors with the keys 'source_id', 'bbox',
            'classes', 'area', 'is_crowd', 'best_anchors', 'width',
            'height', 'num_detections' and 'grid_form'.
    """
    raw_image = data['image']
    image_shape = tf.shape(raw_image)
    # NOTE(review): dimension 0 is bound to `width` and dimension 1 to
    # `height`. For a (height, width, channels) image these would be
    # swapped -- confirm against the convention that
    # `fit_preserve_aspect_ratio` expects before changing anything.
    width = image_shape[0]
    height = image_shape[1]
    max_instances = self._max_num_instances

    def _pad(tensor):
        # Same positional arguments for every per-instance tensor.
        return yolo_preprocess_ops.pad_max_instances(tensor, max_instances, 0)

    image, boxes = yolo_preprocess_ops.fit_preserve_aspect_ratio(
        raw_image / 255,
        data['groundtruth_boxes'],
        width=width,
        height=height,
        target_dim=self._image_w)
    boxes = yolo_box_ops.yxyx_to_xcycwh(boxes)

    # Anchors are matched on the unpadded boxes; padding happens afterwards.
    best_anchors = yolo_preprocess_ops.get_best_anchor(
        boxes, self._anchors, width=self._image_w, height=self._image_h)

    padded_boxes = _pad(boxes)
    padded_classes = _pad(data['groundtruth_classes'])
    padded_anchors = _pad(best_anchors)
    padded_area = _pad(data['groundtruth_area'])
    padded_is_crowd = _pad(tf.cast(data['groundtruth_is_crowd'], tf.int32))

    labels = {
        'source_id': data['source_id'],
        'bbox': tf.cast(padded_boxes, self._dtype),
        'classes': tf.cast(padded_classes, self._dtype),
        'area': tf.cast(padded_area, self._dtype),
        'is_crowd': padded_is_crowd,
        'best_anchors': tf.cast(padded_anchors, self._dtype),
        'width': width,
        'height': height,
        # Number of ground-truth instances before padding.
        'num_detections': tf.shape(data['groundtruth_classes'])[0],
    }

    # The grid is built from the labels assembled so far and then attached.
    labels['grid_form'] = self._build_grid(
        labels,
        self._image_w,
        batch=False,
        use_tie_breaker=self._use_tie_breaker)
    return image, labels
def test_pad_max_instances(self, input_shape, instances, pad_axis):
    """Checks that `pad_max_instances` resizes `pad_axis` to `instances`.

    Args:
        input_shape: sequence of ints giving the shape of the all-ones input.
        instances: the size requested along `pad_axis`.
        pad_axis: the axis whose size is expected to become `instances`.
    """
    # Copy the shape before editing it. Assigning `input_shape` directly
    # would alias the same list, so the input array would be created already
    # at the expected shape and the padding would never be exercised.
    expected_output_shape = list(input_shape)
    expected_output_shape[pad_axis] = instances

    output = preprocess_ops.pad_max_instances(
        np.ones(input_shape), instances, pad_axis=pad_axis)

    self.assertAllEqual(expected_output_shape, tf.shape(output).numpy())