def get_image_info(self):
    """Stack the scaled and original image dimensions into one tensor.

    Returns:
      The result of `tf.stack` over five values, in this order: scaled
      height, scaled width, the reciprocal of `self._image_scale`,
      original height and original width. The four size entries are
      converted to float before stacking.
    """
    scaled_h = tf.to_float(self._scaled_height)
    scaled_w = tf.to_float(self._scaled_width)
    # Stored as the inverse scale, i.e. the factor applied to scaled
    # quantities to undo `self._image_scale`.
    inverse_scale = 1.0 / self._image_scale
    original_h = tf.to_float(self._ori_height)
    original_w = tf.to_float(self._ori_width)
    info = [scaled_h, scaled_w, inverse_scale, original_h, original_w]
    return tf.stack(info)
def set_scale_factors_to_output_size(self):
    """Derive the scale that fits the image's longer side to the output size.

    Reads `self._image` and `self._output_size`; writes `self._image_scale`,
    `self._scaled_height` and `self._scaled_width`. Returns nothing.
    """
    rows, cols = tf.shape(self._image)[0], tf.shape(self._image)[1]
    longer_side = tf.to_float(tf.maximum(rows, cols))

    # One scale for both axes, chosen so the longer side maps to
    # self._output_size.
    scale = tf.to_float(self._output_size) / longer_side

    self._image_scale = scale
    # Scaled dimensions are converted back to int32 after the float multiply.
    self._scaled_height = tf.to_int32(tf.to_float(rows) * scale)
    self._scaled_width = tf.to_int32(tf.to_float(cols) * scale)
def set_training_random_scale_factors(self, scale_min, scale_max):
    """Pick a random scale and crop offset for multiscale training.

    Args:
      scale_min: lower bound passed to `tf.random_uniform` for the scale
        factor.
      scale_max: upper bound passed to `tf.random_uniform` for the scale
        factor.

    Writes `self._image_scale`, `self._scaled_height`, `self._scaled_width`,
    `self._crop_offset_x` and `self._crop_offset_y`. Returns nothing.
    """
    # NOTE: ops are created in a fixed order on purpose; keep the random
    # draws where they are when editing this method.

    # Draw the random factor and turn it into an integer target size.
    jitter = tf.random_uniform([], scale_min, scale_max)
    target_size = tf.to_int32(jitter * self._output_size)

    # Scale is recomputed from the integer target size and the longer side.
    rows, cols = tf.shape(self._image)[0], tf.shape(self._image)[1]
    longer_side = tf.to_float(tf.maximum(rows, cols))
    scale = tf.to_float(target_size) / longer_side

    new_rows = tf.to_int32(tf.to_float(rows) * scale)
    new_cols = tf.to_int32(tf.to_float(cols) * scale)

    # How far the scaled image extends past self._output_size on each axis;
    # negative when the scaled image is smaller.
    excess_y = tf.to_float(new_rows - self._output_size)
    excess_x = tf.to_float(new_cols - self._output_size)

    # A uniform fraction of the non-negative excess becomes the crop offset,
    # so the offset is zero whenever the scaled image does not exceed the
    # output size.
    shift_y = tf.maximum(0.0, excess_y) * tf.random_uniform([], 0, 1)
    shift_x = tf.maximum(0.0, excess_x) * tf.random_uniform([], 0, 1)
    shift_y = tf.to_int32(shift_y)
    shift_x = tf.to_int32(shift_x)

    self._image_scale = scale
    self._scaled_height = new_rows
    self._scaled_width = new_cols
    self._crop_offset_x = shift_x
    self._crop_offset_y = shift_y
def _dataset_parser(value):
    """Decode one record into a fixed-size image plus training targets.

    Relies on `params`, `example_decoder`, `anchor_labeler`, `preprocessor`
    and `_normalize_image` from the enclosing scope.

    Args:
      value: a serialized record accepted by `example_decoder.decode`.

    Returns:
      A tuple `(image, cls_targets, box_targets, num_positives, source_id,
      image_scale)`, where `image_scale` is the image height before resizing
      divided by its height after resizing, and `source_id` is parsed to
      float32.
    """
    target_size = params['image_size']

    with tf.name_scope('parser'):
        decoded = example_decoder.decode(value)
        pixels = decoded['image']
        gt_boxes = decoded['groundtruth_boxes']
        # Classes become a float32 column so there is one row per box.
        gt_classes = tf.reshape(
            tf.cast(decoded['groundtruth_classes'], dtype=tf.float32),
            [-1, 1])

        # the image normalization is identical to Cloud TPU ResNet-50
        pixels = tf.image.convert_image_dtype(pixels, dtype=tf.float32)
        pixels = _normalize_image(pixels)

        if params['input_rand_hflip']:
            pixels, gt_boxes = preprocessor.random_horizontal_flip(
                pixels, boxes=gt_boxes)

        # Remember the pre-resize shape so the scale can be reported.
        shape_before_resize = tf.shape(pixels)
        pixels, _ = preprocessor.resize_to_range(
            pixels, min_dimension=target_size, max_dimension=target_size)
        height_before = tf.to_float(shape_before_resize[0])
        image_scale = height_before / tf.to_float(tf.shape(pixels)[0])

        pixels, gt_boxes = preprocessor.scale_boxes_to_pixel_coordinates(
            pixels, gt_boxes, keypoints=None)

        # Pad to a target_size x target_size canvas with zero offsets.
        pixels = tf.image.pad_to_bounding_box(
            pixels, 0, 0, target_size, target_size)

        labels = anchor_labeler.label_anchors(gt_boxes, gt_classes)
        (cls_targets, box_targets, num_positives) = labels

        numeric_source_id = tf.string_to_number(
            decoded['source_id'], out_type=tf.float32)

        return (pixels, cls_targets, box_targets, num_positives,
                numeric_source_id, image_scale)
def resize_and_crop_boxes(self):
    """Scale, shift and clip the boxes, then drop the empty ones.

    Reads `self._boxes`, `self._classes`, the scaled height/width and the
    crop offsets.

    Returns:
      A `(boxes, classes)` pair holding only the entries whose clipped box
      coordinates do not sum to zero.
    """
    # Scale the boxes by the scaled image height and width.
    scaled_list = preprocessor.box_list_scale(
        preprocessor.box_list.BoxList(self._boxes),
        self._scaled_height,
        self._scaled_width)
    coords = scaled_list.get()

    # Shift by the crop offset; the (y, x) pair is repeated so it lines up
    # with the four box coordinates.
    off_y, off_x = self._crop_offset_y, self._crop_offset_x
    shift = tf.reshape(tf.stack([off_y, off_x, off_y, off_x]), [1, 4])
    coords = coords - tf.to_float(shift)

    coords = self.clip_boxes(coords)

    # A box whose coordinates sum to zero is treated as empty and removed,
    # together with its class entry.
    keep = tf.where(tf.not_equal(tf.reduce_sum(coords, axis=1), 0))
    kept_boxes = tf.gather_nd(coords, keep)
    kept_classes = tf.gather_nd(self._classes, keep)
    return kept_boxes, kept_classes
def set_scale_factors_to_mlperf_reference_size(self):
    """Set the resize parameters following the MLPerf reference recipe.

    The scale is the smaller of two candidates: the one that maps the short
    side to `self._short_side_image_size`, and the one that maps the long
    side to `self._long_side_max_image_size`.

    Writes `self._image_scale`, `self._scaled_height` and
    `self._scaled_width`.

    Returns:
      The chosen image scale.
    """
    rows, cols = tf.shape(self._image)[0], tf.shape(self._image)[1]

    # Reference implementation:
    # https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/utils/blob.py#L70  # pylint: disable=line-too-long
    shorter_side = tf.to_float(tf.minimum(rows, cols))
    longer_side = tf.to_float(tf.maximum(rows, cols))
    scale_from_short = (
        tf.to_float(self._short_side_image_size) / shorter_side)
    scale_from_long = (
        tf.to_float(self._long_side_max_image_size) / longer_side)

    # Taking the minimum keeps the long side within its maximum.
    scale = tf.minimum(scale_from_short, scale_from_long)

    self._image_scale = scale
    self._scaled_height = tf.to_int32(tf.to_float(rows) * scale)
    self._scaled_width = tf.to_int32(tf.to_float(cols) * scale)
    return scale