def set_training_random_scale_factors(self, scale_min, scale_max):
  """Choose a random resize scale and crop origin for multiscale training.

  A scale factor is sampled with tf.random_uniform between scale_min and
  scale_max. The resized image dimensions are derived from it, and a random
  crop origin is sampled for each axis along which the resized image is
  larger than self._output_size (the origin is 0 otherwise).

  The results are stored on the instance as _image_scale, _scaled_height,
  _scaled_width, _crop_offset_x and _crop_offset_y.

  Args:
    scale_min: Lower bound passed to tf.random_uniform for the scale factor.
    scale_max: Upper bound passed to tf.random_uniform for the scale factor.
  """
  # Sample the scale and convert it to an integer target size.
  rand_scale = tf.random_uniform([], scale_min, scale_max)
  target_size = tf.to_int32(rand_scale * self._output_size)

  # Re-derive the scale from the integer target size, relative to the longer
  # image side, so it is consistent with the integer dimensions below.
  img_h = tf.shape(self._image)[0]
  img_w = tf.shape(self._image)[1]
  longer_side = tf.to_float(tf.maximum(img_h, img_w))
  scale = tf.to_float(target_size) / longer_side

  new_h = tf.to_int32(tf.to_float(img_h) * scale)
  new_w = tf.to_int32(tf.to_float(img_w) * scale)

  # Slack is how far the resized image extends past the output size. Only a
  # positive slack leaves room for a non-zero random crop origin.
  slack_y = tf.to_float(new_h - self._output_size)
  slack_x = tf.to_float(new_w - self._output_size)
  crop_y = tf.maximum(0.0, slack_y) * tf.random_uniform([], 0, 1)
  crop_x = tf.maximum(0.0, slack_x) * tf.random_uniform([], 0, 1)
  crop_y = tf.to_int32(crop_y)
  crop_x = tf.to_int32(crop_x)

  self._image_scale = scale
  self._scaled_height, self._scaled_width = new_h, new_w
  self._crop_offset_x, self._crop_offset_y = crop_x, crop_y
def set_scale_factors_to_output_size(self):
  """Configure resizing so the longer image side maps to self._output_size.

  Stores the resulting scale in self._image_scale and the integer resized
  dimensions in self._scaled_height and self._scaled_width.
  """
  img_h = tf.shape(self._image)[0]
  img_w = tf.shape(self._image)[1]

  # The scale is chosen relative to the longer side of the image.
  longer_side = tf.to_float(tf.maximum(img_h, img_w))
  scale = tf.to_float(self._output_size) / longer_side

  new_h = tf.to_int32(tf.to_float(img_h) * scale)
  new_w = tf.to_int32(tf.to_float(img_w) * scale)

  self._image_scale = scale
  self._scaled_height, self._scaled_width = new_h, new_w
def set_scale_factors_to_mlperf_reference_size(self):
  """Configure resizing to follow the MLPerf reference implementation.

  Two candidate scales are computed: one that maps the shorter image side to
  self._short_side_image_size and one that maps the longer side to
  self._long_side_max_image_size. The smaller of the two is used.

  Stores the chosen scale in self._image_scale and the integer resized
  dimensions in self._scaled_height and self._scaled_width.

  Returns:
    The chosen image scale tensor.
  """
  img_h = tf.shape(self._image)[0]
  img_w = tf.shape(self._image)[1]

  # Scale selection follows the Detectron reference:
  # https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/utils/blob.py#L70  # pylint: disable=line-too-long
  shorter_side = tf.to_float(tf.minimum(img_h, img_w))
  longer_side = tf.to_float(tf.maximum(img_h, img_w))
  scale_from_short = tf.to_float(self._short_side_image_size) / shorter_side
  scale_from_long = tf.to_float(self._long_side_max_image_size) / longer_side

  # Taking the minimum keeps both side-length targets from being exceeded.
  scale = tf.minimum(scale_from_short, scale_from_long)

  new_h = tf.to_int32(tf.to_float(img_h) * scale)
  new_w = tf.to_int32(tf.to_float(img_w) * scale)

  self._image_scale = scale
  self._scaled_height, self._scaled_width = new_h, new_w
  return scale
def _dataset_parser(value):
  """Decode one example into a fixed-size image and its label map.

  Args:
    value: A serialized example; it is handed to example_decoder.decode,
      which yields a dictionary holding the image and groundtruth
      annotations.

  Returns:
    A tuple (image, label):
      image: Image tensor that is preprocessed to have normalized value and
        fixed dimension [image_size, image_size, 3]. Cast to bfloat16 when
        params['use_bfloat16'] is set.
      label: Label tensor of the same spatial dimension as the image.
  """
  with tf.name_scope('parser'):
    decoded = example_decoder.decode(value)
    raw_image = decoded['image']
    raw_label = tf.to_int32(decoded['labels_class'])
    training = self._is_training

    processor = SegmentationInputProcessor(
        raw_image, params['image_size'], raw_label)
    # The image normalization is identical to Cloud TPU ResNet.
    processor.normalize_image()

    # Random flip and random scaling are applied in training mode only.
    if training:
      if params['input_rand_hflip']:
        processor.random_horizontal_flip()
      processor.set_training_random_scale_factors(
          params['train_scale_min'], params['train_scale_max'])

    image = processor.resize_and_crop_image()

    # Padding is set to background (class=0) during training and to the
    # configured ignore label otherwise.
    label_fill = 0 if training else params['ignore_label']
    label = processor.resize_and_crop_label(label_fill)

    if params['use_bfloat16']:
      image = tf.cast(image, dtype=tf.bfloat16)
    return image, label