def set_training_random_scale_factors(self, scale_min, scale_max):
    """Draw random scale and crop parameters for multiscale training.

    A scale factor is sampled with `tf.random_uniform` between `scale_min`
    and `scale_max` and applied to `self._output_size`. The image scale is
    then recomputed from that integer target size and the longer side of
    `self._image`. For each axis where the scaled image exceeds
    `self._output_size`, a random crop offset is drawn; otherwise the offset
    is zero.

    Results are stored on the instance (`_image_scale`, `_scaled_height`,
    `_scaled_width`, `_crop_offset_x`, `_crop_offset_y`); nothing is
    returned.

    Args:
      scale_min: Lower bound passed to `tf.random_uniform`.
      scale_max: Upper bound passed to `tf.random_uniform`.
    """
    # Sample the scale factor and the integer target size it implies.
    sampled_factor = tf.random_uniform([], scale_min, scale_max)
    target_size = tf.to_int32(sampled_factor * self._output_size)

    # Derive the effective scale from the integer target size, so that the
    # longer image side maps onto `target_size`.
    image_height = tf.shape(self._image)[0]
    image_width = tf.shape(self._image)[1]
    longer_side = tf.to_float(tf.maximum(image_height, image_width))
    image_scale = tf.to_float(target_size) / longer_side

    scaled_height = tf.to_int32(tf.to_float(image_height) * image_scale)
    scaled_width = tf.to_int32(tf.to_float(image_width) * image_scale)

    # How far the scaled image overshoots the output size on each axis. A
    # negative overshoot is clamped to zero, which yields a zero offset.
    overshoot_y = tf.to_float(scaled_height - self._output_size)
    overshoot_x = tf.to_float(scaled_width - self._output_size)
    random_y = tf.maximum(0.0, overshoot_y) * tf.random_uniform([], 0, 1)
    random_x = tf.maximum(0.0, overshoot_x) * tf.random_uniform([], 0, 1)
    crop_offset_y = tf.to_int32(random_y)
    crop_offset_x = tf.to_int32(random_x)

    self._image_scale = image_scale
    self._scaled_height = scaled_height
    self._scaled_width = scaled_width
    self._crop_offset_x = crop_offset_x
    self._crop_offset_y = crop_offset_y
def __init__(self, image, output_size, short_side_image_size,
             long_side_max_image_size):
    """Creates an `InputProcessor` tailored for MLPerf.

    The MLPerf reference implementation resizes images as follows:
      1. Resize the short side to 800 pixels while keeping the aspect ratio.
      2. Clip the long side at a maximum of 1333 pixels.

    Args:
      image: The input image before processing.
      output_size: An integer tuple of the output image size, in the form
        (short_side, long_side), after calling the resize_and_crop_image
        function.
      short_side_image_size: The image size for the short side. Analogous
        to cfg.TRAIN.scales in the MLPerf reference model.
      long_side_max_image_size: The maximum image size for the long side.
        Analogous to cfg.TRAIN.max_size in the MLPerf reference model.
    """
    # Raw inputs and resize targets.
    self._image = image
    self._output_size = output_size
    self._short_side_image_size = short_side_image_size
    self._long_side_max_image_size = long_side_max_image_size

    # State controlling rescaling during preprocessing. `_image_scale` is
    # the factor from the original image to the scaled image; it starts as
    # the identity scale.
    self._image_scale = tf.constant(1.0)

    # Integer height/width of the scaled image. These start equal to the
    # input image's own height/width.
    self._scaled_height = tf.shape(image)[0]
    self._scaled_width = tf.shape(image)[1]

    # Height/width of the image as it was passed in.
    self._ori_height = tf.shape(image)[0]
    self._ori_width = tf.shape(image)[1]
def crop_gt_masks(self, gt_mask_size):
    """Crops the ground truth masks to their boxes and resizes them.

    Each mask in `self._masks` is reshaped to a single-channel image of size
    `[self._ori_height, self._ori_width]`, cropped to the matching entry of
    `self._boxes` with `tf.image.crop_and_resize` (bilinear), and finally
    zero-padded by 2 pixels on every spatial side.

    `self._boxes` and `self._masks` are read here but are not assigned in
    this method; they must have been set on the instance beforehand.
    NOTE(review): `tf.image.crop_and_resize` interprets boxes as normalized
    coordinates -- confirm `self._boxes` is in that format.

    Args:
      gt_mask_size: Side length of the square crop produced for each box.

    Returns:
      A tensor of shape `[num_masks, gt_mask_size + 4, gt_mask_size + 4]`
      holding the cropped, resized and padded masks. A graph-time assertion
      requires the number of boxes to equal the number of masks.
    """
    num_boxes = tf.shape(self._boxes)[0]
    num_masks = tf.shape(self._masks)[0]
    assert_length = tf.Assert(tf.equal(num_boxes, num_masks), [num_masks])

    def reshape_masks_fn():
        # View every mask as a one-channel image so it can be cropped.
        return tf.reshape(self._masks,
                          [-1, self._ori_height, self._ori_width, 1])

    def empty_masks_fn():
        # No instances in this image: feed an empty batch of mask images.
        return tf.zeros([0, self._ori_height, self._ori_width, 1])

    # Check if there is any instance in this image or not.
    mask_images = tf.cond(num_masks > 0, reshape_masks_fn, empty_masks_fn)

    with tf.control_dependencies([assert_length]):
        # Mask i is cropped with box i, hence box_ind = [0, ..., n - 1].
        cropped_gt_masks = tf.image.crop_and_resize(
            image=mask_images,
            boxes=self._boxes,
            box_ind=tf.range(num_masks, dtype=tf.int32),
            crop_size=[gt_mask_size, gt_mask_size],
            method='bilinear')[:, :, :, 0]

    # Zero-pad 2 pixels on each side of both spatial dimensions; the leading
    # (instance) dimension is left untouched.
    cropped_gt_masks = tf.pad(
        cropped_gt_masks,
        paddings=tf.constant([[0, 0], [2, 2], [2, 2]]),
        mode='CONSTANT',
        constant_values=0.)
    return cropped_gt_masks
def set_scale_factors_to_output_size(self):
    """Sets the scale so the longer image side maps to `self._output_size`.

    The scale is `self._output_size` divided by the larger of the image's
    height and width. The scaled height and width are the original sizes
    multiplied by that scale and converted to int32.

    Updates `_image_scale`, `_scaled_height` and `_scaled_width` on the
    instance; nothing is returned.
    """
    image_height = tf.shape(self._image)[0]
    image_width = tf.shape(self._image)[1]

    # Scale relative to the longer side so both sides fit in the output.
    longer_side = tf.to_float(tf.maximum(image_height, image_width))
    scale = tf.to_float(self._output_size) / longer_side

    new_height = tf.to_int32(tf.to_float(image_height) * scale)
    new_width = tf.to_int32(tf.to_float(image_width) * scale)

    self._image_scale = scale
    self._scaled_height = new_height
    self._scaled_width = new_width
def upsampling_tpu_compatible(data, scale):
    """Nearest neighbor upsampling TPU-compatible implementation.

    This implementation is TPU compatible as opposed to
    tf.image.resize_nearest_neighbor().

    Each dimension of `data` is resolved individually: the static size is
    used when it is known, and the run-time size from `tf.shape` is used
    only for dimensions that are statically unknown. This keeps as much
    static shape information as possible in the result (for example when
    only the batch size is unknown).

    Args:
      data: A 4D float32 tensor of shape [batch, height, width, channels].
      scale: An integer multiple to scale resolution of input data.

    Returns:
      A 4D float32 tensor of shape [batch, height*scale, width*scale,
      channels].

    Raises:
      ValueError: if the static shape of `data` is known not to be rank 4.
    """
    with tf.name_scope('upsampling_tpu_compatible'):
        dims = data.get_shape().with_rank(4).as_list()
        if None in dims:
            # Fall back to run-time sizes only where the static size is
            # missing.
            dynamic_shape = tf.shape(data)
            dims = [dynamic_shape[axis] if size is None else size
                    for axis, size in enumerate(dims)]
        bs, height, width, channels = dims

        # Use reshape to quickly upsample the input. The nearest pixel is
        # selected implicitly via broadcasting against a block of ones.
        expanded = tf.reshape(data, [bs, height, 1, width, 1, channels])
        tiled = expanded * tf.ones([1, 1, scale, 1, scale, 1],
                                   dtype=data.dtype)
        return tf.reshape(
            tiled, [bs, height * scale, width * scale, channels])
def __init__(self, image, output_size):
    """Creates an `InputProcessor`.

    Args:
      image: The input image before processing.
      output_size: The output image size after calling the
        resize_and_crop_image function.
    """
    # Raw input and the target size.
    self._image = image
    self._output_size = output_size

    # State controlling rescaling and shifting during preprocessing.
    # `_image_scale` is the factor from the original image to the scaled
    # image; it starts as the identity scale.
    self._image_scale = tf.constant(1.0)

    # Integer height/width of the scaled image. These start equal to the
    # input image's own height/width.
    self._scaled_height = tf.shape(image)[0]
    self._scaled_width = tf.shape(image)[1]

    # Translation (y, x) used to crop the scaled image to the output size.
    # Zero means no shift.
    self._crop_offset_y = tf.constant(0)
    self._crop_offset_x = tf.constant(0)
def set_scale_factors_to_mlperf_reference_size(self):
    """Sets the scale used to resize the image like the MLPerf reference.

    Two candidate scales are computed: one that maps the shorter image side
    to `self._short_side_image_size`, and one that maps the longer side to
    `self._long_side_max_image_size`. The smaller of the two is used.

    Updates `_image_scale`, `_scaled_height` and `_scaled_width` on the
    instance.

    Returns:
      The selected image scale (a float tensor).
    """
    image_height = tf.shape(self._image)[0]
    image_width = tf.shape(self._image)[1]

    # Follows the resizing rule of the reference implementation:
    # https://github.com/ddkang/Detectron/blob/80f329530843e66d07ca39e19901d5f3e5daf009/lib/utils/blob.py#L70  # pylint: disable=line-too-long
    shorter_side = tf.to_float(tf.minimum(image_height, image_width))
    longer_side = tf.to_float(tf.maximum(image_height, image_width))
    scale_from_short = (
        tf.to_float(self._short_side_image_size) / shorter_side)
    scale_from_long = (
        tf.to_float(self._long_side_max_image_size) / longer_side)

    # Taking the minimum keeps the longer side within its maximum.
    scale = tf.minimum(scale_from_short, scale_from_long)

    new_height = tf.to_int32(tf.to_float(image_height) * scale)
    new_width = tf.to_int32(tf.to_float(image_width) * scale)

    self._image_scale = scale
    self._scaled_height = new_height
    self._scaled_width = new_width
    return scale
def _dataset_parser(value):
    """Parses one record into a fixed-size image and its learning targets.

    Relies on names from the enclosing scope that are not defined here:
    `example_decoder`, `params`, `preprocessor`, `anchor_labeler` and
    `_normalize_image`.

    Args:
      value: The record handed to `example_decoder.decode`.

    Returns:
      A tuple `(image, cls_targets, box_targets, num_positives, source_id,
      image_scale)`. `image` is padded to `params['image_size']` on both
      sides, `source_id` is converted to a float32 number, and `image_scale`
      is the ratio of the pre-resize height to the post-resize height.
    """
    with tf.name_scope('parser'):
        decoded = example_decoder.decode(value)
        source_id = decoded['source_id']
        image = decoded['image']
        boxes = decoded['groundtruth_boxes']
        classes = decoded['groundtruth_classes']
        # Classes become a float32 column vector.
        classes = tf.reshape(tf.cast(classes, dtype=tf.float32), [-1, 1])

        # Per the original note, the image normalization is identical to
        # Cloud TPU ResNet-50.
        image = tf.image.convert_image_dtype(image, dtype=tf.float32)
        image = _normalize_image(image)

        if params['input_rand_hflip']:
            image, boxes = preprocessor.random_horizontal_flip(
                image, boxes=boxes)

        # Remember the size before resizing so the scale can be reported.
        shape_before_resize = tf.shape(image)
        image, _ = preprocessor.resize_to_range(
            image,
            min_dimension=params['image_size'],
            max_dimension=params['image_size'])
        height_before = tf.to_float(shape_before_resize[0])
        height_after = tf.to_float(tf.shape(image)[0])
        image_scale = height_before / height_after

        image, boxes = preprocessor.scale_boxes_to_pixel_coordinates(
            image, boxes, keypoints=None)

        # Pad at the bottom/right up to the fixed square size.
        image = tf.image.pad_to_bounding_box(
            image, 0, 0, params['image_size'], params['image_size'])

        cls_targets, box_targets, num_positives = (
            anchor_labeler.label_anchors(boxes, classes))

        source_id = tf.string_to_number(source_id, out_type=tf.float32)
        return (image, cls_targets, box_targets, num_positives, source_id,
                image_scale)
def upsampling(input_, kernel_size, stride, num_outputs, scope,
               activation_fn=tf.nn.relu, tpu_compatible=False):
    """A smooth replacement of a same-padded transposed convolution.

    This function first computes a nearest-neighbor upsampling of the input
    by a factor of `stride`, then applies a mirror-padded, same-padded
    convolution.

    It expects `kernel_size` to be odd.

    For the non-TPU path, the target size is built from the static height
    and width whenever they are known, even if other dimensions (such as
    the batch size) are not. The run-time size from `tf.shape` is used only
    for a spatial dimension that is statically unknown.

    Args:
      input_: 4-D Tensor input.
      kernel_size: int (odd-valued) representing the kernel size.
      stride: int representing the strides.
      num_outputs: int. Number of output feature maps.
      scope: str. Scope under which to operate.
      activation_fn: activation function.
      tpu_compatible: bool. Whether to use a nearest neighbor upsampling
        compatible with TPU or the default tf.image implementation.

    Returns:
      4-D Tensor output.

    Raises:
      ValueError: if `kernel_size` is even, or if the static shape of
        `input_` is known not to be rank 4 (non-TPU path).
    """
    if kernel_size % 2 == 0:
        raise ValueError('kernel_size is expected to be odd.')

    with tf.variable_scope(scope):
        if tpu_compatible:
            upsampled_input = upsampling_tpu_compatible(input_, stride)
        else:
            _, height, width, _ = input_.get_shape().with_rank(4).as_list()
            if height is None or width is None:
                # Only the statically unknown spatial sizes are read at
                # run time.
                dynamic_shape = tf.shape(input_)
                if height is None:
                    height = dynamic_shape[1]
                if width is None:
                    width = dynamic_shape[2]
            upsampled_input = tf.image.resize_nearest_neighbor(
                input_, [stride * height, stride * width])

        # Stride-1 convolution over the upsampled feature map.
        return conv2d(upsampled_input, kernel_size, 1, num_outputs, 'conv',
                      activation_fn=activation_fn)
def pad_to_fixed_size(data, pad_value, output_shape):
    """Pad data to a fixed length at the first dimension.

    `data` is reshaped to `[-1, output_shape[1]]` and rows filled with
    `pad_value` are appended until there are `output_shape[0]` rows. The
    padding is created in the dtype of `data`, so tensors that are not
    float32 can be padded as well.

    Args:
      data: Tensor to be padded to output_shape.
      pad_value: A constant value assigned to the paddings.
      output_shape: The output shape of a 2D tensor.

    Returns:
      The Padded tensor with output_shape [max_num_instances, dimension].
      A graph-time assertion requires the number of rows in `data` to be at
      most `max_num_instances`.
    """
    max_num_instances = output_shape[0]
    dimension = output_shape[1]
    data = tf.reshape(data, [-1, dimension])
    num_instances = tf.shape(data)[0]
    assert_length = tf.Assert(
        tf.less_equal(num_instances, max_num_instances), [num_instances])
    with tf.control_dependencies([assert_length]):
        # Tie the padding length to the assertion so that it is checked
        # before any padding is built.
        pad_length = max_num_instances - num_instances
    # Match the dtype of `data`; tf.concat requires equal dtypes.
    fill_value = tf.cast(pad_value, data.dtype)
    paddings = fill_value * tf.ones([pad_length, dimension],
                                    dtype=data.dtype)
    padded_data = tf.concat([data, paddings], axis=0)
    padded_data = tf.reshape(padded_data, output_shape)
    return padded_data
def parser(record):
    """Parses one serialized tfrecord example into model input tensors.

    Relies on names from the enclosing scope that are not defined here:
    `seq_len`, `reuse_len`, `perm_size`, `num_predict`, `use_bfloat16`,
    `_local_perm` and `_convert_example`.

    The sequence is split at `reuse_len`; `_local_perm` is applied to each
    part separately and the results are stitched back together. When
    `num_predict` is not None, the targets are compacted to the masked
    positions and padded to exactly `num_predict` entries.

    Args:
      record: A serialized example.

    Returns:
      The parsed feature dict. The "input" and "is_masked" features are
      removed, while "seg_id" and "label" stay as parsed. Added keys are
      "target", "target_mask", "perm_mask", "input_k", "input_q" and, when
      `num_predict` is not None, "target_mapping". The dict is passed to
      `_convert_example` before being returned.
    """
    feature_spec = {
        "input": tf.FixedLenFeature([seq_len], tf.int64),
        "target": tf.FixedLenFeature([seq_len], tf.int64),
        "seg_id": tf.FixedLenFeature([seq_len], tf.int64),
        "label": tf.FixedLenFeature([1], tf.int64),
        "is_masked": tf.FixedLenFeature([seq_len], tf.int64),
    }

    # Retrieve the serialized example.
    example = tf.parse_single_example(
        serialized=record, features=feature_spec)

    inputs = example.pop("input")
    target = example.pop("target")
    is_masked = tf.cast(example.pop("is_masked"), tf.bool)

    non_reuse_len = seq_len - reuse_len
    assert perm_size <= reuse_len and perm_size <= non_reuse_len

    # First part: positions [0, reuse_len).
    (perm_mask_head, target_head, target_mask_head, input_k_head,
     input_q_head) = _local_perm(
         inputs[:reuse_len],
         target[:reuse_len],
         is_masked[:reuse_len],
         perm_size,
         reuse_len)

    # Second part: positions [reuse_len, seq_len).
    (perm_mask_tail, target_tail, target_mask_tail, input_k_tail,
     input_q_tail) = _local_perm(
         inputs[reuse_len:],
         target[reuse_len:],
         is_masked[reuse_len:],
         perm_size,
         non_reuse_len)

    # Widen each per-part mask to seq_len columns: rows of the first part
    # get ones for the second part's columns, rows of the second part get
    # zeros for the first part's columns.
    perm_mask_head = tf.concat(
        [perm_mask_head, tf.ones([reuse_len, non_reuse_len])], axis=1)
    perm_mask_tail = tf.concat(
        [tf.zeros([non_reuse_len, reuse_len]), perm_mask_tail], axis=1)
    perm_mask = tf.concat([perm_mask_head, perm_mask_tail], axis=0)
    target = tf.concat([target_head, target_tail], axis=0)
    target_mask = tf.concat([target_mask_head, target_mask_tail], axis=0)
    input_k = tf.concat([input_k_head, input_k_tail], axis=0)
    input_q = tf.concat([input_q_head, input_q_tail], axis=0)

    if num_predict is None:
        example["target"] = tf.reshape(target, [seq_len])
        example["target_mask"] = tf.reshape(target_mask, [seq_len])
    else:
        # Positions selected by the target mask.
        positions = tf.range(seq_len, dtype=tf.int64)
        is_target = tf.cast(target_mask, tf.bool)
        positions = tf.boolean_mask(positions, is_target)

        # Extra padding due to CLS/SEP introduced after prepro.
        actual_num_predict = tf.shape(positions)[0]
        pad_len = num_predict - actual_num_predict

        # target_mapping: one one-hot row per selected position, followed
        # by all-zero rows up to num_predict.
        target_mapping = tf.one_hot(positions, seq_len, dtype=tf.float32)
        mapping_pad = tf.zeros(
            [pad_len, seq_len], dtype=target_mapping.dtype)
        target_mapping = tf.concat([target_mapping, mapping_pad], axis=0)
        example["target_mapping"] = tf.reshape(
            target_mapping, [num_predict, seq_len])

        # target: values at the selected positions, zero-padded.
        target = tf.boolean_mask(target, is_target)
        target_pad = tf.zeros([pad_len], dtype=target.dtype)
        target = tf.concat([target, target_pad], axis=0)
        example["target"] = tf.reshape(target, [num_predict])

        # target mask: ones for real predictions, zeros for the padding.
        target_mask = tf.concat(
            [tf.ones([actual_num_predict], dtype=tf.float32),
             tf.zeros([pad_len], dtype=tf.float32)],
            axis=0)
        example["target_mask"] = tf.reshape(target_mask, [num_predict])

    # Reshape back to fixed shape.
    example["perm_mask"] = tf.reshape(perm_mask, [seq_len, seq_len])
    example["input_k"] = tf.reshape(input_k, [seq_len])
    example["input_q"] = tf.reshape(input_q, [seq_len])

    _convert_example(example, use_bfloat16)

    for name, tensor in example.items():
        tf.logging.info("%s: %s", name, tensor)

    return example