def _projective_transform(data, proj_matrix, static_axis, interpolation):
  """Applies a projective transform to the two axes other than static_axis.

  The tensor is transposed as needed so that contrib_image.transform warps
  the desired plane, then transposed back.
  """
  if static_axis == 2:
    data = contrib_image.transform(data, proj_matrix, interpolation)
  elif static_axis == 1:
    data = tf.transpose(data, [0, 2, 1])
    data = contrib_image.transform(data, proj_matrix, interpolation)
    data = tf.transpose(data, [0, 2, 1])
  else:
    data = tf.transpose(data, [2, 1, 0])
    data = contrib_image.transform(data, proj_matrix, interpolation)
    data = tf.transpose(data, [2, 1, 0])
  return data

def __call__(self, image, **kwargs):
  seed = kwargs.get("seed", None)
  size = self.get_size(image)
  ret = self.get_params(self.degrees, self.translate, self.scale, self.shear,
                        size, seed)
  assert len(size) == 3, "`image` must be [h, w, c]"
  if isinstance(self.constant, int):
    self.constant = [self.constant] * size[-1]
  elif len(self.constant) != size[-1]:
    raise ValueError(
        "`constant` must have the same number of values as image channels,"
        " got {} and image has {} channels.".format(self.constant, size[-1]))
  center = (size[0] * 0.5 + 0.5, size[1] * 0.5 + 0.5)
  matrix = self._get_inverse_affine_matrix(center, *ret)
  if isinstance(image, tf.Tensor):
    matrix = matrix + [0., 0.]
    with tf.name_scope(kwargs.get("name", self.__class__.__name__.lower())):
      return transform(image, matrix, interpolation=self.order_tf,
                       output_shape=size[:-1])
  else:
    matrix = np.array(matrix, np.float32).reshape(2, 3)
    return cv2.warpAffine(image, matrix[:2], dsize=size[1::-1],
                          flags=self.order, borderMode=self.border,
                          borderValue=self.constant)

def homography_scale_warp_per_image(image, width, height, ref_width,
                                    ref_height, corner_shifts):
  """Transforms an input image using a specified homography.

  Args:
    image: input image of shape [height, width, channels] and of data type
      uint8 or float32
    width: the width of the input image
    height: the height of the input image
    ref_width: the homography is parameterized using the displacements of
      four image corners; ref_width is the width of the original image that
      the corner displacements are computed from
    ref_height: the height of the original image that the corner
      displacements are computed from
    corner_shifts: the displacements of the four image corner points, of data
      type float32 and of shape [8]

  Returns:
    the warped result, of the same shape and data type as image, together
    with the rescaled flat transform of shape [8]
  """
  hmg_base = shifts_to_homography(ref_width, ref_height, corner_shifts,
                                  is_forward=False, is_matrix=False)
  sx = tf.to_float(ref_width) / tf.to_float(width)
  sy = tf.to_float(ref_height) / tf.to_float(height)
  vec_scale = tf.stack([1, sy / sx, 1 / sx, sx / sy, 1, 1 / sy, sx, sy])
  transform = tf.multiply(hmg_base, vec_scale)
  warped = contrib_image.transform(image, transform, 'bilinear')
  return warped, transform

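# A standalone numpy sanity check (not part of the original code) of why
# vec_scale works: viewing the flat parameters h = [h0..h7] as the matrix
# H = [[h0, h1, h2], [h3, h4, h5], [h6, h7, 1]] and letting
# S = diag(sx, sy, 1) map image coordinates to reference coordinates, the
# rescaled homography is the conjugation inv(S) @ H @ S, whose entries equal
# H scaled elementwise by exactly [1, sy/sx, 1/sx, sx/sy, 1, 1/sy, sx, sy].
import numpy as np

h = np.random.rand(8)
sx, sy = 1.5, 2.0
H = np.append(h, 1.0).reshape(3, 3)
S = np.diag([sx, sy, 1.0])
conj = np.linalg.inv(S) @ H @ S  # bottom-right entry stays 1
vec_scale = np.array([1, sy / sx, 1 / sx, sx / sy, 1, 1 / sy, sx, sy])
assert np.allclose(h * vec_scale, conj.ravel()[:8])
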
def apply(self, img):
  input_shape = tf.shape(img)
  tmp_height = tf.maximum(self.height, input_shape[0])
  tmp_width = tf.maximum(self.width, input_shape[1])
  return tf_image.transform(
      tf.image.pad_to_bounding_box(img, 0, 0, tmp_height, tmp_width),
      tf.reshape(self.src_T_out, (-1,))[:8])[:self.height, :self.width]

def shear_x(image, level, replace):
  """Equivalent of PIL Shearing in X dimension."""
  # Shear parallel to x axis is a projective transform
  # with a matrix form of:
  # [1  level
  #  0  1].
  image = contrib_image.transform(
      wrap(image), [1., level, 0., 0., 1., 0., 0., 0.])
  return unwrap(image, replace)

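# For reference, a self-contained TF2 sketch of the same x-shear (an
# assumption, not part of the original code: tf.contrib was removed in
# TF 2.x, and tensorflow_addons.image.transform is its successor). The
# 8-vector [a0, a1, a2, b0, b1, b2, c0, c1] maps each *output* pixel (x, y)
# to the input pixel ((a0*x + a1*y + a2) / k, (b0*x + b1*y + b2) / k) with
# k = c0*x + c1*y + 1, so [1, level, 0, ...] samples input at x + level*y.
import tensorflow as tf
import tensorflow_addons as tfa

def shear_x_tf2(image, level):
  """Shears an [h, w, c] image along x; same matrix as shear_x above."""
  return tfa.image.transform(image, [1., level, 0., 0., 1., 0., 0., 0.])
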
def shear_y(image, level, replace):
  """Equivalent of PIL Shearing in Y dimension."""
  # Shear parallel to y axis is a projective transform
  # with a matrix form of:
  # [1      0
  #  level  1].
  image = contrib_image.transform(
      wrap(image), [1., 0., 0., level, 1., 0., 0., 0.])
  return unwrap(image, replace)

def _shear_x(image, level, replace):
  """Equivalent of PIL Shearing in X dimension."""
  # Shear parallel to x axis is a projective transform
  # with a matrix form of:
  # [1  level
  #  0  1].
  image = image_ops.transform(_wrap(image),
                              [1., level, 0., 0., 1., 0., 0., 0.])
  return _unwrap(image, replace)

def __call__(self, img, **kwargs):
  shp = tf.shape(img)
  batch_size, height, width = shp[0], shp[1], shp[2]
  coin = tf.less(tf.compat.v1.random_uniform([batch_size], 0, 1.0), self.p)
  angle_rad = self.angle * 3.141592653589793 / 180.0
  angles = tf.compat.v1.random_uniform([batch_size], -angle_rad, angle_rad)
  angles *= tf.cast(coin, tf.float32)
  f = angles_to_projective_transforms(angles, tf.cast(height, tf.float32),
                                      tf.cast(width, tf.float32))
  augm_img = transform(img, f, interpolation='BILINEAR')
  return augm_img

def affine_transform(X, rate):
  trans_matrix = tf.eye(2)
  trans_matrix = tf.cond(prob(rate), lambda: rotate(trans_matrix),
                         lambda: trans_matrix)
  trans_matrix = tf.cond(prob(rate), lambda: shear(trans_matrix),
                         lambda: trans_matrix)
  trans_matrix = tf.cond(prob(rate), lambda: scale(trans_matrix),
                         lambda: trans_matrix)
  X = tf.cond(prob(rate), lambda: tf.map_fn(random_erase, X), lambda: X)
  t = tf.cond(prob(rate), translation, lambda: tf.zeros(2))
  a0, a1, b0, b1 = (trans_matrix[0][0], trans_matrix[0][1],
                    trans_matrix[1][0], trans_matrix[1][1])
  a2, b2 = t[0], t[1]
  return transform(X, [a0, a1, a2, b0, b1, b2, 0, 0])

def subpixel_homography(image, height, width, dy1, dx1, dy2, dx2, dy3, dx3,
                        dy4, dx4):
  """Applies a homography to an image.

  Args:
    image: input image of shape [input_height, input_width, channels] and of
      data type uint8 or float32
    height: the output image height
    width: the output image width
    dy1: the vertical shift of the top left corner
    dx1: the horizontal shift of the top left corner
    dy2: the vertical shift of the bottom left corner
    dx2: the horizontal shift of the bottom left corner
    dy3: the vertical shift of the top right corner
    dx3: the horizontal shift of the top right corner
    dy4: the vertical shift of the bottom right corner
    dx4: the horizontal shift of the bottom right corner

  Returns:
    the warping result of shape [height, width, channels] with the same data
    type as image
  """
  # Each pair of rows below encodes the projective constraints for one
  # corner; solving the resulting 8x8 linear system yields the 8 transform
  # parameters expected by contrib_image.transform.
  rx1 = tf.cast(tf.stack([0, 0, 1, 0, 0, 0, 0, 0]), tf.float32)
  ry1 = tf.cast(tf.stack([0, 0, 0, 0, 0, 1, 0, 0]), tf.float32)
  rx2 = tf.cast(
      tf.stack([0, height - 1, 1, 0, 0, 0, 0, -(height - 1) * dx2]),
      tf.float32)
  ry2 = tf.cast(
      tf.stack([0, 0, 0, 0, height - 1, 1, 0, -(height - 1) * dy2]),
      tf.float32)
  rx3 = tf.cast(
      tf.stack([width - 1, 0, 1, 0, 0, 0, -(width - 1) * dx3, 0]), tf.float32)
  ry3 = tf.cast(
      tf.stack([0, 0, 0, width - 1, 0, 1, -(width - 1) * dy3, 0]), tf.float32)
  rx4 = tf.cast(
      tf.stack([width - 1, height - 1, 1, 0, 0, 0, -(width - 1) * dx4,
                -(height - 1) * dx4]), tf.float32)
  ry4 = tf.cast(
      tf.stack([0, 0, 0, width - 1, height - 1, 1, -(width - 1) * dy4,
                -(height - 1) * dy4]), tf.float32)
  mat = tf.stack([rx1, ry1, rx2, ry2, rx3, ry3, rx4, ry4])
  b = tf.reshape(
      tf.cast(tf.stack([dx1, dy1, dx2, dy2, dx3, dy3, dx4, dy4]), tf.float32),
      [8, 1])
  inv_mat = tf.matrix_inverse(mat)
  transformation = tf.reshape(tf.matmul(inv_mat, b), [8])
  warped = contrib_image.transform(image, transformation, 'bilinear')
  cropped = tf.image.crop_to_bounding_box(warped, 0, 0, height, width)
  return cropped

def subpixel_crop(image, y, x, height, width):
  """Crops out a region [x, y, x + width, y + height] from an image.

  Args:
    image: input image of shape [input_height, input_width, channels] and of
      data type uint8 or float32
    y: the y coordinate of the top left corner of the cropping window
    x: the x coordinate of the top left corner of the cropping window
    height: the height of the cropping window
    width: the width of the cropping window

  Returns:
    the cropping result of shape [height, width, channels] with the same type
    as image
  """
  transformation = tf.cast(tf.stack([1, 0, x, 0, 1, y, 0, 0]), tf.float32)
  translated = contrib_image.transform(image, transformation, 'bilinear')
  cropped = tf.image.crop_to_bounding_box(translated, 0, 0, height, width)
  return cropped

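# Example usage (hypothetical values, not from the original code): extract a
# 64x64 window whose top-left corner sits at the fractional position
# (x=3.5, y=10.25). The bilinear resampling in contrib_image.transform is
# what makes fractional offsets meaningful, since
# tf.image.crop_to_bounding_box alone only handles integer corners.
patch = subpixel_crop(image, y=10.25, x=3.5, height=64, width=64)
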
def _random_affine_distort(image):
  source_x = np.array([38, 89, 64])
  source_y = np.array([55, 55, 105])
  # Decode a random integer in [0, 728] (= 3**6 - 1) into six base-3 digits,
  # one per coordinate, giving integer corner shifts in {-1, 0, 1}. Integer
  # division (//) keeps the digits integral under Python 3.
  rnd = random.randint(0, 728)
  target_x = np.array([source_x[0] + rnd // 243 - 1,
                       source_x[1] + rnd % 81 // 27 - 1,
                       source_x[2] + rnd % 9 // 3 - 1])
  target_y = np.array([source_y[0] + rnd % 243 // 81 - 1,
                       source_y[1] + rnd % 27 // 9 - 1,
                       source_y[2] + rnd % 3 - 1])
  # Solve for the affine map taking the source triangle to the target.
  A = np.vstack((source_x, source_y, np.ones(3)))
  A = np.transpose(A)
  tform_x = np.linalg.solve(A, target_x)
  tform_y = np.linalg.solve(A, target_y)
  tform = tform_x.tolist() + tform_y.tolist() + [0, 0]
  image = transform(image, tform, interpolation='BILINEAR')
  return image

def homography_warp_per_image(image, width, height, corner_shifts):
  """Transforms an input image using a specified homography.

  Args:
    image: input image of shape [input_height, input_width, channels] and of
      data type uint8 or float32
    width: the homography is parameterized using the displacements of four
      image corners; width is the width of the image that the corner
      displacements are computed from
    height: the image height
    corner_shifts: the displacements of the four image corner points, of data
      type float32 and of shape [8]

  Returns:
    the warped result, of the same shape as image and of data type float32,
    together with the flat transform of shape [8]
  """
  transform = shifts_to_homography(width, height, corner_shifts,
                                   is_forward=False, is_matrix=False)
  warped = contrib_image.transform(image, transform, 'bilinear')
  return warped, transform

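# Hypothetical usage (variable names assumed in scope): warp an image with
# random corner displacements of up to 8 pixels, shape [8], in the ordering
# expected by shifts_to_homography.
corner_shifts = tf.random_uniform([8], minval=-8.0, maxval=8.0)
warped, transform = homography_warp_per_image(image, width, height,
                                              corner_shifts)
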
def translate(image, x, y):
  """Translates the image.

  Args:
    image: A 2D float32 tensor.
    x: The x shift of the output, in pixels.
    y: The y shift of the output, in pixels.

  Returns:
    The translated image tensor.
  """
  # TODO(ringw): Fix mixing scalar constants and scalar tensors here.
  one = tf.constant(1, tf.float32)
  zero = tf.constant(0, tf.float32)
  # The inverted transformation matrix expected by tf.contrib.image.transform.
  # The last entry of the 3x3 matrix is left out and is always 1.
  translation_matrix = tf.convert_to_tensor(
      [one, zero, tf.to_float(-x),
       zero, one, tf.to_float(-y),
       zero, zero], tf.float32)  # pyformat: disable
  return contrib_image.transform(image, translation_matrix)

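# A standalone scipy sketch (not from the original code) of the sign
# convention noted above: the transform is the *inverse* (output-to-input)
# map, so shifting content by +x means output pixel u samples input pixel
# u - x. scipy.ndimage.affine_transform uses the same output-to-input
# convention, with coordinates in (row, col) = (y, x) order.
import numpy as np
from scipy import ndimage

img = np.zeros((5, 5), np.float32)
img[0, 0] = 1.0
shifted = ndimage.affine_transform(img, np.eye(2), offset=(-2.0, -1.0),
                                   order=0)
assert shifted[2, 1] == 1.0  # the bright pixel moved down 2, right 1
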
def augment_seqs_ava(raw_frames, num_frame, max_shift, batch_size=2,
                     queue_size=60, num_threads=3, train_height=128,
                     train_width=128, pixel_noise=0.0, mix=True, screen=False,
                     mode='train', to_gray=True):
  """Prepares training sequence batches from the AVA dataset.

  Args:
    raw_frames: input video frames from the AVA dataset
    num_frame: the number of frames in a sequence
    max_shift: the range each image corner point can move
    batch_size: the size of training or testing batches
    queue_size: the queue size of the shuffle buffer
    num_threads: the number of threads of the shuffle buffer
    train_height: the height of the training/testing images
    train_width: the width of the training/testing images
    pixel_noise: the magnitude of additive noise
    mix: whether to mix the magnitude of corner point shifts
    screen: whether to remove highly distorted homographies
    mode: 'train' or 'eval', specifying whether images are prepared for
      training or evaluation
    to_gray: whether to convert frames to grayscale

  Returns:
    a batch of training images and the corresponding ground-truth homographies
  """
  if to_gray:
    output_frames = tf.image.rgb_to_grayscale(raw_frames)
    num_channel = 1
  else:
    output_frames = raw_frames
    num_channel = 3
  frame_height = tf.to_float(tf.shape(output_frames)[1])
  frame_width = tf.to_float(tf.shape(output_frames)[2])

  if mix:
    p = tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32)
    scale = (tf.to_float(tf.greater(p, 0.1)) + tf.to_float(
        tf.greater(p, 0.2)) + tf.to_float(tf.greater(p, 0.3))) / 3
  else:
    scale = 1.0
  new_max_shift = max_shift * scale
  rand_shift_base = tf.random_uniform([num_frame, 8], minval=-new_max_shift,
                                      maxval=new_max_shift, dtype=tf.float32)
  crop_width = frame_width - 2 * new_max_shift - 1
  crop_height = frame_height - 2 * new_max_shift - 1
  ref_window = tf.to_float(
      tf.stack([0, 0, 0, crop_height - 1, crop_width - 1, 0,
                crop_width - 1, crop_height - 1]))

  if screen:
    new_shift_list = []
    flag_list = []
    hmg_list = []
    src_points = tf.reshape(ref_window, [4, 2])
    for i in range(num_frame):
      dst_points = tf.reshape(
          rand_shift_base[i] + ref_window + new_max_shift, [4, 2])
      hmg = calc_homography_from_points(src_points, dst_points)
      hmg_list.append(hmg)
    for i in range(num_frame - 1):
      hmg = tf.matmul(tf.matrix_inverse(hmg_list[i + 1]), hmg_list[i])
      shift = homography_to_shifts(hmg, crop_width, crop_height)
      angles = calc_homography_distortion(crop_width, crop_height, shift)
      max_angle = tf.reduce_min(angles)
      flag = tf.to_float(max_angle >= -0.707)
      flag_list.append(flag)
      if i > 0:
        new_shift = rand_shift_base[i] * flag * flag_list[i - 1]
      else:
        new_shift = rand_shift_base[i] * flag
      new_shift_list.append(new_shift)
    new_shift_list.append(rand_shift_base[num_frame - 1] *
                          flag_list[num_frame - 2])
    rand_shift = tf.stack(new_shift_list)
  else:
    rand_shift = rand_shift_base

  mat_scale = tf.diag(
      tf.stack([crop_width / train_width, crop_height / train_height, 1.0]))
  inv_mat_scale = tf.matrix_inverse(mat_scale)
  hmg_list = []
  frame_list = []
  for i in range(num_frame):
    src_points = tf.reshape(ref_window, [4, 2])
    dst_points = tf.reshape(rand_shift[i] + ref_window + new_max_shift,
                            [4, 2])
    hmg = calc_homography_from_points(src_points, dst_points)
    hmg_list.append(hmg)
    transform = tf.reshape(hmg, [9]) / hmg[2, 2]
    warped = contrib_image.transform(output_frames[i], transform[:8],
                                     'bilinear')
    crop_window = tf.expand_dims(
        tf.stack([0, 0, (crop_height - 1) / (frame_height - 1),
                  (crop_width - 1) / (frame_width - 1)]), 0)
    resized_base = tf.image.crop_and_resize(
        tf.expand_dims(warped, 0), crop_window, [0],
        [train_height, train_width])
    resized = tf.squeeze(resized_base, [0])
    noise_im = tf.truncated_normal(shape=tf.shape(resized), mean=0.0,
                                   stddev=pixel_noise, dtype=tf.float32)
    noise_frame = normalize_image(tf.to_float(resized) + noise_im)
    frame_list.append(noise_frame)
  noise_frames = tf.reshape(
      tf.stack(frame_list, 2),
      (train_height, train_width, num_frame * num_channel))

  label_list = []
  for i in range(num_frame - 1):
    hmg_combine = tf.matmul(tf.matrix_inverse(hmg_list[i + 1]), hmg_list[i])
    hmg_final = tf.matmul(inv_mat_scale, tf.matmul(hmg_combine, mat_scale))
    label = homography_to_shifts(hmg_final, train_width, train_height)
    label_list.append(label)
  labels = tf.reshape(tf.stack(label_list, 0), [(num_frame - 1) * 8])

  if mode == 'train':
    min_after_dequeue = int(queue_size / 3)
  else:
    min_after_dequeue = batch_size * 3
  batch_frames, batch_labels = tf.train.shuffle_batch(
      [noise_frames, labels], batch_size=batch_size, num_threads=num_threads,
      capacity=queue_size, min_after_dequeue=min_after_dequeue,
      enqueue_many=False)
  return tf.cast(batch_frames, tf.float32), tf.cast(batch_labels, tf.float32)

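# Hypothetical usage under TF1 queue runners (raw_frames assumed to be a
# [num_frame, height, width, 3] uint8/float32 tensor from the input
# pipeline): build batches of 10-frame grayscale sequences whose corners
# shift by up to 16 pixels.
batch_frames, batch_labels = augment_seqs_ava(
    raw_frames, num_frame=10, max_shift=16.0, batch_size=4, mode='train')
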
def translate(images, tx, ty, interpolation='NEAREST'):
  """Translates a batch of images by (tx, ty) pixels."""
  transforms = [1, 0, -tx, 0, 1, -ty, 0, 0]
  return transform(images, transforms, interpolation)

    mul = t_params.pop('mul')
    agn = t_params.pop('agn')
    color_m = t_params.pop('color_m')
    resize_smooth = t_params.pop('resize_smooth')
    t_matrix = assemble_transformation_matrix(**t_params)
    for k in range(n):
        inp = np.squeeze(X[k])
        if agn is not None:
            gauss = rnd.normal(0, rnd.uniform(agn[0], agn[1]), inp.shape)
        else:
            gauss = None
        x_t = transform(inp, t_matrix, h_flip, gauss, resize, resize_smooth,
                        mul, color_m)
        X_t.append(x_t)
    return np.array(X_t)


def generate_random_sequences(X, Y, sequence_size=32, shift=16, rseed=0,
                              final_size=None, t_params_f=None,
                              final_heatmap_size=None):
    rnd = np.random.RandomState(rseed)
    if not t_params_f:
        raise Exception('No attributes given!')
    if final_size is None:
        final_size = min(X.shape[2], X.shape[3])