Ejemplo n.º 1
0
 def _projective_transform(data, proj_matrix, static_axis, interpolation):
     """Warp `data` with `proj_matrix`, optionally swapping two axes around it.

     When `static_axis` is 2 the data is handed to `contrib_image.transform`
     unchanged. For `static_axis == 1` axes 1 and 2 are swapped before the
     warp, for any other value axes 0 and 2 are swapped; the same swap is
     applied again afterwards to restore the original axis order.

     Args:
         data: tensor to warp (the transposed branches use a 3-entry
             permutation, so rank 3 is expected there).
         proj_matrix: transform parameters forwarded to
             `contrib_image.transform`.
         static_axis: selects which axis swap (if any) surrounds the warp.
         interpolation: interpolation mode forwarded to
             `contrib_image.transform`.

     Returns:
         The warped tensor, in the same axis order as the input.
     """
     if static_axis == 2:
         return contrib_image.transform(data, proj_matrix, interpolation)

     # Each permutation swaps exactly two axes, so applying it a second time
     # undoes it.
     perm = [0, 2, 1] if static_axis == 1 else [2, 1, 0]
     swapped = tf.transpose(data, perm)
     warped = contrib_image.transform(swapped, proj_matrix, interpolation)
     return tf.transpose(warped, perm)
Ejemplo n.º 2
0
 def __call__(self, image, **kwargs):
     """Apply an affine warp built from `self.get_params` to one image.

     Args:
         image: a `tf.Tensor` or an array accepted by `cv2.warpAffine`;
             `self.get_size(image)` must report three entries (`[h, w, c]`).
         **kwargs: optional `seed` (forwarded to `self.get_params`) and
             optional `name` (TensorFlow name scope; defaults to the
             lower-cased class name).

     Returns:
         The result of `transform(...)` for tensor input, otherwise the
         result of `cv2.warpAffine(...)`.

     Raises:
         AssertionError: if the reported size does not have three entries.
         ValueError: if `self.constant` is a sequence whose length differs
             from the number of image channels.
     """
     seed = kwargs.get("seed", None)
     size = self.get_size(image)
     ret = self.get_params(self.degrees, self.translate, self.scale,
                           self.shear, size, seed)
     assert len(size) == 3, "`image` must be [h, w, c]"
     # Expand a scalar fill value into a per-channel list locally. Writing the
     # list back to `self.constant` would freeze the channel count of the first
     # image seen and make later calls with a different channel count fail.
     constant = self.constant
     if isinstance(constant, int):
         constant = [constant] * size[-1]
     elif len(constant) != size[-1]:
         raise ValueError(
             "`constant` value must have the same number with image channels, got {}"
             " and image has {} channels.".format(constant, size[-1]))
     center = (size[0] * 0.5 + 0.5, size[1] * 0.5 + 0.5)
     matrix = self._get_inverse_affine_matrix(center, *ret)
     if isinstance(image, tf.Tensor):
         # Pad the affine parameters with two zeros to get the 8-value form
         # passed to `transform`.
         matrix = matrix + [0., 0.]
         with tf.name_scope(
                 kwargs.get("name", self.__class__.__name__.lower())):
             return transform(image,
                              matrix,
                              interpolation=self.order_tf,
                              output_shape=size[:-1])
     else:
         matrix = np.array(matrix, np.float32).reshape(2, 3)
         return cv2.warpAffine(image,
                               matrix[:2],
                               dsize=size[1::-1],
                               flags=self.order,
                               borderMode=self.border,
                               borderValue=constant)
Ejemplo n.º 3
0
def homography_scale_warp_per_image(image, width, height, ref_width,
                                    ref_height, corner_shifts):
    """Warps an image with a homography rescaled from a reference size.

    The homography parameters are computed by `shifts_to_homography` for an
    image of size `ref_width` x `ref_height` and are then multiplied
    element-wise by scale factors derived from the ratio between the
    reference size and the actual image size.

    Args:
      image: input image of shape [height, width, channels] and of data type
        uint8 or float32
      width: the width of the input image
      height: the height of the input image
      ref_width: the width of the image that the corner displacements were
        computed from
      ref_height: the height of the image that the corner displacements were
        computed from
      corner_shifts: the displacements of the four image corner points, of
        data type float32 and of shape [8]

    Returns:
      A tuple `(warped, transform)`: the bilinearly warped image and the
      rescaled 8-parameter transform that produced it.
    """
    base_params = shifts_to_homography(ref_width,
                                       ref_height,
                                       corner_shifts,
                                       is_forward=False,
                                       is_matrix=False)
    scale_x = tf.to_float(ref_width) / tf.to_float(width)
    scale_y = tf.to_float(ref_height) / tf.to_float(height)
    # One multiplier per homography parameter, in the same order as the
    # 8-value parameter vector.
    per_param_scale = tf.stack([
        1, scale_y / scale_x, 1 / scale_x,
        scale_x / scale_y, 1, 1 / scale_y,
        scale_x, scale_y,
    ])
    scaled_params = tf.multiply(base_params, per_param_scale)
    warped_image = contrib_image.transform(image, scaled_params, 'bilinear')
    return warped_image, scaled_params
Ejemplo n.º 4
0
 def apply(self, img):
     """Pad, warp and crop `img` to `self.height` x `self.width`.

     The image is first zero-padded (anchored at the top-left) so that it is
     at least `self.height` x `self.width`, then warped with the first eight
     entries of the flattened `self.src_T_out`, and finally sliced back to
     `[:self.height, :self.width]`.
     """
     in_shape = tf.shape(img)
     padded_height = tf.maximum(self.height, in_shape[0])
     padded_width = tf.maximum(self.width, in_shape[1])
     padded = tf.image.pad_to_bounding_box(img, 0, 0, padded_height,
                                           padded_width)
     flat_params = tf.reshape(self.src_T_out, (-1,))[:8]
     warped = tf_image.transform(padded, flat_params)
     return warped[:self.height, :self.width]
Ejemplo n.º 5
0
def shear_x(image, level, replace):
  """Shear `image` along the X dimension (PIL-style shear equivalent).

  Args:
    image: image passed through `wrap` before the transform.
    level: shear coefficient placed in the transform parameters.
    replace: value forwarded to `unwrap` together with the sheared image.

  Returns:
    The result of `unwrap` applied to the sheared image.
  """
  # A shear parallel to the x axis is a projective transform whose 2x2 part
  # is:
  # [1  level
  #  0  1].
  shear_params = [1., level, 0., 0., 1., 0., 0., 0.]
  sheared = contrib_image.transform(wrap(image), shear_params)
  return unwrap(sheared, replace)
Ejemplo n.º 6
0
def shear_y(image, level, replace):
  """Shear `image` along the Y dimension (PIL-style shear equivalent).

  Args:
    image: image passed through `wrap` before the transform.
    level: shear coefficient placed in the transform parameters.
    replace: value forwarded to `unwrap` together with the sheared image.

  Returns:
    The result of `unwrap` applied to the sheared image.
  """
  # A shear parallel to the y axis is a projective transform whose 2x2 part
  # is:
  # [1  0
  #  level  1].
  shear_params = [1., 0., 0., level, 1., 0., 0., 0.]
  sheared = contrib_image.transform(wrap(image), shear_params)
  return unwrap(sheared, replace)
Ejemplo n.º 7
0
def _shear_x(image, level, replace):
    """Shear `image` along the X dimension (PIL-style shear equivalent).

    Args:
        image: image passed through `_wrap` before the transform.
        level: shear coefficient placed in the transform parameters.
        replace: value forwarded to `_unwrap` together with the sheared image.

    Returns:
        The result of `_unwrap` applied to the sheared image.
    """
    # A shear parallel to the x axis is a projective transform whose 2x2 part
    # is:
    # [1  level
    #  0  1].
    shear_params = [1., level, 0., 0., 1., 0., 0., 0.]
    sheared = image_ops.transform(_wrap(image), shear_params)
    return _unwrap(sheared, replace)
Ejemplo n.º 8
0
 def __call__(self, img, **kwargs):
     """Rotate each image of a batch by a random angle with probability `self.p`.

     For every batch element an angle is drawn uniformly from
     [-self.angle, self.angle] degrees (converted to radians). Elements whose
     uniform draw is not below `self.p` get their angle multiplied by zero.

     Args:
         img: batched image tensor; the first three entries of its shape are
             read as batch size, height and width.
         **kwargs: accepted but not used here.

     Returns:
         The batch warped with bilinear interpolation.
     """
     dims = tf.shape(img)
     batch_size = dims[0]
     height = dims[1]
     width = dims[2]

     # Per-sample switch deciding whether the rotation is applied.
     apply_mask = tf.less(
         tf.compat.v1.random_uniform([batch_size], 0, 1.0), self.p)

     max_rad = self.angle * 3.141592653589793 / 180.0
     angles = tf.compat.v1.random_uniform([batch_size], -max_rad, max_rad)
     angles = angles * tf.cast(apply_mask, tf.float32)

     projective = angles_to_projective_transforms(
         angles, tf.cast(height, tf.float32), tf.cast(width, tf.float32))
     return transform(img, projective, interpolation='BILINEAR')
Ejemplo n.º 9
0
def affine_transform(X, rate):
    """Randomly compose rotate/shear/scale/erase/translate and warp `X`.

    Starting from a 2x2 identity, `rotate`, `shear` and `scale` are each
    applied to the matrix when the corresponding `prob(rate)` predicate is
    true. `random_erase` is mapped over `X` under its own predicate, and a
    translation vector comes from `translation` (or zeros) under another.

    Args:
        X: input tensor passed to `transform` (and to `tf.map_fn` for erasing).
        rate: value forwarded to every `prob(rate)` call.

    Returns:
        The result of `transform` with the assembled 8-value parameter list.
    """
    linear = tf.eye(2)
    # Bind the current matrix as a default argument so each branch sees the
    # value from before this step.
    for step in (rotate, shear, scale):
        linear = tf.cond(prob(rate),
                         lambda step=step, current=linear: step(current),
                         lambda current=linear: current)

    X = tf.cond(prob(rate), lambda: tf.map_fn(random_erase, X), lambda: X)
    offset = tf.cond(prob(rate), translation, lambda: tf.zeros(2))

    a0 = linear[0][0]
    a1 = linear[0][1]
    b0 = linear[1][0]
    b1 = linear[1][1]
    a2 = offset[0]
    b2 = offset[1]
    return transform(X, [a0, a1, a2, b0, b1, b2, 0, 0])
Ejemplo n.º 10
0
def subpixel_homography(image, height, width, dy1, dx1, dy2, dx2, dy3, dx3,
                        dy4, dx4):
    """Applies a homography to an image.

  The eight transform parameters are obtained by solving an 8x8 linear system
  built from the four corner values, the image is warped with bilinear
  interpolation, and the top-left [height, width] window is returned.

  Args:
    image: input image of shape [input_height, input_width, channels] and of
      data type uint8 or float32
    height: the output image height
    width: the output image width
    dy1: the vertical shift of the top left corner
    dx1: the horizontal shift of the top left corner
    dy2: the vertical shift of the bottom left corner
    dx2: the horizontal shift of the bottom left corner
    dy3: the vertical shift of the top right corner
    dx3: the horizontal shift of the top right corner
    dy4: the vertical shift of the bottom right corner
    dx4: the horizontal shift of the bottom right corner
  Returns:
    the warping result of shape [height, width, channels] with the same data
    type as image
  """
    # Each corner contributes one (rx, ry) row pair of the system mat * p = b,
    # where p is the 8-value parameter vector given to
    # contrib_image.transform. The coefficient patterns use corner coordinates
    # (0, 0), (0, height - 1), (width - 1, 0) and (width - 1, height - 1).
    # NOTE(review): the right-hand side `b` below holds the d* values
    # themselves rather than corner + d*; confirm whether callers pass
    # absolute positions or shifts.
    rx1 = tf.cast(tf.stack([0, 0, 1, 0, 0, 0, 0, 0]), tf.float32)
    ry1 = tf.cast(tf.stack([0, 0, 0, 0, 0, 1, 0, 0]), tf.float32)
    rx2 = tf.cast(
        tf.stack([0, height - 1, 1, 0, 0, 0, 0, -(height - 1) * dx2]),
        tf.float32)
    ry2 = tf.cast(
        tf.stack([0, 0, 0, 0, height - 1, 1, 0, -(height - 1) * dy2]),
        tf.float32)
    rx3 = tf.cast(tf.stack([width - 1, 0, 1, 0, 0, 0, -(width - 1) * dx3, 0]),
                  tf.float32)
    ry3 = tf.cast(tf.stack([0, 0, 0, width - 1, 0, 1, -(width - 1) * dy3, 0]),
                  tf.float32)
    rx4 = tf.cast(
        tf.stack([
            width - 1, height - 1, 1, 0, 0, 0, -(width - 1) * dx4,
            -(height - 1) * dx4
        ]), tf.float32)
    ry4 = tf.cast(
        tf.stack([
            0, 0, 0, width - 1, height - 1, 1, -(width - 1) * dy4,
            -(height - 1) * dy4
        ]), tf.float32)
    mat = tf.stack([rx1, ry1, rx2, ry2, rx3, ry3, rx4, ry4])
    b = tf.reshape(
        tf.cast(tf.stack([dx1, dy1, dx2, dy2, dx3, dy3, dx4, dy4]),
                tf.float32), [8, 1])
    # Solve the system through an explicit inverse and flatten to 8 values.
    inv_mat = tf.matrix_inverse(mat)
    transformation = tf.reshape(tf.matmul(inv_mat, b), [8])
    warped = contrib_image.transform(image, transformation, 'bilinear')
    # Keep only the top-left height x width window of the warped image.
    cropped = tf.image.crop_to_bounding_box(warped, 0, 0, height, width)
    return cropped
Ejemplo n.º 11
0
def subpixel_crop(image, y, x, height, width):
    """Crops the window [x, y, x + width, y + height] out of an image.

    The image is first resampled bilinearly with a pure-translation transform
    carrying (x, y), then the top-left `height` x `width` region of the
    result is cut out.

    Args:
      image: input image of shape [input_height, input_width, channels] and
        of data type uint8 or float32
      y: the y coordinate of the top left corner of the cropping window
      x: the x coordinate of the top left corner of the cropping window
      height: the height of the cropping window
      width: the width of the cropping window

    Returns:
      the cropping result of shape [height, width, channels] with the same
      type as image
    """
    shift_params = tf.cast(tf.stack([1, 0, x, 0, 1, y, 0, 0]), tf.float32)
    shifted = contrib_image.transform(image, shift_params, 'bilinear')
    return tf.image.crop_to_bounding_box(shifted, 0, 0, height, width)
Ejemplo n.º 12
0
def _random_affine_distort(image):
  """Warp `image` with a small random affine distortion.

  Three fixed source points are each moved by an offset of -1, 0 or 1 in x
  and in y. The six offsets are the base-3 digits of a single random integer
  in [0, 728] (728 == 3**6 - 1), shifted down by one. The affine parameters
  mapping the source points onto the moved points are solved for and passed
  to `transform` with bilinear interpolation.

  Args:
    image: image forwarded to `transform`.

  Returns:
    The result of `transform` for the sampled affine parameters.
  """
  source_x = np.array([38, 89, 64])
  source_y = np.array([55, 55, 105])
  rnd = random.randint(0, 728)
  # Floor division is required to extract base-3 digits: with true division
  # (`/` on Python 3) the offsets become fractional and skewed towards
  # positive values instead of being drawn from {-1, 0, 1}.
  target_x = np.array([source_x[0] + rnd // 243 - 1,
                       source_x[1] + rnd % 81 // 27 - 1,
                       source_x[2] + rnd % 9 // 3 - 1])
  target_y = np.array([source_y[0] + rnd % 243 // 81 - 1,
                       source_y[1] + rnd % 27 // 9 - 1,
                       source_y[2] + rnd % 3 - 1])

  # Rows of A are (x, y, 1) for each source point; solving A @ t = target
  # gives one row of affine coefficients per output coordinate.
  A = np.vstack((source_x, source_y, np.ones(3)))
  A = np.transpose(A)
  tform_x = np.linalg.solve(A, target_x)
  tform_y = np.linalg.solve(A, target_y)
  # Append two zeros to form the 8-value parameter list.
  tform = tform_x.tolist() + tform_y.tolist() + [0, 0]
  image = transform(image, tform, interpolation='BILINEAR')

  return image
Ejemplo n.º 13
0
def homography_warp_per_image(image, width, height, corner_shifts):
  """Warps an input image with a homography given by corner displacements.

  Args:
    image: input image of shape [input_height, input_width, channels] and of
      data type uint8 or float32
    width: the width of the image that the corner displacements were computed
      from
    height: the height of the image that the corner displacements were
      computed from
    corner_shifts: the displacements of the four image corner points, of data
      type float32 and of shape [8]

  Returns:
    A tuple `(warped, transform)`: the bilinearly warped image and the
    transform parameters returned by `shifts_to_homography` (requested with
    `is_forward=False, is_matrix=False`).
  """
  homography_params = shifts_to_homography(
      width, height, corner_shifts, is_forward=False, is_matrix=False)
  warped_image = contrib_image.transform(image, homography_params, 'bilinear')
  return warped_image, homography_params
Ejemplo n.º 14
0
def translate(image, x, y):
  """Shifts the image by (x, y) pixels.

  Args:
    image: A 2D float32 tensor.
    x: The x shift of the output, in pixels.
    y: The y shift of the output, in pixels.

  Returns:
    The translated image tensor.
  """
  # TODO(ringw): Fix mixing scalar constants and scalar tensors here.
  unit = tf.constant(1, tf.float32)
  null = tf.constant(0, tf.float32)
  neg_x = tf.to_float(-x)
  neg_y = tf.to_float(-y)
  # tf.contrib.image.transform expects the inverted transformation, hence the
  # negated offsets. The ninth entry of the 3x3 matrix is omitted and is
  # always 1.
  inverse_params = tf.convert_to_tensor(
      [unit, null, neg_x,
       null, unit, neg_y,
       null, null], tf.float32)  # pyformat: disable
  return contrib_image.transform(image, inverse_params)
Ejemplo n.º 15
0
def augment_seqs_ava(raw_frames,
                     num_frame,
                     max_shift,
                     batch_size=2,
                     queue_size=60,
                     num_threads=3,
                     train_height=128,
                     train_width=128,
                     pixel_noise=0.0,
                     mix=True,
                     screen=False,
                     mode='train',
                     to_gray=True):
    """Prepares training sequence batches from AVA dataset.

  Each frame is warped with a homography built from randomly shifted corner
  points, cropped and resized to the training size, perturbed with truncated
  normal noise and normalized. Labels are the corner shifts of the relative
  homography between consecutive frames, expressed at training resolution.

  Args:
    raw_frames: input video frames from AVA dataset
    num_frame: the number of frames in a sequence
    max_shift: the range each image corner point can move
    batch_size: the size of training or testing batches
    queue_size: the queue size of the shuffle buffer
    num_threads: the number of threads of the shuffle buffer
    train_height: the height of the training/testing images
    train_width: the width of the training/testing images
    pixel_noise: the magnitude of additive noises
    mix: whether mix the magnitude of corner point shifts
    screen: whether remove highly distorted homographies
    mode: 'train' or 'eval', specifying whether preparing images for training or
      testing
    to_gray: whether prepare color or gray scale training images
  Returns:
    a tuple (batch_frames, batch_labels), both cast to float32: frames of shape
    [train_height, train_width, num_frame * num_channel] per example and labels
    of length (num_frame - 1) * 8 per example
  """
    if to_gray:
        output_frames = tf.image.rgb_to_grayscale(raw_frames)
        num_channel = 1
    else:
        output_frames = raw_frames
        num_channel = 3

    # Axes 1 and 2 of the frame stack are read as height and width.
    frame_height = tf.to_float(tf.shape(output_frames)[1])
    frame_width = tf.to_float(tf.shape(output_frames)[2])

    if mix:
        # `scale` takes one of the values 0, 1/3, 2/3 or 1 depending on how
        # many of the thresholds 0.1, 0.2 and 0.3 the uniform draw exceeds.
        p = tf.random_uniform([], minval=0, maxval=1, dtype=tf.float32)
        scale = (tf.to_float(tf.greater(p, 0.1)) + tf.to_float(
            tf.greater(p, 0.2)) + tf.to_float(tf.greater(p, 0.3))) / 3
    else:
        scale = 1.0
    new_max_shift = max_shift * scale
    # One row of eight corner-coordinate shifts per frame.
    rand_shift_base = tf.random_uniform([num_frame, 8],
                                        minval=-new_max_shift,
                                        maxval=new_max_shift,
                                        dtype=tf.float32)
    crop_width = frame_width - 2 * new_max_shift - 1
    crop_height = frame_height - 2 * new_max_shift - 1
    # Four corners of the crop window flattened as (width-axis, height-axis)
    # coordinate pairs.
    ref_window = tf.to_float(
        tf.stack([
            0, 0, 0, crop_height - 1, crop_width - 1, 0, crop_width - 1,
            crop_height - 1
        ]))
    if screen:
        new_shift_list = []
        flag_list = []
        hmg_list = []
        src_points = tf.reshape(ref_window, [4, 2])
        for i in range(num_frame):
            dst_points = tf.reshape(
                rand_shift_base[i] + ref_window + new_max_shift, [4, 2])
            hmg = calc_homography_from_points(src_points, dst_points)
            hmg_list.append(hmg)
        for i in range(num_frame - 1):
            # Relative homography between frame i and frame i + 1.
            hmg = tf.matmul(tf.matrix_inverse(hmg_list[i + 1]), hmg_list[i])
            shift = homography_to_shifts(hmg, crop_width, crop_height)
            angles = calc_homography_distortion(crop_width, crop_height, shift)
            # Despite its name, `max_angle` holds the minimum of `angles`.
            max_angle = tf.reduce_min(angles)
            # flag is 1.0 when that minimum is at least -0.707, else 0.0; a
            # zero flag zeroes the shifts of the frames involved.
            flag = tf.to_float(max_angle >= -0.707)
            flag_list.append(flag)
            if i > 0:
                new_shift = rand_shift_base[i] * flag * flag_list[i - 1]
            else:
                new_shift = rand_shift_base[i] * flag
            new_shift_list.append(new_shift)
        # NOTE(review): indexing flag_list[num_frame - 2] presumes
        # num_frame >= 2; with num_frame == 1 flag_list is empty -- confirm
        # callers never pass that.
        new_shift_list.append(rand_shift_base[num_frame - 1] *
                              flag_list[num_frame - 2])
        rand_shift = tf.stack(new_shift_list)
    else:
        rand_shift = rand_shift_base

    # Diagonal scaling between crop-window size and training size, used to
    # re-express the relative homographies at training resolution.
    mat_scale = tf.diag(
        tf.stack([crop_width / train_width, crop_height / train_height, 1.0]))
    inv_mat_scale = tf.matrix_inverse(mat_scale)
    hmg_list = []
    frame_list = []
    for i in range(num_frame):
        src_points = tf.reshape(ref_window, [4, 2])
        dst_points = tf.reshape(rand_shift[i] + ref_window + new_max_shift,
                                [4, 2])
        hmg = calc_homography_from_points(src_points, dst_points)
        hmg_list.append(hmg)
        # Normalize by the bottom-right entry and keep the first eight values
        # as transform parameters.
        transform = tf.reshape(hmg, [9]) / hmg[2, 2]
        warped = contrib_image.transform(output_frames[i], transform[:8],
                                         'bilinear')
        # Normalized crop box starting at the top-left corner.
        crop_window = tf.expand_dims(
            tf.stack([
                0, 0, (crop_height - 1) / (frame_height - 1),
                (crop_width - 1) / (frame_width - 1)
            ]), 0)
        resized_base = tf.image.crop_and_resize(tf.expand_dims(warped, 0),
                                                crop_window, [0],
                                                [train_height, train_width])
        resized = tf.squeeze(resized_base, [0])

        noise_im = tf.truncated_normal(shape=tf.shape(resized),
                                       mean=0.0,
                                       stddev=pixel_noise,
                                       dtype=tf.float32)
        noise_frame = normalize_image(tf.to_float(resized) + noise_im)
        frame_list.append(noise_frame)
    # Stack the frames along axis 2 and fold frames and channels into one
    # trailing channel axis.
    noise_frames = tf.reshape(tf.stack(
        frame_list, 2), (train_height, train_width, num_frame * num_channel))

    label_list = []
    for i in range(num_frame - 1):
        hmg_combine = tf.matmul(tf.matrix_inverse(hmg_list[i + 1]),
                                hmg_list[i])
        hmg_final = tf.matmul(inv_mat_scale, tf.matmul(hmg_combine, mat_scale))
        label = homography_to_shifts(hmg_final, train_width, train_height)
        label_list.append(label)
    labels = tf.reshape(tf.stack(label_list, 0), [(num_frame - 1) * 8])

    if mode == 'train':
        min_after_dequeue = int(queue_size / 3)
    else:
        min_after_dequeue = batch_size * 3
    batch_frames, batch_labels = tf.train.shuffle_batch(
        [noise_frames, labels],
        batch_size=batch_size,
        num_threads=num_threads,
        capacity=queue_size,
        min_after_dequeue=min_after_dequeue,
        enqueue_many=False)

    return tf.cast(batch_frames, tf.float32), tf.cast(batch_labels, tf.float32)
Ejemplo n.º 16
0
def translate(images, tx, ty, interpolation='NEAREST'):
    """Shift `images` by (tx, ty) via a pure-translation transform.

    Args:
        images: image tensor(s) forwarded to `transform`.
        tx: offset whose negation is placed in the x-translation slot.
        ty: offset whose negation is placed in the y-translation slot.
        interpolation: interpolation mode forwarded to `transform`.

    Returns:
        The result of `transform`.
    """
    # Identity linear part; the offsets are negated before being handed to
    # `transform`.
    shift_params = [1, 0, -tx, 0, 1, -ty, 0, 0]
    return transform(images, shift_params, interpolation)
Ejemplo n.º 17
0
    mul = t_params.pop('mul')
    agn = t_params.pop('agn')
    color_m = t_params.pop('color_m')
    resize_smooth = t_params.pop('resize_smooth')

    t_matrix = assemble_transformation_matrix(**t_params)

    for k in range(n):
        inp = np.squeeze(X[k])

        if agn is not None:
            gauss = rnd.normal(0, rnd.uniform(agn[0], agn[1]), inp.shape)
        else:
            gauss = None

        x_t = transform(inp, t_matrix, h_flip, gauss, resize, resize_smooth, mul, color_m)
        X_t.append(x_t)

    return np.array(X_t)


def generate_random_sequences(X, Y, sequence_size = 32, shift = 16, rseed = 0, final_size = None,
                              t_params_f = None, final_heatmap_size = None):
    rnd = np.random.RandomState(rseed)

    if not t_params_f:
        raise Exception('No attributes given!')

    if final_size is None:
        final_size = min(X.shape[2], X.shape[3])