Example #1
def _using_motion_vector(depth,
                         translation,
                         rotation_angles,
                         intrinsic_mat,
                         intrinsic_mat_inv=None):
  """A helper for using_motion_vector. See docstring therein."""

  if translation.shape.ndims not in (2, 4):
    raise ValueError('\'translation\' should have rank 2 or 4, not %d' %
                     translation.shape.ndims)
  if translation.shape[-1] != 3:
    raise ValueError('translation\'s last dimension should be 3, not %d' %
                     translation.shape[-1])
  if translation.shape.ndims == 2:
    translation = tf.expand_dims(tf.expand_dims(translation, 1), 1)

  _, height, width = tf.unstack(tf.shape(depth))
  grid = tf.squeeze(
      tf.stack(tf.meshgrid(tf.range(width), tf.range(height), (1,))), axis=3)
  grid = tf.to_float(grid)
  if intrinsic_mat_inv is None:
    intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)

  # Use the depth map and the inverse intrinsic matrix to generate a point
  # cloud xyz.
  xyz = tf.einsum('bij,jhw,bhw->bihw', intrinsic_mat_inv, grid, depth)

  # The TPU aggressively pads tensors that have small dimensions. Therefore a
  # rotation of shape [..., 3, 3] would overflow the HBM memory. To address
  # this, we represent the rotation as a 3x3 nested python tuple of tf.Tensors
  # (that is, we unroll the rotation matrix at the small dimensions). The 3x3
  # matrix multiplication is now done in a python loop, and tensors with small
  # dimensions are avoided.
  unstacked_xyz = tf.unstack(xyz, axis=1)
  unstacked_rotation_matrix = transform_utils.unstacked_matrix_from_angles(
      *tf.unstack(rotation_angles, axis=-1))
  rank_diff = (
      unstacked_xyz[0].shape.ndims -
      unstacked_rotation_matrix[0][0].shape.ndims)

  def expand_to_needed_rank(t):
    for _ in range(rank_diff):
      t = tf.expand_dims(t, -1)
    return t

  unstacked_rotated_xyz = [0.0] * 3
  for i in range(3):
    for j in range(3):
      unstacked_rotated_xyz[i] += expand_to_needed_rank(
          unstacked_rotation_matrix[i][j]) * unstacked_xyz[j]
  rotated_xyz = tf.stack(unstacked_rotated_xyz, axis=1)

  # Project the transformed point cloud back to the camera plane.
  pcoords = tf.einsum('bij,bjhw->bihw', intrinsic_mat, rotated_xyz)

  projected_translation = tf.einsum('bij,bhwj->bihw', intrinsic_mat,
                                    translation)
  pcoords += projected_translation
  x, y, z = tf.unstack(pcoords, axis=1)
  return x / z, y / z, z
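A minimal usage sketch for the helper above (assumptions: TF 1.x graph mode and a transform_utils module providing unstacked_matrix_from_angles, as the surrounding project imports; the shapes are illustrative):

depth = tf.ones([2, 128, 416])              # [B, H, W] depth map
translation = tf.zeros([2, 3])              # rank 2: one translation per image
rotation_angles = tf.zeros([2, 3])          # Euler angles per image
intrinsic_mat = tf.eye(3, batch_shape=[2])  # [B, 3, 3] camera intrinsics
x, y, z = _using_motion_vector(depth, translation, rotation_angles, intrinsic_mat)
# x and y are the projected pixel coordinates, z the transformed depth; each is [B, H, W].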
Example #2
def _using_transform_matrix(depth,
                            transform,
                            intrinsic_mat,
                            intrinsic_mat_inv=None):
  """A helper for using_transform_matrix. See docstring therein."""
  with tf.name_scope('Transform', values=[depth, transform, intrinsic_mat]):
    _, height, width = tf.unstack(tf.shape(depth))
    grid = tf.squeeze(
        tf.stack(tf.meshgrid(tf.range(width), tf.range(height), (1,))), axis=3)
    grid = tf.to_float(grid)
    if intrinsic_mat_inv is None:
      intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)
    cam_coords = tf.einsum('bij,jhw,bhw->bihw', intrinsic_mat_inv, grid, depth)

    rotation = transform[:, :3, :3]
    translation = transform[:, :3, 3]

    xyz = (
        tf.einsum('bij,bjk,bkhw->bihw', intrinsic_mat, rotation, cam_coords) +
        _expand_last_dim_twice(
            tf.einsum('bij,bj->bi', intrinsic_mat, translation)))

    x, y, z = tf.unstack(xyz, axis=1)
    pixel_x = x / z
    pixel_y = y / z
    return pixel_x, pixel_y, z
Example #3
def cutout(x, toss, ratio=[1, 2]):
    batch_size = tf.shape(x)[0]
    image_size = tf.shape(x)[1:3]
    cutout_size = image_size * ratio[0] // ratio[1]
    offset_x = tf.random.uniform([tf.shape(x)[0], 1, 1],
                                 maxval=image_size[0] +
                                 (1 - cutout_size[0] % 2),
                                 dtype=tf.int32)
    offset_y = tf.random.uniform([tf.shape(x)[0], 1, 1],
                                 maxval=image_size[1] +
                                 (1 - cutout_size[1] % 2),
                                 dtype=tf.int32)
    grid_batch, grid_x, grid_y = tf.meshgrid(tf.range(batch_size,
                                                      dtype=tf.int32),
                                             tf.range(cutout_size[0],
                                                      dtype=tf.int32),
                                             tf.range(cutout_size[1],
                                                      dtype=tf.int32),
                                             indexing='ij')
    cutout_grid = tf.stack([
        grid_batch, grid_x + offset_x - cutout_size[0] // 2,
        grid_y + offset_y - cutout_size[1] // 2
    ],
                           axis=-1)
    mask_shape = tf.stack([batch_size, image_size[0], image_size[1]])
    cutout_grid = tf.maximum(cutout_grid, 0)
    cutout_grid = tf.minimum(cutout_grid,
                             tf.reshape(mask_shape - 1, [1, 1, 1, 3]))
    mask = tf.maximum(
        1 - tf.reshape(toss, [-1, 1, 1]) * tf.scatter_nd(
            cutout_grid,
            tf.ones([batch_size, cutout_size[0], cutout_size[1]],
                    dtype=tf.float32), mask_shape), 0)
    x = x * tf.expand_dims(mask, axis=3)
    return x
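A quick shape check (a sketch, assuming TF 1.x; toss is a per-image 0/1 float that gates whether the cutout is applied):

images = tf.random.uniform([8, 32, 32, 3])                # [B, H, W, C]
toss = tf.cast(tf.random.uniform([8]) < 0.5, tf.float32)  # 1.0 applies the cutout
augmented = cutout(images, toss)                          # default ratio=[1, 2] cuts a 16x16 hole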
Example #4
    def _batch_slice(self, ary, start_ijk, w, batch_size):
        """Batched slicing of original grid.

    Args:
      ary: tensor, rank = 3.
      start_ijk: [batch_size, 3] tensor, starting index.
      w: width of cube to extract.
      batch_size: int, batch size.

    Returns:
      batched_slices: [batch_size, w, w, w] tensor, batched slices of ary.
    """
        batch_size = start_ijk.shape[0]
        ijk = tf.range(w, dtype=tf.int32)
        slice_idx = tf.meshgrid(ijk, ijk, ijk, indexing='ij')
        slice_idx = tf.stack(
            slice_idx, axis=-1)  # [in_grid_res, in_grid_res, in_grid_res, 3]
        slice_idx = tf.broadcast_to(slice_idx[tf.newaxis],
                                    [batch_size, w, w, w, 3])
        offset = tf.broadcast_to(
            start_ijk[:, tf.newaxis, tf.newaxis, tf.newaxis, :],
            [batch_size, w, w, w, 3])
        slice_idx += offset
        # [batch_size, in_grid_res, in_grid_res, in_grid_res, 3]
        batched_slices = tf.gather_nd(ary, slice_idx)
        # [batch_size, in_grid_res, in_grid_res, in_grid_res]
        return batched_slices
Example #5
def flow_gather(source_images, flows):
    """Gather from a tensor of images.

  Args:
    source_images: 5D tensor of images [B, H, W, D, 3]
    flows: 5D tensor of x/y offsets to gather for each slice (pixel offsets)
  Returns:
    warped_imgs_reshape: 5D tensor of gathered (warped) images [B, H, W, D, 3]
  """
    batchsize = tf.shape(source_images)[0]
    height = tf.shape(source_images)[1]
    width = tf.shape(source_images)[2]
    num_depths = tf.shape(source_images)[3]
    source_images_reshape = tf.reshape(
        tf.transpose(source_images, [0, 3, 1, 2, 4]),
        [batchsize * num_depths, height, width, 3])
    flows_reshape = tf.reshape(tf.transpose(flows, [0, 3, 1, 2, 4]),
                               [batchsize * num_depths, height, width, 2])
    _, h, w = tf.meshgrid(tf.range(tf.to_float(batchsize * num_depths),
                                   dtype=tf.float32),
                          tf.range(tf.to_float(height), dtype=tf.float32),
                          tf.range(tf.to_float(width), dtype=tf.float32),
                          indexing='ij')
    coords_y = tf.clip_by_value(h + flows_reshape[Ellipsis, 0], 0.0,
                                tf.to_float(height))
    coords_x = tf.clip_by_value(w + flows_reshape[Ellipsis, 1], 0.0,
                                tf.to_float(width))
    sampling_coords = tf.stack([coords_x, coords_y], axis=-1)
    warped_imgs = contrib_resampler.resampler(source_images_reshape,
                                              sampling_coords)
    warped_imgs_reshape = tf.transpose(
        tf.reshape(warped_imgs, [batchsize, num_depths, height, width, 3]),
        [0, 2, 3, 1, 4])
    return warped_imgs_reshape
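A sketch of calling it (assumes tf.contrib.resampler is available as contrib_resampler, as the function above expects; a zero flow reduces to an identity gather, up to the clipping):

src = tf.random.uniform([1, 32, 32, 8, 3])  # [B, H, W, D, 3]
flows = tf.zeros([1, 32, 32, 8, 2])         # per-slice (y, x) pixel offsets, all zero
warped = flow_gather(src, flows)            # [1, 32, 32, 8, 3]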
Example #6
 def __init__(self, config, name='decoder'):
     super(Decoder, self).__init__(name=name)
     assert len(config['dec_channel']) == len(config['dec_kernel'])
     assert len(config['dec_channel']) == len(config['dec_shape'])
     with self._enter_variable_scope(check_same_graph=False):
         dec_shape_list = [(n, n) if isinstance(n, int) else n for n in config['dec_shape']]
         plane_ht, plane_wd = dec_shape_list[0]
         with tf.name_scope('grid'):
             rows = tf.linspace(-1.0, 1.0, plane_ht)
             cols = tf.linspace(-1.0, 1.0, plane_wd)
             grid_rows, grid_cols = tf.meshgrid(rows, cols)
             self._grid = tf.expand_dims(tf.stack([grid_cols, grid_rows], axis=-1), axis=0)
         self._layers = []
         for idx, (channel, kernel, shape) in enumerate(
                 zip(config['dec_channel'], config['dec_kernel'], dec_shape_list)):
             if (plane_ht, plane_wd) != shape:
                 self._layers.append(partial(tf.image.resize_bilinear, size=shape, name='resize_{}'.format(idx)))
             self._layers += [
                 snt.Conv2D(channel, kernel, padding='VALID', name='conv_{}'.format(idx)),
                 partial(tf.nn.relu, name='relu_{}'.format(idx)),
             ]
             plane_ht -= kernel - 1
             plane_wd -= kernel - 1
         if [plane_ht, plane_wd] != config['image_shape'][:2]:
             self._layers.append(
                 partial(tf.image.resize_bilinear, size=config['image_shape'][:2], name='resize_out'))
         self._image_ch = config['image_shape'][-1]
         self._layers.append(snt.Conv2D(self._image_ch + 1, 1, name='conv_out'))
Example #7
    def img2mpi(self, img, depth, planedepths):
        """Compute ground truth MPI of visible content using depth map."""

        height = tf.shape(img)[1]
        width = tf.shape(img)[2]
        num_depths = planedepths.shape[0]
        depth_inds = (tf.to_float(num_depths) - 1) * (
            (1.0 / depth) -
            (1.0 / planedepths[0])) / ((1.0 / planedepths[-1]) -
                                       (1.0 / planedepths[0]))
        depth_inds = tf.round(depth_inds)
        depth_inds_tile = tf.to_int32(
            tf.tile(depth_inds[:, :, :, tf.newaxis], [1, 1, 1, num_depths]))
        _, _, d = tf.meshgrid(tf.range(height),
                              tf.range(width),
                              tf.range(num_depths),
                              indexing='ij')
        mpi_colors = tf.to_float(
            tf.tile(img[:, :, :, tf.newaxis, :], [1, 1, 1, num_depths, 1]))
        mpi_alphas = tf.to_float(
            tf.where(tf.equal(depth_inds_tile, d),
                     tf.ones_like(depth_inds_tile),
                     tf.zeros_like(depth_inds_tile)))
        mpi = tf.concat([mpi_colors, mpi_alphas[Ellipsis, tf.newaxis]], axis=4)
        return mpi
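Since img2mpi is a method, the sketch below assumes an owning instance (here called model, a hypothetical name) and TF 1.x; planedepths must span the depth range so the inverse-depth interpolation above stays in bounds:

img = tf.random.uniform([1, 48, 64, 3])                         # [B, H, W, 3]
depth = tf.random.uniform([1, 48, 64], minval=1.0, maxval=10.0)
planedepths = tf.linspace(1.0, 10.0, 8)                         # 8 MPI planes
mpi = model.img2mpi(img, depth, planedepths)                    # [1, 48, 64, 8, 4] RGBA planes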
Example #8
def affine_grid_generator(height, width, theta):
    """
    This function returns a sampling grid, which when
    used with the bilinear sampler on the input feature
    map, will create an output feature map that is an
    affine transformation [1] of the input feature map.
    Input
    -----
    - height: desired height of grid/output. Used
      to downsample or upsample.
    - width: desired width of grid/output. Used
      to downsample or upsample.
    - theta: affine transform matrices of shape (num_batch, 2, 3).
      For each image in the batch, we have 6 theta parameters of
      the form (2x3) that define the affine transformation T.
    Returns
    -------
    - normalized grid (-1, 1) of shape (num_batch, 2, H, W).
      The 2nd dimension has 2 components: (x, y) which are the
      sampling points of the original image for each point in the
      target image.
    Note
    ----
    [1]: the affine transformation allows cropping, translation,
         and isotropic scaling.
    """
    # grab batch size
    num_batch = tf.shape(theta)[0]

    # create normalized 2D grid
    x = tf.linspace(-1.0, 1.0, width)
    y = tf.linspace(-1.0, 1.0, height)
    x_t, y_t = tf.meshgrid(x, y)

    # flatten
    x_t_flat = tf.reshape(x_t, [-1])
    y_t_flat = tf.reshape(y_t, [-1])

    # reshape to (x_t, y_t , 1)
    ones = tf.ones_like(x_t_flat)
    sampling_grid = tf.stack([x_t_flat, y_t_flat, ones])

    # repeat grid num_batch times
    sampling_grid = tf.expand_dims(sampling_grid, axis=0)
    sampling_grid = tf.tile(sampling_grid, tf.stack([num_batch, 1, 1]))

    # cast to float32 (required for matmul)
    theta = tf.cast(theta, 'float32')
    sampling_grid = tf.cast(sampling_grid, 'float32')

    # transform the sampling grid - batch multiply
    batch_grids = tf.matmul(theta, sampling_grid)
    # batch grid has shape (num_batch, 2, H*W)

    # reshape to (num_batch, H, W, 2)
    batch_grids = tf.reshape(batch_grids, [num_batch, 2, height, width])
    # batch_grids = tf.transpose(batch_grids, [0, 2, 1, 3])

    return batch_grids
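For instance, an identity theta reproduces the normalized meshgrid itself (a sketch; batch and spatial sizes are arbitrary):

theta = tf.tile(tf.constant([[[1., 0., 0.], [0., 1., 0.]]]), [4, 1, 1])  # identity affine transform
grids = affine_grid_generator(16, 16, theta)  # [4, 2, 16, 16]; channel 0 holds x, channel 1 holds y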
Example #9
 def get_offset(self, cell_size: int, num_anchors: int):
     x = tf.range(cell_size, dtype=tf.float32)
     y = tf.range(cell_size, dtype=tf.float32)
     xx, yy = tf.meshgrid(x, y)
     offset = tf.stack([xx, yy], axis=-1)
     offset = tf.expand_dims(offset, axis=2)
     offset = tf.tile(offset, [1, 1, num_anchors, 1])
     return offset
Example #10
def grid_coord(h, w, d):
    xl = tf.linspace(-1.0, 1.0, w)
    yl = tf.linspace(-1.0, 1.0, h)
    zl = tf.linspace(-1.0, 1.0, d)

    xs, ys, zs = tf.meshgrid(xl, yl, zl, indexing='ij')
    g = tf.concat([flatten(xs), flatten(ys), flatten(zs)], axis=0)
    return g
Example #11
def get_grid(shape, name='grid'):
    with tf.name_scope(name):
        rows = tf.linspace(-1.0, 1.0, shape[0])
        cols = tf.linspace(-1.0, 1.0, shape[1])
        grid_cols, grid_rows = tf.meshgrid(cols, rows)
        grid = tf.expand_dims(tf.stack([grid_cols, grid_rows], axis=-1),
                              axis=0)
    return grid
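The layout matches the grid built inside the Decoder of Example #6 (a quick sketch):

grid = get_grid([4, 6])  # [1, 4, 6, 2]; the last axis holds (col, row) coordinates in [-1, 1]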
Example #12
def generate_heatmap_target_sigmas_rotation(heatmap_size, landmarks, sigmas, rotation, scale=1.0, normalize=False, data_format='channels_first'):
    """
    Generates heatmap images for the given parameters.
    :param heatmap_size: The image size of a single heatmap.
    :param landmarks: The list of landmarks. For each landmark, a heatmap on the given coordinate will be generated. If landmark.is_valid is False, then the heatmap will be empty.
    :param sigmas: The sigmas for the individual heatmaps. May be either fixed, or trainable.
    :param rotation: The rotation of the heatmap. May be either fixed, or trainable.
    :param scale: The scale factor for each heatmap. Each pixel value will be multiplied by this value.
    :param normalize: If true, each heatmap value will be multiplied by the normalization factor of the gaussian.
    :param data_format: The data format of the resulting tensor of heatmap images.
    :return: The tensor of heatmap images.
    """
    landmarks_shape = landmarks.get_shape().as_list()
    sigmas_shape = sigmas.get_shape().as_list()
    batch_size = landmarks_shape[0]
    num_landmarks = landmarks_shape[1]
    dim = landmarks_shape[2] - 1
    assert dim == 2, 'Currently only dim == 2 is supported.'
    assert len(heatmap_size) == dim, 'Dimensions do not match.'
    assert sigmas_shape[0] == num_landmarks, 'Number of sigmas does not match.'

    rotation_matrix = tf.stack([tf.stack([tf.cos(rotation), -tf.sin(rotation)], axis=-1), tf.stack([tf.sin(rotation), tf.cos(rotation)], axis=-1)], axis=-1)
    rotation_matrix_t = tf.stack([tf.stack([tf.cos(rotation), tf.sin(rotation)], axis=-1), tf.stack([-tf.sin(rotation), tf.cos(rotation)], axis=-1)], axis=-1)
    det_covariances = tf.reduce_prod(sigmas, axis=-1)
    sigmas_inv_eye = tf.eye(dim, dim, batch_shape=[num_landmarks]) * tf.expand_dims(1.0 / sigmas, -1)
    inv_covariances = tf.matmul(tf.matmul(rotation_matrix, sigmas_inv_eye), rotation_matrix_t)

    if data_format == 'channels_first':
        heatmap_axis = 1
        landmarks_reshaped = tf.reshape(landmarks[..., 1:], [batch_size, num_landmarks] + [1] * dim + [dim])
        is_valid_reshaped = tf.reshape(landmarks[..., 0], [batch_size, num_landmarks] + [1] * dim)
        det_covariances_reshaped = tf.reshape(det_covariances, [1, num_landmarks] + [1] * dim)
        inv_covariances_reshaped = tf.reshape(inv_covariances, [1, num_landmarks] + [1] * dim + [dim, dim])
    else:
        heatmap_axis = dim + 1
        landmarks_reshaped = tf.reshape(landmarks[..., 1:], [batch_size] + [1] * dim + [num_landmarks, dim])
        is_valid_reshaped = tf.reshape(landmarks[..., 0], [batch_size] + [1] * dim + [num_landmarks])
        det_covariances_reshaped = tf.reshape(det_covariances, [1] + [1] * dim + [num_landmarks])
        inv_covariances_reshaped = tf.reshape(inv_covariances, [1] + [1] * dim + [num_landmarks, dim, dim])

    aranges = [np.arange(s) for s in heatmap_size]
    grid = tf.meshgrid(*aranges, indexing='ij')

    grid_stacked = tf.stack(grid, axis=dim)
    grid_stacked = tf.cast(grid_stacked, tf.float32)
    grid_stacked = tf.stack([grid_stacked] * batch_size, axis=0)
    grid_stacked = tf.stack([grid_stacked] * num_landmarks, axis=heatmap_axis)

    if normalize:
        scale /= tf.sqrt(tf.pow(2 * np.pi, dim) * det_covariances_reshaped)

    x_minus_mu = grid_stacked - landmarks_reshaped
    exp_factor = tf.reduce_sum(tf.reduce_sum(tf.expand_dims(x_minus_mu, -1) * inv_covariances_reshaped * tf.expand_dims(x_minus_mu, -2), axis=-1), axis=-1)
    heatmap = scale * tf.exp(-0.5 * exp_factor)
    heatmap_or_zeros = tf.where((is_valid_reshaped + tf.zeros_like(heatmap)) > 0, heatmap, tf.zeros_like(heatmap))

    return heatmap_or_zeros
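A usage sketch (TF 1.x; each landmark row is (is_valid, y, x), with per-landmark anisotropic sigmas and rotations; all values here are illustrative):

landmarks = tf.constant([[[1., 20., 30.], [1., 40., 50.]]])  # [batch=1, num_landmarks=2, 1 + dim]
sigmas = tf.constant([[2., 2.], [3., 1.5]])                  # [num_landmarks, dim]
rotation = tf.constant([0.0, np.pi / 4])                     # per-landmark rotation angle
heatmaps = generate_heatmap_target_sigmas_rotation(
    [64, 64], landmarks, sigmas, rotation)                   # [1, 2, 64, 64] (channels_first)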
Example #13
def yolo_layer(inputs, n_classes, anchors, img_size, data_format):
    """Creates Yolo final detection layer.

    Detects boxes with respect to anchors.

    Args:
        inputs: Tensor input.
        n_classes: Number of labels.
        anchors: A list of anchor sizes.
        img_size: The input size of the model.
        data_format: The input format.

    Returns:
        Tensor output.
    """
    n_anchors = len(anchors)

    inputs = tf.layers.conv2d(inputs,
                              filters=n_anchors * (5 + n_classes),
                              kernel_size=1,
                              strides=1,
                              use_bias=True,
                              data_format=data_format)

    shape = inputs.get_shape().as_list()
    grid_shape = shape[2:4] if data_format == 'channels_first' else shape[1:3]
    if data_format == 'channels_first':
        inputs = tf.transpose(inputs, [0, 2, 3, 1])
    inputs = tf.reshape(
        inputs, [-1, n_anchors * grid_shape[0] * grid_shape[1], 5 + n_classes])

    strides = (img_size[0] // grid_shape[0], img_size[1] // grid_shape[1])

    box_centers, box_shapes, confidence, classes = \
        tf.split(inputs, [2, 2, 1, n_classes], axis=-1)

    x = tf.range(grid_shape[0], dtype=tf.float32)
    y = tf.range(grid_shape[1], dtype=tf.float32)
    x_offset, y_offset = tf.meshgrid(x, y)
    x_offset = tf.reshape(x_offset, (-1, 1))
    y_offset = tf.reshape(y_offset, (-1, 1))
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
    x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])
    box_centers = tf.nn.sigmoid(box_centers)
    box_centers = (box_centers + x_y_offset) * strides

    anchors = tf.tile(anchors, [grid_shape[0] * grid_shape[1], 1])
    box_shapes = tf.exp(box_shapes) * tf.to_float(anchors)

    confidence = tf.nn.sigmoid(confidence)

    classes = tf.nn.sigmoid(classes)

    inputs = tf.concat([box_centers, box_shapes, confidence, classes], axis=-1)

    return inputs
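A shape-level sketch of calling it (TF 1.x; the anchors are the usual (width, height) pairs for this detection scale):

feats = tf.random.uniform([1, 13, 13, 512])  # backbone feature map, channels_last
detections = yolo_layer(feats, n_classes=80,
                        anchors=[(116, 90), (156, 198), (373, 326)],
                        img_size=(416, 416), data_format='channels_last')
# detections: [1, 3 * 13 * 13, 85] rows of (x, y, w, h, confidence, 80 class scores)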
Example #14
        def graph_fn():
            y, x = tf.meshgrid(tf.range(32, dtype=tf.float32),
                               tf.range(32, dtype=tf.float32))
            blist = box_list.BoxList(
                tf.constant([[0., 0., 32., 32.], [0., 0., 16., 16.],
                             [0.0, 0.0, 4.0, 4.0]]))
            classes = tf.constant([[0., 1., 0.], [1., 0., 0.], [0., 0., 1.]])

            result = ta_utils.coordinates_to_iou(y, x, blist, classes)
            return result
Example #15
    def _get_xy_ctr(self, score_size, score_offset, total_stride):
        fm_height, fm_width = score_size, score_size

        y_list = tf.linspace(0., fm_height - 1., fm_height)
        x_list = tf.linspace(0., fm_width - 1., fm_width)
        X, Y = tf.meshgrid(x_list, y_list)

        XY = score_offset + tf.stack([X, Y], axis=-1) * total_stride
        XY = tf.reshape(XY, (1, fm_height * fm_width, 2))
        return XY
Example #16
def apply_line_prediction(inputs,
                          features,
                          blur_steps,
                          learn_alpha=True,
                          name=None):
  """Applies "Line Prediction" layer to input images."""
  inputs.shape.assert_is_compatible_with([None, None, None, 6])

  with tf.name_scope(name, 'blur_prediction', values=[inputs, features]):

    with tf.name_scope(None, 'input_frames', values=[inputs]):
      frames = [inputs[:, :, :, :3], inputs[:, :, :, 3:]]

    with tf.name_scope(None, 'frame_size', values=[inputs, features]):
      shape = tf.shape(inputs)
      height = shape[1]
      width = shape[2]

    with tf.name_scope(None, 'identity_warp', values=[]):
      x_idx, y_idx = tf.meshgrid(tf.range(width), tf.range(height))
      identity_warp = tf.to_float(tf.stack([x_idx, y_idx], axis=-1))
      identity_warp = identity_warp[tf.newaxis, :, :, tf.newaxis, :]

      warp_steps = tf.to_float(tf.range(blur_steps - 1) + 1) / (blur_steps - 1)
      warp_steps = warp_steps[tf.newaxis, tf.newaxis, tf.newaxis, :, tf.newaxis]

      max_warps = tf.to_float(tf.stack([width - 1, height - 1]))
      max_warps = max_warps[tf.newaxis, tf.newaxis, tf.newaxis, tf.newaxis, :]

    output_frames = []
    for frame in frames:
      with tf.name_scope(None, 'predict_blurs', values=[features]):
        flow = tf.layers.conv2d(features, 2, 1, padding='same')

        if learn_alpha:
          alpha = tf.layers.conv2d(
              features, blur_steps, 1, padding='same', activation=tf.nn.softmax)

      with tf.name_scope(None, 'apply_blurs', values=[]):
        with tf.name_scope(None, 'warp', values=[frame, flow]):
          warps = identity_warp + flow[:, :, :, tf.newaxis, :] * warp_steps
          warps = tf.clip_by_value(warps, 0.0, max_warps)
          warped = contrib_resampler.resampler(frame, warps)
          warped = tf.concat([frame[:, :, :, tf.newaxis, :], warped], axis=3)

        with tf.name_scope(None, 'apply_alpha', values=[frame, flow]):
          if learn_alpha:
            mask = alpha[:, :, :, :, tf.newaxis]
          else:
            mask = 1.0 / blur_steps
          output_frames.append(tf.reduce_sum(warped * mask, axis=3))

    with tf.name_scope(None, 'outputs', values=[output_frames]):
      output = tf.add_n(output_frames) / len(frames)
      return output
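A sketch of driving it (assumes tf.contrib.resampler imported as contrib_resampler, as the function above uses; inputs stacks two RGB frames channel-wise):

inputs = tf.random.uniform([1, 64, 64, 6])     # two RGB frames, concatenated on channels
features = tf.random.uniform([1, 64, 64, 32])  # per-pixel features from some encoder
output = apply_line_prediction(inputs, features, blur_steps=8)  # [1, 64, 64, 3]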
Example #17
def tile_anchors(grid_height, grid_width, scales, aspect_ratios, anchor_stride,
                 anchor_offset):
    """
    It returns boxes in absolute coordinates.

    Arguments:
        grid_height: a scalar int tensor, size of the grid in the y direction.
        grid_width: a scalar int tensor, size of the grid in the x direction.
        scales: a float tensor with shape [N],
            it represents the scale of each box in the basis set.
        aspect_ratios: a float tensor with shape [N],
            it represents the aspect ratio of each box in the basis set.
        anchor_stride: a tuple of float scalar tensors,
            difference in centers between anchors for adjacent grid positions.
        anchor_offset: a tuple of float scalar tensors,
            center of the anchor on upper left element of the grid ((0, 0)-th anchor).
    Returns:
        a float tensor with shape [grid_height * grid_width * N, 4].
    """
    N = tf.size(scales)
    ratio_sqrts = tf.sqrt(aspect_ratios)
    heights = scales / ratio_sqrts
    widths = scales * ratio_sqrts
    # widths/heights = aspect_ratios,
    # and scales = sqrt(heights * widths)

    # get a grid of box centers
    y_centers = tf.to_float(
        tf.range(grid_height)) * anchor_stride[0] + anchor_offset[0]
    x_centers = tf.to_float(
        tf.range(grid_width)) * anchor_stride[1] + anchor_offset[1]
    x_centers, y_centers = tf.meshgrid(x_centers, y_centers)
    # they have shape [grid_height, grid_width]

    centers = tf.stack([y_centers, x_centers], axis=2)
    centers = tf.expand_dims(centers, 2)
    centers = tf.tile(centers, [1, 1, N, 1])
    # shape [grid_height, grid_width, N, 2]

    sizes = tf.stack([heights, widths], axis=1)
    sizes = tf.expand_dims(tf.expand_dims(sizes, 0), 0)
    sizes = tf.tile(sizes, [grid_height, grid_width, 1, 1])
    # shape [grid_height, grid_width, N, 2]

    boxes = tf.concat([centers - 0.5 * sizes, centers + 0.5 * sizes], axis=3)
    # it has shape [grid_height, grid_width, N, 4]
    boxes = tf.reshape(boxes, [-1, 4])
    return boxes
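For example, three scales at aspect ratio 1 on a 2x3 grid (a sketch; the values follow the docstring's conventions):

boxes = tile_anchors(grid_height=2, grid_width=3,
                     scales=tf.constant([32., 64., 128.]),
                     aspect_ratios=tf.constant([1., 1., 1.]),
                     anchor_stride=(16., 16.), anchor_offset=(8., 8.))
# boxes: [2 * 3 * 3, 4] = [18, 4] in (ymin, xmin, ymax, xmax) absolute coordinates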
Example #18
def _using_motion_vector(depth, translation, rotation_angles, intrinsic_mat):
    """A helper for using_motion_vector. See docstring therein."""

    if translation.shape.ndims not in (2, 4):
        raise ValueError(
            "'translation' should have rank 2 or 4, not %d" % translation.shape.ndims
        )
    if translation.shape[-1] != 3:
        raise ValueError(
            "translation's last dimension should be 3, not %d" % translation.shape[1]
        )
    if translation.shape.ndims == 2:
        translation = tf.expand_dims(tf.expand_dims(translation, 1), 1)

    _, height, width = tf.unstack(tf.shape(depth))
    grid = tf.squeeze(
        tf.stack(tf.meshgrid(tf.range(width), tf.range(height), (1,))), axis=3
    )
    grid = tf.to_float(grid)
    intrinsic_mat_inv = tf.linalg.inv(intrinsic_mat)

    rot_mat = transform_utils.matrix_from_angles(rotation_angles)
    # We have to treat separately the case of a per-image rotation vector and a
    # per-image rotation field, because the broadcasting capabilities of einsum
    # are limited.
    if rotation_angles.shape.ndims == 2:
        # The calculation here is identical to the one in inverse_warp above.
        # However we use einsum for better clarity. Under the hood, einsum performs
        # the reshaping and invocation of BatchMatMul, instead of doing it manually,
        # as in inverse_warp.
        projected_rotation = tf.einsum(
            "bij,bjk,bkl->bil", intrinsic_mat, rot_mat, intrinsic_mat_inv
        )
        pcoords = tf.einsum("bij,jhw,bhw->bihw", projected_rotation, grid, depth)
    elif rotation_angles.shape.ndims == 4:
        # We push the H and W dimensions to the end, and transpose the rotation
        # matrix elements (as noted above).
        rot_mat = tf.transpose(rot_mat, [0, 3, 4, 1, 2])
        projected_rotation = tf.einsum(
            "bij,bjkhw,bkl->bilhw", intrinsic_mat, rot_mat, intrinsic_mat_inv
        )
        pcoords = tf.einsum("bijhw,jhw,bhw->bihw", projected_rotation, grid, depth)

    projected_translation = tf.einsum("bij,bhwj->bihw", intrinsic_mat, translation)
    pcoords += projected_translation
    x, y, z = tf.unstack(pcoords, axis=1)
    return x / z, y / z, z
Example #19
def generate_heatmap_target(heatmap_size, landmarks, sigmas, scale=1.0, normalize=False, data_format='channels_first'):
    """
    Generates heatmap images for the given parameters.
    :param heatmap_size: The image size of a single heatmap.
    :param landmarks: The list of landmarks. For each landmark, a heatmap on the given coordinate will be generated. If landmark.is_valid is False, then the heatmap will be empty.
    :param sigmas: The sigmas for the individual heatmaps. May be either fixed, or trainable.
    :param scale: The scale factor for each heatmap. Each pixel value will be multiplied by this value.
    :param normalize: If true, each heatmap value will be multiplied by the normalization factor of the gaussian.
    :param data_format: The data format of the resulting tensor of heatmap images.
    :return: The tensor of heatmap images.
    """
    landmarks_shape = landmarks.get_shape().as_list()
    sigmas_shape = sigmas.get_shape().as_list()
    batch_size = landmarks_shape[0]
    num_landmarks = landmarks_shape[1]
    dim = landmarks_shape[2] - 1
    assert len(heatmap_size) == dim, 'Dimensions do not match.'
    assert sigmas_shape[0] == num_landmarks, 'Number of sigmas does not match.'

    if data_format == 'channels_first':
        heatmap_axis = 1
        landmarks_reshaped = tf.reshape(landmarks[..., 1:], [batch_size, num_landmarks] + [1] * dim + [dim])
        is_valid_reshaped = tf.reshape(landmarks[..., 0], [batch_size, num_landmarks] + [1] * dim)
        sigmas_reshaped = tf.reshape(sigmas, [1, num_landmarks] + [1] * dim)
    else:
        heatmap_axis = dim + 1
        landmarks_reshaped = tf.reshape(landmarks[..., 1:], [batch_size] + [1] * dim + [num_landmarks, dim])
        is_valid_reshaped = tf.reshape(landmarks[..., 0], [batch_size] + [1] * dim + [num_landmarks])
        sigmas_reshaped = tf.reshape(sigmas, [1] + [1] * dim + [num_landmarks])

    aranges = [np.arange(s) for s in heatmap_size]
    grid = tf.meshgrid(*aranges, indexing='ij')

    grid_stacked = tf.stack(grid, axis=dim)
    grid_stacked = tf.cast(grid_stacked, tf.float32)
    grid_stacked = tf.stack([grid_stacked] * batch_size, axis=0)
    grid_stacked = tf.stack([grid_stacked] * num_landmarks, axis=heatmap_axis)

    if normalize:
        scale /= tf.pow(np.sqrt(2 * np.pi) * sigmas_reshaped, dim)

    squared_distances = tf.reduce_sum(tf.pow(grid_stacked - landmarks_reshaped, 2.0), axis=-1)
    heatmap = scale * tf.exp(-squared_distances / (2 * tf.pow(sigmas_reshaped, 2)))
    heatmap_or_zeros = tf.where((is_valid_reshaped + tf.zeros_like(heatmap)) > 0, heatmap, tf.zeros_like(heatmap))

    return heatmap_or_zeros
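A usage sketch of the isotropic variant (TF 1.x; an is_valid flag of 0 yields an all-zero map for that landmark):

landmarks = tf.constant([[[1., 20., 30.], [0., 0., 0.]]])  # [batch=1, num_landmarks=2, 1 + dim]
sigmas = tf.constant([3.0, 3.0])                           # one sigma per landmark
heatmaps = generate_heatmap_target([64, 64], landmarks, sigmas,
                                   normalize=True, data_format='channels_last')
# heatmaps: [1, 64, 64, 2]; channel 1 is all zeros because its is_valid flag is 0.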
Example #20
def make_density_summary(log_density_fn, num_bins=100):
  """Plot density."""
  if FLAGS.target == dists.NINE_GAUSSIANS_DIST or FLAGS.target == dists.TWO_RINGS_DIST:
    bounds = (-2, 2)
  elif FLAGS.target == dists.CHECKERBOARD_DIST:
    bounds = (0, 1)

  x = tf.range(
      bounds[0], bounds[1], delta=(bounds[1] - bounds[0]) / float(num_bins))
  grid_x, grid_y = tf.meshgrid(x, x, indexing="ij")
  grid_xy = tf.stack([grid_x, grid_y], axis=-1)

  log_z = log_density_fn(grid_xy)
  log_bigz = reduce_logavgexp(log_z)
  z = tf.exp(log_z - log_bigz)

  plot = tf.reshape(z, [num_bins, num_bins])
  return plot
Example #21
def tf_voxel_meshgrid(height, width, depth, homogeneous = False):
    with tf.variable_scope('voxel_meshgrid'):
        # With 'ij' indexing, meshgrid orders its outputs like its inputs, so
        # z_t (depth) comes first and x_t (width) last, unlike 'xy' ordering.
        z_t, y_t, x_t = tf.meshgrid(tf.range(depth, dtype = tf.float32),
                                    tf.range(height, dtype = tf.float32),
                                    tf.range(width, dtype = tf.float32), indexing='ij')
        #Reshape into a big list of slices one after another along the X,Y,Z direction
        x_t_flat = tf.reshape(x_t, (1, -1))
        y_t_flat = tf.reshape(y_t, (1, -1))
        z_t_flat = tf.reshape(z_t, (1, -1))

        #Vertical stack to create a (3,N) matrix for X,Y,Z coordinates
        grid = tf.concat([x_t_flat, y_t_flat, z_t_flat], axis=0)
        if homogeneous:
            ones = tf.ones_like(x_t_flat)
            grid = tf.concat([grid, ones], axis = 0)
        return grid
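A quick shape check (TF 1.x, since it uses tf.variable_scope):

grid = tf_voxel_meshgrid(2, 3, 4, homogeneous=True)
# grid: [4, 24]; rows are the x, y, z coordinates (plus a row of ones) of all 2*3*4 voxels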
Example #22
def yolo(inputs, n_classes, anchors, img_size, data_format):

    n_anchors = len(anchors)

    inputs = tf.layers.conv2d(inputs, filters=n_anchors * (5 + n_classes),
                              kernel_size=1, strides=1, use_bias=True,
                              data_format=data_format)

    shape = inputs.get_shape().as_list()
    grid_shape = shape[2:4] if data_format == 'channels_first' else shape[1:3]
    if data_format == 'channels_first':
        inputs = tf.transpose(inputs, [0, 2, 3, 1])
    inputs = tf.reshape(inputs, [-1, n_anchors * grid_shape[0] * grid_shape[1],
                                 5 + n_classes])

    strides = (img_size[0] // grid_shape[0], img_size[1] // grid_shape[1])

    print("Detection_layer : tf.split : {}".format(inputs))

    box_centers, box_shapes, confidence, classes = \
        tf.split(inputs, [2, 2, 1, n_classes], axis=-1)

    x = tf.range(grid_shape[0], dtype=tf.float32)
    y = tf.range(grid_shape[1], dtype=tf.float32)
    x_offset, y_offset = tf.meshgrid(x, y)
    x_offset = tf.reshape(x_offset, (-1, 1))
    y_offset = tf.reshape(y_offset, (-1, 1))
    x_y_offset = tf.concat([x_offset, y_offset], axis=-1)
    x_y_offset = tf.tile(x_y_offset, [1, n_anchors])
    x_y_offset = tf.reshape(x_y_offset, [1, -1, 2])
    box_centers = tf.nn.sigmoid(box_centers)
    box_centers = (box_centers + x_y_offset) * strides

    anchors = tf.tile(anchors, [grid_shape[0] * grid_shape[1], 1])
    box_shapes = tf.exp(box_shapes) * tf.to_float(anchors)

    confidence = tf.nn.sigmoid(confidence)

    classes = tf.nn.sigmoid(classes)

    inputs = tf.concat([box_centers, box_shapes,
                        confidence, classes], axis=-1)

    return inputs
Example #23
def create_centered_identity_transformation_field(shape, spacings):
    """Create 2D or 3D centered identity transformation field.

  Args:
    shape: 2- or 3-element list. The shape of the transformation field.
    spacings: 2- or 3-element list. The spacings of the transformation field.

  Returns:
    2D case: 3-D Tensor (x0, x1, comp) describing a 2D vector field
    3D case: 4-D Tensor (x0, x1, x2, comp)  describing a 3D vector field
  """
    coords = []
    for i, size in enumerate(shape):
        spacing = spacings[i]
        coords.append(
            tf.linspace(-(size - 1) / 2 * spacing, (size - 1) / 2 * spacing,
                        size))
    permutation = np.roll(np.arange(len(coords) + 1), -1)
    return tf.transpose(tf.meshgrid(*coords, indexing="ij"), permutation)
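A worked 2D example (the field stores, at every grid point, its own centered physical coordinate):

field = create_centered_identity_transformation_field([5, 5], [1.0, 1.0])
# field: [5, 5, 2]; field[0, 0] == (-2., -2.) and field[4, 4] == (2., 2.)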
Example #24
def image_shape_to_grids(height, width):
    """Computes xy-grids given the shape of the image.

  Args:
    height: The height of the image.
    width: The width of the image.

  Returns:
    A tuple of two tensors:
      y_grid: A float tensor with shape [height, width] representing the
        y-coordinate of each pixel grid.
      x_grid: A float tensor with shape [height, width] representing the
        x-coordinate of each pixel grid.
  """
    out_height = tf.cast(height, tf.float32)
    out_width = tf.cast(width, tf.float32)
    x_range = tf.range(out_width, dtype=tf.float32)
    y_range = tf.range(out_height, dtype=tf.float32)
    x_grid, y_grid = tf.meshgrid(x_range, y_range, indexing='xy')
    return (y_grid, x_grid)
Example #25
def warp(teninput, tenflow):
    """
    warps image with dense flow to obtain motion compensated frame
    """
    batch_size, height, width, channels = (
        tf.shape(teninput)[0],
        tf.shape(teninput)[1],
        tf.shape(teninput)[2],
        tf.shape(teninput)[3],
    )
    grid_x, grid_y = tf.meshgrid(tf.range(width), tf.range(height))
    stacked_grid = tf.cast(tf.stack([grid_y, grid_x], axis=2), tenflow.dtype)
    batched_grid = tf.expand_dims(stacked_grid, axis=0)
    query_points_on_grid = batched_grid - tenflow
    query_points_flattened = tf.reshape(query_points_on_grid,
                                        [batch_size, height * width, 2])
    interpolated = interpolate_bilinear(teninput, query_points_flattened)
    interpolated = tf.reshape(interpolated,
                              [batch_size, height, width, channels])
    return interpolated
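A sketch of calling it (assumes an interpolate_bilinear helper like the one in tensorflow_addons; a zero flow returns the input unchanged):

frames = tf.random.uniform([2, 64, 64, 3])  # [B, H, W, C]
flow = tf.zeros([2, 64, 64, 2])             # dense (y, x) flow
compensated = warp(frames, flow)            # [2, 64, 64, 3]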
Example #26
    def _generate_anchors(self, feature_map_shape):
        """Generate anchor for an image.

        Using the feature map (the output of the pretrained network for an
        image) and the anchor_reference generated from the anchor config
        values, we generate a list of anchors.

        Anchors are just fixed bounding boxes of different ratios and sizes
        that are uniformly generated throughout the image.

        Args:
            feature_map_shape: Shape of the convolutional feature map used as
                input for the RPN. Should be (batch, height, width, depth).

        Returns:
            all_anchors: A flattened Tensor with all the anchors of shape
                `(num_anchors_per_points * feature_width * feature_height, 4)`
                using the (x1, y1, x2, y2) convention.
        """
        with tf.variable_scope('generate_anchors'):
            grid_width = feature_map_shape[2]  # width
            grid_height = feature_map_shape[1]  # height
            shift_x = tf.range(grid_width) * self._anchor_stride
            shift_y = tf.range(grid_height) * self._anchor_stride
            shift_x, shift_y = tf.meshgrid(shift_x, shift_y)

            shift_x = tf.reshape(shift_x, [-1])
            shift_y = tf.reshape(shift_y, [-1])

            shifts = tf.stack([shift_x, shift_y, shift_x, shift_y], axis=0)

            shifts = tf.transpose(shifts)
            # Shifts now is a (H x W, 4) Tensor

            # Expand dims to use broadcasting sum.
            all_anchors = (np.expand_dims(self._anchor_reference, axis=0) +
                           tf.expand_dims(shifts, axis=1))

            # Flatten
            all_anchors = tf.reshape(all_anchors, (-1, 4))
            return all_anchors
Example #27
    def basis(sample_paths):
        """Computes polynomial basis expansion at the given sample points.

    Args:
      sample_paths: A `Tensor` of either `float32` or `float64` dtype and of
        shape `[num_samples, dim]` where `dim` has to be statically known.

    Returns:
      A `Tensor` of shape `[(degree + 1)**dim, num_samples]`.
    """
        samples = tf.convert_to_tensor(sample_paths)
        dim = samples.shape.as_list()[-1]
        grid = tf.range(0, degree + 1, dtype=samples.dtype)

        samples_centered = samples - tf.math.reduce_mean(samples, axis=0)
        samples_centered = tf.expand_dims(samples_centered, -2)
        grid = tf.meshgrid(*(dim * [grid]))
        grid = tf.reshape(tf.stack(grid, -1), [-1, dim])
        # Shape [num_samples, degree * dim]
        basis_expansion = tf.reduce_prod(samples_centered**grid, -1)
        return tf.transpose(basis_expansion)
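For degree = 2 and dim = 1, the rows of the expansion are the centered monomials 1, x, x² evaluated at each sample (a sketch; degree is captured from the enclosing scope):

degree = 2
sample_paths = tf.constant([[0.], [1.], [2.]])  # [num_samples=3, dim=1]
expansion = basis(sample_paths)                 # [(degree + 1)**dim, num_samples] = [3, 3]
# Row k holds (x - mean(x))**k, so the rows are [1, 1, 1], [-1, 0, 1], [1, 0, 1].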
Example #28
    def _CreateRampTestImages(self, batch_size, height, width):
        """Creates a batch of test images of given size.

    Args:
      batch_size: Number of images to stack into a batch.
      height: Height of the image.
      width: Width of the image.

    Returns:
      images: Tensor of shape [batch_size, height, width, 3]. In each image
        the R-channel values are equal to the x coordinate of the pixel, and
        the G- and B-channel values are equal to the y coordinate.
    """
        mesh_x, mesh_y = tf.meshgrid(np.arange(width, dtype=np.float32),
                                     np.arange(height, dtype=np.float32))
        mesh_x = tf.expand_dims(mesh_x, 2)
        mesh_y = tf.expand_dims(mesh_y, 2)
        image = tf.concat([mesh_x, mesh_y, mesh_y], 2)
        image = tf.expand_dims(image, 0)
        images = tf.tile(image, [batch_size, 1, 1, 1])
        return images
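A sketch of what the helper yields (called on the owning test case, here a hypothetical self):

images = self._CreateRampTestImages(batch_size=2, height=4, width=5)
# images: [2, 4, 5, 3]; images[b, y, x] == (x, y, y) for every pixel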
Example #29
def image_to_world_projection(depth, intrinsics, pose_c2w):
    """Project points on the image to the world frame.

  Args:
    depth: [HEIGHT, WIDTH, 1] the depth map contains the radial distance from
      the camera eye to each point corresponding to each pixel.
    intrinsics: [3, 3] camera's intrinsic matrix.
    pose_c2w: [3, 4] camera pose matrix (camera to world).

  Returns:
    [HEIGHT, WIDTH, 3] points in the world's coordinate frame.
  """
    shape = depth.shape.as_list()
    height, width = shape[0], shape[1]
    xx, yy = tf.meshgrid(tf.lin_space(0., width - 1., width),
                         tf.lin_space(0., height - 1., height))
    p_pixel_homogeneous = tf.concat(
        [tf.stack([xx, yy], axis=-1),
         tf.ones([height, width, 1])], -1)

    p_image = tf.squeeze(
        tf.matmul(tf.matrix_inverse(intrinsics[tf.newaxis, tf.newaxis, :]),
                  tf.expand_dims(p_pixel_homogeneous, -1)), -1)

    z = depth * tf.reduce_sum(
        tf.math.l2_normalize(p_image, axis=-1) * tf.constant([[[0., 0., 1.]]]),
        axis=-1,
        keepdims=True)
    p_camera = z * p_image
    # convert to OpenGL coordinate system.
    p_camera = p_camera * tf.constant([1., 1., -1.], shape=[1, 1, 3])
    p_camera_homogeneous = tf.concat(
        [p_camera, tf.ones(shape=[height, width, 1])], -1)
    # Convert camera coordinates to world coordinates.
    p_world = tf.squeeze(
        tf.matmul(pose_c2w[tf.newaxis, tf.newaxis, :],
                  tf.expand_dims(p_camera_homogeneous, -1)), -1)
    return p_world
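A shape-level sketch (assumes a TF 1.x version with broadcasting matmul, which the function itself relies on; an identity camera-to-world pose keeps points in the camera frame, up to the OpenGL flip):

depth = tf.ones([32, 32, 1])  # radial distances per pixel
intrinsics = tf.constant([[32., 0., 16.], [0., 32., 16.], [0., 0., 1.]])
pose_c2w = tf.eye(3, num_columns=4)  # identity rotation, zero translation
p_world = image_to_world_projection(depth, intrinsics, pose_c2w)  # [32, 32, 3]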
Example #30
def camera_to_world_projection(depth, intrinsics, camera_to_world):
    """Project camera coordinates to world coordinates."""
    # p_pixel: batch, w, h, 3 principal_point, fov 2-d list
    # r: batch, 3, 3 camera to world rotation
    # t: batch, 3 camera to world translation, depth: batch, w, h, 1
    shape = depth.shape.as_list()
    height, width = shape[0], shape[1]
    xx, yy = tf.meshgrid(tf.lin_space(0., width - 1., width),
                         tf.lin_space(0., height - 1., height))
    p_pixel = tf.stack([xx, yy], axis=-1)
    p_pixel_homogeneous = tf.concat([p_pixel, tf.ones([height, width, 1])], -1)

    camera_to_world = tf.tile(camera_to_world[tf.newaxis, tf.newaxis, :],
                              [height, width, 1, 1])
    intrinsics = tf.tile(intrinsics[tf.newaxis, tf.newaxis, :],
                         [height, width, 1, 1])
    # Convert pixels coordinates (u, v, 1) to camera coordinates (x_c, y_c, f)
    # on the image plane.
    p_image = tf.squeeze(
        tf.matmul(tf.matrix_inverse(intrinsics),
                  tf.expand_dims(p_pixel_homogeneous, -1)), -1)

    lookat_axis = tf.tile(tf.constant([0., 0., 1.], shape=[1, 1, 3]),
                          [height, width, 1])
    z = depth * tf.reduce_sum(
        tf.math.l2_normalize(p_image, axis=-1) * lookat_axis,
        axis=-1,
        keepdims=True)
    p_camera = z * p_image
    # convert from OpenCV convention to OpenGL
    p_camera = p_camera * tf.constant([1., 1., -1.], shape=[1, 1, 3])
    p_camera_homogeneous = tf.concat(
        [p_camera, tf.ones(shape=[height, width, 1])], -1)
    # Convert camera coordinates to world coordinates.
    p_world = tf.squeeze(
        tf.matmul(camera_to_world, tf.expand_dims(p_camera_homogeneous, -1)),
        -1)
    return p_world