Code example #1
File: utils.py  Project: zyc00/deepmind-research
def get_position_signal(sequence_length, position_dim=8):
    """Return fixed position signal as sine waves.

  Sine wave periods are spaced geometrically (linearly in log space) so that
  the shortest period is 2 and the longest is the sequence length. That way the
  slowest wave covers at most half a cycle over the whole sequence.
  The waves are also phase-shifted so that they don't all start with the same
  value.
  We don't use learned positional embeddings because these embeddings are
  projected linearly along with the original embeddings, and the projection is
  learned.

  Args:
    sequence_length: int, T, length of the sequence.
    position_dim: int, P, number of sine waves.

  Returns:
    A [T, P] tensor, position embeddings.
  """
    # Compute the frequencies.
    periods = tf.exp(
        tf.lin_space(tf.log(2.0), tf.log(tf.to_float(sequence_length)),
                     position_dim))
    frequencies = 1.0 / periods  # Shape [P].

    # Compute the sine waves.
    xs = frequencies[None, :] * tf.to_float(tf.range(sequence_length)[:, None])
    shifts = tf.lin_space(0.0, 2.0, position_dim)[None, :]  # [1, P]
    positions = tf.math.cos(math.pi * (xs + shifts))  # [T, P]
    positions.shape.assert_is_compatible_with([sequence_length, position_dim])
    return positions
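
A minimal usage sketch for the function above, assuming TensorFlow 1.x (where tf.lin_space, tf.log and tf.to_float exist); the sequence length of 100 is just an illustrative value.

import math  # needed by get_position_signal (math.pi)
import tensorflow as tf

signal = get_position_signal(sequence_length=100, position_dim=8)
with tf.Session() as sess:
    print(sess.run(signal).shape)  # (100, 8)
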
Code example #2
File: ops.py  Project: tallamjr/google-research
def add_coord_channels(image_tensor):
    """Adds channels containing pixel indices (x and y coordinates) to an image.

  Note: This has nothing to do with keypoint coordinates. It is just a data
  augmentation to allow convolutional networks to learn non-translation-
  equivariant outputs. This is similar to the "CoordConv" layers:
  https://arxiv.org/abs/1807.03247.

  Args:
    image_tensor: [batch_size, H, W, C] tensor.

  Returns:
    [batch_size, H, W, C + 2] tensor with x and y coordinate channels.
  """

    batch_size = tf.shape(image_tensor)[0]
    x_size = tf.shape(image_tensor)[2]
    y_size = tf.shape(image_tensor)[1]

    x_grid = tf.lin_space(-1.0, 1.0, x_size)
    x_map = tf.tile(x_grid[tf.newaxis, tf.newaxis, :, tf.newaxis],
                    (batch_size, y_size, 1, 1))

    y_grid = tf.lin_space(1.0, -1.0, y_size)
    y_map = tf.tile(y_grid[tf.newaxis, :, tf.newaxis, tf.newaxis],
                    (batch_size, 1, x_size, 1))

    return tf.concat([image_tensor, x_map, y_map], axis=-1)
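
A quick usage sketch, assuming TensorFlow 1.x; the batch of 32x32 RGB zeros is just a placeholder input.

import tensorflow as tf

images = tf.zeros([4, 32, 32, 3])
augmented = add_coord_channels(images)
with tf.Session() as sess:
    # (4, 32, 32, 5): the RGB channels plus an x map and a y map, each in [-1, 1].
    print(sess.run(augmented).shape)
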
Code example #3
    def contrast_normalize(self, I):
        """Subtracts a Gaussian-weighted local mean from I, then crops the border."""
        dist = tf.distributions.Normal(loc=0., scale=self.sigma)

        W = (self.kernel_size - 1) / 2.0
        box_x = tf.lin_space(-W, W, self.kernel_size)
        box_y = tf.lin_space(-W, W, self.kernel_size)

        prob_x = dist.prob(box_x)
        prob_y = dist.prob(box_y)

        gaussian_box = tf.matmul(tf.reshape(prob_x, [self.kernel_size, 1]),
                                 tf.reshape(prob_y, [1, self.kernel_size]))
        gaussian_box = tf.reshape(gaussian_box,
                                  [self.kernel_size, self.kernel_size, 1, 1])
        gaussian_box = tf.divide(gaussian_box, tf.reduce_sum(gaussian_box))

        avg_I = tf.nn.conv2d(I,
                             gaussian_box,
                             strides=[1, 1, 1, 1],
                             padding='SAME')
        normalized_I = I - avg_I

        img_H = tf.shape(I)[1]
        img_W = tf.shape(I)[2]

        normalized_I = tf.slice(
            normalized_I, [0, self.border, self.border, 0],
            [-1, img_H - 2 * self.border, img_W - 2 * self.border, -1])

        return [normalized_I]
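
Since contrast_normalize only reads sigma, kernel_size and border from self, a lightweight way to exercise it is to pass a simple namespace as self. This sketch assumes the method is available as a plain module-level function and TensorFlow 1.x (where tf.distributions.Normal exists); the parameter values are illustrative, not taken from the original class.

import types
import tensorflow as tf

# Hypothetical parameter holder standing in for the original object.
params = types.SimpleNamespace(sigma=1.0, kernel_size=5, border=2)
image = tf.random_uniform([1, 64, 64, 1])  # single-channel input, matching the 1-channel kernel
normalized, = contrast_normalize(params, image)  # shape [1, 60, 60, 1] after cropping the border
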
Code example #4
def get_random_scale(min_scale_factor, max_scale_factor, step_size):
    """Gets a random scale value.

  Args:
    min_scale_factor: Minimum scale value.
    max_scale_factor: Maximum scale value.
    step_size: The step size from minimum to maximum value.

  Returns:
    A random scale value selected between minimum and maximum value.
  Raises:
    ValueError: min_scale_factor has unexpected value.
  """
    if min_scale_factor < 0 or min_scale_factor > max_scale_factor:
        raise ValueError('Unexpected value of min_scale_factor.')

    if min_scale_factor == max_scale_factor:
        return tf.to_float(min_scale_factor)

    # When step_size = 0, we sample the value uniformly from [min, max).
    if step_size == 0:
        return tf.random_uniform([1],
                                 minval=min_scale_factor,
                                 maxval=max_scale_factor)

    # When step_size != 0, we randomly select one discrete value from [min, max].
    num_steps = int((max_scale_factor - min_scale_factor) / step_size + 1)
    scale_factors = tf.lin_space(min_scale_factor, max_scale_factor, num_steps)
    shuffled_scale_factors = tf.random_shuffle(scale_factors)
    return shuffled_scale_factors[0]
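
A usage sketch, assuming TensorFlow 1.x; the scale range and step below are illustrative. With step_size=0.25 the value is drawn from the discrete set {0.5, 0.75, ..., 2.0}, while step_size=0 would sample continuously from [0.5, 2.0).

import tensorflow as tf

scale = get_random_scale(min_scale_factor=0.5, max_scale_factor=2.0, step_size=0.25)
with tf.Session() as sess:
    print(sess.run(scale))  # e.g. 1.25
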
Code example #5
def camera_to_world_projection(depth, intrinsics, camera_to_world):
    """Project camera coordinates to world coordinates."""
    # depth: [HEIGHT, WIDTH, 1] radial distance from the camera eye to each pixel.
    # intrinsics: [3, 3] camera intrinsic matrix.
    # camera_to_world: [3, 4] camera-to-world rotation and translation.
    shape = depth.shape.as_list()
    height, width = shape[0], shape[1]
    xx, yy = tf.meshgrid(tf.lin_space(0., width - 1., width),
                         tf.lin_space(0., height - 1., height))
    p_pixel = tf.stack([xx, yy], axis=-1)
    p_pixel_homogeneous = tf.concat([p_pixel, tf.ones([height, width, 1])], -1)

    camera_to_world = tf.tile(camera_to_world[tf.newaxis, tf.newaxis, :],
                              [height, width, 1, 1])
    intrinsics = tf.tile(intrinsics[tf.newaxis, tf.newaxis, :],
                         [height, width, 1, 1])
    # Convert pixels coordinates (u, v, 1) to camera coordinates (x_c, y_c, f)
    # on the image plane.
    p_image = tf.squeeze(
        tf.matmul(tf.matrix_inverse(intrinsics),
                  tf.expand_dims(p_pixel_homogeneous, -1)), -1)

    lookat_axis = tf.tile(tf.constant([0., 0., 1.], shape=[1, 1, 3]),
                          [height, width, 1])
    z = depth * tf.reduce_sum(
        tf.math.l2_normalize(p_image, axis=-1) * lookat_axis,
        axis=-1,
        keepdims=True)
    p_camera = z * p_image
    # convert from OpenCV convention to OpenGL
    p_camera = p_camera * tf.constant([1., 1., -1.], shape=[1, 1, 3])
    p_camera_homogeneous = tf.concat(
        [p_camera, tf.ones(shape=[height, width, 1])], -1)
    # Convert camera coordinates to world coordinates.
    p_world = tf.squeeze(
        tf.matmul(camera_to_world, tf.expand_dims(p_camera_homogeneous, -1)),
        -1)
    return p_world
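
A small sketch of how the function above might be called, assuming TensorFlow 1.x; the depth map, intrinsics and identity camera-to-world pose are made-up values chosen only to show the expected shapes.

import tensorflow as tf

depth = tf.ones([480, 640, 1])                       # radial depth of 1 everywhere
intrinsics = tf.constant([[500., 0., 320.],
                          [0., 500., 240.],
                          [0., 0., 1.]])             # [3, 3]
pose_c2w = tf.concat([tf.eye(3), tf.zeros([3, 1])], axis=1)  # [3, 4] identity pose
points = camera_to_world_projection(depth, intrinsics, pose_c2w)  # [480, 640, 3]
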
Code example #6
def image_to_world_projection(depth, intrinsics, pose_c2w):
    """Project points on the image to the world frame.

  Args:
    depth: [HEIGHT, WIDTH, 1] the depth map contains the radial distance from
      the camera eye to each point corresponding to each pixel.
    intrinsics: [3, 3] camera's intrinsic matrix.
    pose_c2w: [3, 4] camera pose matrix (camera to world).

  Returns:
    [HEIGHT, WIDTH, 3] points in the world's coordinate frame.
  """
    shape = depth.shape.as_list()
    height, width = shape[0], shape[1]
    xx, yy = tf.meshgrid(tf.lin_space(0., width - 1., width),
                         tf.lin_space(0., height - 1., height))
    p_pixel_homogeneous = tf.concat(
        [tf.stack([xx, yy], axis=-1),
         tf.ones([height, width, 1])], -1)

    p_image = tf.squeeze(
        tf.matmul(tf.matrix_inverse(intrinsics[tf.newaxis, tf.newaxis, :]),
                  tf.expand_dims(p_pixel_homogeneous, -1)), -1)

    z = depth * tf.reduce_sum(
        tf.math.l2_normalize(p_image, axis=-1) * tf.constant([[[0., 0., 1.]]]),
        axis=-1,
        keepdims=True)
    p_camera = z * p_image
    # convert to OpenGL coordinate system.
    p_camera = p_camera * tf.constant([1., 1., -1.], shape=[1, 1, 3])
    p_camera_homogeneous = tf.concat(
        [p_camera, tf.ones(shape=[height, width, 1])], -1)
    # Convert camera coordinates to world coordinates.
    p_world = tf.squeeze(
        tf.matmul(pose_c2w[tf.newaxis, tf.newaxis, :],
                  tf.expand_dims(p_camera_homogeneous, -1)), -1)
    return p_world
Code example #7
def generate_equirectangular_grid(shape):
    """Get spherical coordinates of an equirectangular grid.

  Args:
    shape: a list of two ints representing the (height, width) of the output.

  Returns:
    3-D tensor of shape `[HEIGHT, WIDTH, 2]`

  Raises:
    ValueError: 'shape' is not valid.
  """
    with tf.name_scope(None, 'generate_equirectangular_grid', [shape]):
        if not isinstance(shape, list) or len(shape) != 2:
            raise ValueError("'shape' is not valid.")

        height, width = shape[0], shape[1]
        pixel_w = 2 * math.pi / float(width)
        pixel_h = math.pi / float(height)
        azimuth, colatitude = tf.meshgrid(
            tf.lin_space(pixel_w / 2, 2 * math.pi - pixel_w / 2, width),
            tf.lin_space(pixel_h / 2, math.pi - pixel_h / 2, height))
        return tf.stack([colatitude, azimuth], axis=-1)
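
For example (TensorFlow 1.x and a module-level math import assumed), a 64x128 panorama grid:

import tensorflow as tf

grid = generate_equirectangular_grid([64, 128])
# grid has shape [64, 128, 2]; grid[..., 0] is colatitude in (0, pi) and
# grid[..., 1] is azimuth in (0, 2*pi), one value per pixel centre.
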
Code example #8
File: summary.py  Project: stante/tensorboard
    def when_nonsingular():
        bucket_width = range_ / tf.cast(bucket_count, tf.float64)
        offsets = data - min_
        bucket_indices = tf.cast(tf.floor(offsets / bucket_width),
                                 dtype=tf.int32)
        clamped_indices = tf.minimum(bucket_indices, bucket_count - 1)
        one_hots = tf.one_hot(clamped_indices, depth=bucket_count)
        bucket_counts = tf.cast(tf.reduce_sum(one_hots, axis=0),
                                dtype=tf.float64)
        edges = tf.lin_space(min_, max_, bucket_count + 1)
        left_edges = edges[:-1]
        right_edges = edges[1:]
        return tf.transpose(
            tf.stack([left_edges, right_edges, bucket_counts]))
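
The snippet above is a nested helper that closes over data, min_, max_, range_ and bucket_count from the enclosing histogram-summary code. Below is a self-contained sketch of the same bucketing idea, with hypothetical stand-ins for those outer variables (TensorFlow 1.x assumed).

import tensorflow as tf

data = tf.constant([0.1, 0.4, 0.45, 0.9], dtype=tf.float64)
bucket_count = 4
min_, max_ = tf.reduce_min(data), tf.reduce_max(data)
range_ = max_ - min_
bucket_width = range_ / tf.cast(bucket_count, tf.float64)
indices = tf.minimum(tf.cast(tf.floor((data - min_) / bucket_width), tf.int32),
                     bucket_count - 1)
counts = tf.cast(tf.reduce_sum(tf.one_hot(indices, depth=bucket_count), axis=0),
                 tf.float64)
edges = tf.lin_space(min_, max_, bucket_count + 1)
# Each row is (left edge, right edge, count); shape [bucket_count, 3].
histogram = tf.transpose(tf.stack([edges[:-1], edges[1:], counts]))
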
Code example #9
def generate_cartesian_grid(resolution, fov):
    """Get (x, y, z) coordinates of all pixel centres in the image.

  The image plane lies at z=-1 and the image center is (0, 0, -1).
  Args:
    resolution: a list of two ints containing the resolution (height, width)
                of the desired output.
    fov: (float) camera's horizontal field of view in degrees.

  Returns:
    3-D tensor of shape `[HEIGHT, WIDTH, 3]`

  Raises:
    ValueError: 'resolution' is not valid.
  """
    with tf.name_scope(None, 'generate_cartesian_grid', [resolution, fov]):
        if not isinstance(resolution, list) or len(resolution) != 2:
            raise ValueError("'resolution' is not valid.")

        fov = fov / 180 * math.pi
        width = 2 * tf.tan(fov / 2)
        height = width * resolution[0] / resolution[1]
        pixel_size = width / resolution[1]
        x_range = width - pixel_size
        y_range = height - pixel_size
        # x increases from left to right while y increases from bottom to top.
        # Use half-integer pixel centre convention, and generate the coordinates
        # for the centres of the pixels.
        # For example, a 2x3 grid with pixel_size=1 (height=2, width=3) should have
        # [(-1.0,  0.5), (0.0,  0.5), (1.0,  0.5),
        #  (-1.0, -0.5), (0.0, -0.5), (1.0, -0.5)]
        xx, yy = tf.meshgrid(
            tf.lin_space(-x_range / 2, x_range / 2, resolution[1]),
            tf.lin_space(y_range / 2, -y_range / 2, resolution[0]))
        grid = tf.stack([xx, yy, -tf.ones_like(xx)], axis=-1)
        return grid
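
For instance (TensorFlow 1.x and a module-level math import assumed), a VGA-sized grid with a 90-degree horizontal field of view:

import tensorflow as tf

grid = generate_cartesian_grid([480, 640], fov=90.0)
# grid has shape [480, 640, 3]; every point lies on the z = -1 image plane.
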
Code example #10
def generateCoords(inputShape):
    crop_size = inputShape[-2]
    firstDim = inputShape[0]

    Xcoords = tf.expand_dims(tf.lin_space(-1.0, 1.0, crop_size), axis=0)
    Xcoords = tf.tile(Xcoords, [crop_size, 1])
    Ycoords = -1 * tf.transpose(Xcoords)  # flip so that -1 ends up at the bottom row
    Xcoords = tf.expand_dims(Xcoords, axis=-1)
    Ycoords = tf.expand_dims(Ycoords, axis=-1)
    coords = tf.concat([Xcoords, Ycoords], axis=-1)
    coords = tf.expand_dims(
        coords, axis=0
    )  # Add a leading batch dimension; shape is now [1, crop_size, crop_size, 2].
    coords = tf.tile(
        coords, [firstDim, 1, 1, 1])  # Repeat along the batch dimension.
    return coords
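
A quick shape check (TensorFlow 1.x assumed); the input shape below is illustrative.

import tensorflow as tf

coords = generateCoords([8, 256, 256, 3])   # batch of 8, 256x256 crops
# coords has shape [8, 256, 256, 2]: an x map increasing left to right and
# a y map increasing bottom to top, both in [-1, 1].
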
Code example #11
def equirectangular_area_weights(height):
    """Generate area weights for pixels in equirectangular images.

  This is to account for the area difference of pixels at different latitudes on
  equirectangular grids.

  Args:
    height: the height dimension of the equirectangular images.

  Returns:
    Area weights with shape [1, HEIGHT, 1, 1].
  """
    with tf.name_scope(None, 'equirectangular_area_weights', [height]):
        pixel_h = math.pi / tf.cast(height, tf.float32)
        # Use half-integer pixel centre convention, and generate the spherical
        # coordinates for the centres of the pixels.
        colatitude = tf.lin_space(pixel_h / 2, math.pi - pixel_h / 2, height)
        colatitude = colatitude[tf.newaxis, :, tf.newaxis, tf.newaxis]
        return tf.sin(colatitude)
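
Usage sketch (TensorFlow 1.x and a module-level math import assumed), e.g. to down-weight pixels near the poles of a 64x128 equirectangular image:

import tensorflow as tf

weights = equirectangular_area_weights(64)   # [1, 64, 1, 1], sin(colatitude) per row
loss_map = tf.ones([2, 64, 128, 1])          # hypothetical per-pixel loss
weighted = loss_map * weights                # rows near the poles count less
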
Code example #12
def generateSurfaceArray(crop_size, pixelsToAdd=0):
    totalSize = crop_size + (pixelsToAdd * 2)
    XsurfaceArray = tf.expand_dims(tf.lin_space(-1.0, 1.0, totalSize), axis=0)
    XsurfaceArray = tf.tile(XsurfaceArray, [totalSize, 1])
    YsurfaceArray = -1 * tf.transpose(
        XsurfaceArray)  # flip so that -1 ends up at the bottom row
    XsurfaceArray = tf.expand_dims(XsurfaceArray, axis=-1)
    YsurfaceArray = tf.expand_dims(YsurfaceArray, axis=-1)

    surfaceArray = tf.concat([
        XsurfaceArray, YsurfaceArray,
        tf.zeros([totalSize, totalSize, 1], dtype=tf.float32)
    ],
                             axis=-1)
    surfaceArray = tf.expand_dims(
        tf.expand_dims(surfaceArray, axis=0),
        axis=0)  #Add dimension to support batch size and nbRenderings
    return surfaceArray
Code example #13
    def extract_glimpses(self, images, locations):
        """Extracts fovea-like glimpses.

    Args:
      images: 4-D Tensor of shape [batch, height, width, channels].
      locations: 2-D Tensor of shape [batch, 2] with glimpse locations. Locations
        are in the interval [-1, 1], where:
        (-1, -1): upper left corner.
        (-1, 1): upper right corner.
        (1, 1): lower right corner.
        (1, -1): lower left corner.

    Returns:
      A list of `num_resolutions` 4-D tensors of shape
      [batch, height, width, channels], one glimpse per resolution.
    """
        # Get multi resolution fields of view (first is full resolution)
        image_shape = tf.cast(tf.shape(images)[1:3], dtype=tf.float32)
        start = tf.cast(self.glimpse_shape[0],
                        dtype=tf.float32) / image_shape[0]
        fields_of_view = tf.cast(tf.lin_space(start, 1., self.num_resolutions),
                                 dtype=tf.float32)
        receptive_fields = [self.glimpse_shape] + [
            tf.cast(fields_of_view[i] * image_shape, dtype=tf.int32)
            for i in range(1, self.num_resolutions)
        ]
        images_glimpses_list = []
        for field in receptive_fields:
            # Extract a glimpse with specific shape and scale.
            images_glimpse = utils.extract_glimpse(images,
                                                   size=field,
                                                   offsets=locations)
            # Bigger receptive fields have lower resolution.
            images_glimpse = tf.image.resize_images(images_glimpse,
                                                    size=self.glimpse_shape)
            # Stop gradient
            if self.apply_stop_gradient:
                images_glimpse = tf.stop_gradient(images_glimpse)
            images_glimpses_list.append(images_glimpse)
        return images_glimpses_list
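
Calling the method above needs the surrounding class (glimpse_shape, num_resolutions, apply_stop_gradient) and the project's utils.extract_glimpse helper, none of which are shown here. The following isolates just the multi-resolution field-of-view arithmetic with made-up values (TensorFlow 1.x assumed).

import tensorflow as tf

glimpse_shape = (32, 32)
num_resolutions = 3
image_shape = tf.constant([128., 128.])                    # hypothetical image size
start = tf.cast(glimpse_shape[0], dtype=tf.float32) / image_shape[0]  # 0.25
fields_of_view = tf.lin_space(start, 1., num_resolutions)  # [0.25, 0.625, 1.0]
receptive_fields = [glimpse_shape] + [
    tf.cast(fields_of_view[i] * image_shape, dtype=tf.int32)
    for i in range(1, num_resolutions)
]  # the first glimpse is full resolution; later ones cover larger, coarser regions
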
Code example #14
    [(0, 0, 0), (1, 0, 0), (1, 1, 0), (2, 1, 0)]
]  # L

dataset = [np.array(points_) for points_ in tetris]
num_classes = len(dataset)

tf.disable_eager_execution()

# In[20]:

# radial basis functions
rbf_low = 0.0
rbf_high = 3.5
rbf_count = 4
rbf_spacing = (rbf_high - rbf_low) / rbf_count
centers = tf.cast(tf.lin_space(rbf_low, rbf_high, rbf_count), FLOAT_TYPE)

# In[23]:

# r : [N, 3]
r = tf.placeholder(FLOAT_TYPE, shape=(4, 3))

# rij : [N, N, 3]
rij = utils.difference_matrix(r)

# dij : [N, N]
dij = utils.distance_matrix(r)

# rbf : [N, N, rbf_count]
gamma = 1. / rbf_spacing
rbf = tf.exp(-gamma * tf.square(tf.expand_dims(dij, axis=-1) - centers))
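
The last two lines expand each pairwise distance into rbf_count Gaussian radial basis features. Below is a self-contained sketch of that expansion, with utils.difference_matrix and utils.distance_matrix replaced by plain TensorFlow ops (an assumption about what those helpers compute, based on the shape comments above).

import tensorflow as tf

points = tf.constant([[0., 0., 0.], [1., 0., 0.], [1., 1., 0.], [2., 1., 0.]])
rij = points[:, None, :] - points[None, :, :]           # [N, N, 3] pairwise differences
dij = tf.norm(rij, axis=-1)                             # [N, N] pairwise distances
centers = tf.lin_space(0.0, 3.5, 4)                     # rbf_low, rbf_high, rbf_count
gamma = 1.0 / ((3.5 - 0.0) / 4)                         # 1 / rbf_spacing
rbf = tf.exp(-gamma * tf.square(dij[..., None] - centers))   # [N, N, 4]
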