Example #1
0
def random_square_crop(image_size, min_scale):
  """Samples the origin and extent of a random square crop inside an image.

  The side of the square is the shorter of the two image dimensions multiplied
  by a factor drawn uniformly from [min_scale, 1.0) and truncated to an
  integer. The square is then placed at a uniformly random offset chosen so
  that it lies entirely within the image.

  Args:
    image_size: a [height, width] tensor.
    min_scale: lower bound of the factor applied to the shorter image
      dimension (e.g. for a 480 x 640 image and a min_scale of 0.8, the crop
      side lies between 384, which is 480 * 0.8, and 480).

  Returns:
    A tuple (output_begin, output_size, image_size).
    output_begin is the three element tensor [height_offset, width_offset, 0]
    and output_size is the three element tensor [side, side, -1]; together
    they describe the region to crop using crop_sequence. image_size is the
    two element [height, width] input tensor, passed through unchanged.
  """
  shortest_side = tf.to_float(tf.reduce_min(image_size[0:2]))
  scale = tf.random_uniform([], min_scale, 1.0)
  crop_side = tf.to_int32(shortest_side * scale)
  # NOTE(review): the trailing -1 presumably means "take the whole last
  # dimension", as in tf.slice -- confirm against the consumer.
  output_size = tf.stack([crop_side, crop_side, -1])

  def _random_offset(extent):
    # maxval is exclusive, so the offset ranges over [0, extent - crop_side],
    # which keeps the crop inside the image along this dimension.
    return tf.random_uniform([], 0, extent - crop_side + 1, dtype=tf.int32)

  # Height is sampled before width so the random ops are created in the same
  # order as before (matters when a graph-level seed is set).
  top = _random_offset(image_size[0])
  left = _random_offset(image_size[1])
  output_begin = tf.stack([top, left, 0])
  return output_begin, output_size, image_size
def sample_boxes_by_jittering(boxlist,
                              num_boxes_to_sample,
                              stddev=0.1,
                              scope=None):
    """Creates new boxes by randomly perturbing boxes drawn from a boxlist.

    Each output box starts from a box picked uniformly at random (with
    replacement) from `boxlist`. Every edge is then shifted by an independent
    zero-mean Gaussian offset that is scaled by the box height (y edges) or
    box width (x edges), and the result is clipped to the [0, 1] range.

    Boxes of size 0 can be produced: when a jittered max edge ends up below
    the matching min edge it is snapped onto the min edge. This gets more
    likely as stddev grows; for a small stddev such as 0.1 it is very
    unlikely.

    Args:
      boxlist: A boxlist containing N boxes in normalized coordinates.
      num_boxes_to_sample: A positive integer, the number of boxes to sample.
      stddev: Standard deviation of the normal distribution the edge offsets
        are drawn from. Offsets are multiplied by the box size, so larger
        boxes move by more pixels.
      scope: Name scope.

    Returns:
      sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in
        normalized coordinates.
    """
    with tf.name_scope(scope, 'SampleBoxesByJittering'):
        # Choose which source box each sample is derived from.
        picks = tf.random_uniform([num_boxes_to_sample],
                                  minval=0,
                                  maxval=boxlist.num_boxes(),
                                  dtype=tf.int32)
        base = tf.gather(boxlist.get(), picks)
        # Columns are read as [ymin, xmin, ymax, xmax].
        base_ymin = base[:, 0]
        base_xmin = base[:, 1]
        base_ymax = base[:, 2]
        base_xmax = base[:, 3]
        box_h = base_ymax - base_ymin
        box_w = base_xmax - base_xmin

        def _edge_noise():
            return tf.random_normal([num_boxes_to_sample], stddev=stddev)

        # Noise tensors are created in the order ymin, xmin, ymax, xmax.
        noise_ymin = _edge_noise()
        noise_xmin = _edge_noise()
        noise_ymax = _edge_noise()
        noise_xmax = _edge_noise()

        jittered_ymin = noise_ymin * box_h + base_ymin
        jittered_xmin = noise_xmin * box_w + base_xmin
        # A max edge may never fall below its min edge.
        jittered_ymax = tf.maximum(jittered_ymin,
                                   noise_ymax * box_h + base_ymax)
        jittered_xmax = tf.maximum(jittered_xmin,
                                   noise_xmax * box_w + base_xmax)

        jittered = tf.stack(
            [jittered_ymin, jittered_xmin, jittered_ymax, jittered_xmax],
            axis=1)
        # Keep the coordinates normalized.
        clipped = tf.maximum(tf.minimum(jittered, 1.0), 0.0)
        return box_list.BoxList(clipped)
Example #3
0
def _align_single_cycle(cycle, embs, cycle_length, num_steps, similarity_type,
                        temperature):
    """Takes a single cycle and returns logits (simialrity scores) and labels."""
    # Choose random frame.
    n_idx = tf.random_uniform((), minval=0, maxval=num_steps, dtype=tf.int32)
    # Create labels
    onehot_labels = tf.one_hot(n_idx, num_steps)

    # Choose query feats for first frame.
    query_feats = embs[cycle[0], n_idx:n_idx + 1]

    num_channels = tf.shape(query_feats)[-1]
    for c in range(1, cycle_length + 1):
        candidate_feats = embs[cycle[c]]

        if similarity_type == 'l2':
            # Find L2 distance.
            mean_squared_distance = tf.reduce_sum(tf.squared_difference(
                tf.tile(query_feats, [num_steps, 1]), candidate_feats),
                                                  axis=1)
            # Convert L2 distance to similarity.
            similarity = -mean_squared_distance

        elif similarity_type == 'cosine':
            # Dot product of embeddings.
            similarity = tf.squeeze(
                tf.matmul(candidate_feats, query_feats, transpose_b=True))
        else:
            raise ValueError('similarity_type can either be l2 or cosine.')

        # Scale the distance  by number of channels. This normalization helps with
        # optimization.
        similarity /= tf.cast(num_channels, tf.float32)
        # Scale the distance by a temperature that helps with how soft/hard the
        # alignment should be.
        similarity /= temperature

        beta = tf.nn.softmax(similarity)
        beta = tf.expand_dims(beta, axis=1)
        beta = tf.tile(beta, [1, num_channels])

        # Find weighted nearest neighbour.
        query_feats = tf.reduce_sum(beta * candidate_feats,
                                    axis=0,
                                    keepdims=True)

    return similarity, onehot_labels
Example #4
0
    def call(self, net, training):
        """Applies DropBlock to a square feature map.

        Contiguous dropblock_size x dropblock_size regions of `net` are zeroed
        out and the surviving activations are rescaled by the inverse of the
        fraction of kept positions. The layer is the identity when `training`
        is falsy or when self.keep_prob is None.

        Args:
          net: 4-D feature-map tensor laid out according to self.data_format
            ('channels_last' for NHWC, anything else is treated as NCHW).
            Its two spatial dimensions must be statically known and equal.
          training: whether the layer runs in training mode.

        Returns:
          `net` unchanged when DropBlock is disabled, otherwise a tensor with
          the same shape and dtype as `net` with blocks zeroed and the rest
          rescaled.

        Raises:
          ValueError: if the spatial dimensions of `net` are not statically
            known or are not equal.
        """
        keep_prob = self.keep_prob
        dropblock_size = self.dropblock_size
        data_format = self.data_format
        if not training or keep_prob is None:
            return net

        tf.logging.info(
            'Applying DropBlock: dropblock_size {}, net.shape {}'.format(
                dropblock_size, net.shape))

        if data_format == 'channels_last':
            _, height, width, _ = net.get_shape().as_list()
        else:
            _, _, height, width = net.get_shape().as_list()
        if height is None or width is None:
            # Without this check the arithmetic below fails with an obscure
            # TypeError on the None dimension.
            raise ValueError(
                'Input tensor must have statically known height and width.')
        if width != height:
            raise ValueError(
                'Input tensor with width!=height is not supported.')

        dropblock_size = min(dropblock_size, width)
        # seed_drop_rate is the gamma parameter of DropBlock: the probability
        # of a valid position becoming the center of a dropped block, chosen
        # so the expected fraction of dropped units is about 1 - keep_prob.
        seed_drop_rate = (1.0 - keep_prob) * width**2 / dropblock_size**2 / (
            width - dropblock_size + 1)**2

        # Forces the block to be inside the feature map.
        w_i, h_i = tf.meshgrid(tf.range(width), tf.range(width))
        valid_block_center = tf.logical_and(
            tf.logical_and(w_i >= int(dropblock_size // 2),
                           w_i < width - (dropblock_size - 1) // 2),
            tf.logical_and(h_i >= int(dropblock_size // 2),
                           h_i < width - (dropblock_size - 1) // 2))

        # Add batch and channel axes of size 1 so the mask broadcasts
        # against `net`.
        valid_block_center = tf.expand_dims(valid_block_center, 0)
        valid_block_center = tf.expand_dims(
            valid_block_center, -1 if data_format == 'channels_last' else 0)

        randnoise = tf.random_uniform(net.shape, dtype=tf.float32)
        # 1.0 where the position is kept. Invalid centers always evaluate to
        # >= 1; valid centers are kept only when the noise is at least
        # seed_drop_rate, i.e. they seed a block with that probability.
        block_pattern = (
            1 - tf.cast(valid_block_center, dtype=tf.float32) + tf.cast(
                (1 - seed_drop_rate), dtype=tf.float32) + randnoise) >= 1
        block_pattern = tf.cast(block_pattern, dtype=tf.float32)

        if dropblock_size == width:
            # The block covers the whole map: one seed drops everything.
            block_pattern = tf.reduce_min(
                block_pattern,
                axis=[1, 2] if data_format == 'channels_last' else [2, 3],
                keepdims=True)
        else:
            if data_format == 'channels_last':
                ksize = [1, dropblock_size, dropblock_size, 1]
            else:
                ksize = [1, 1, dropblock_size, dropblock_size]
            # Negated max-pool is a min-pool: every zero seed grows into a
            # dropblock_size x dropblock_size block of zeros.
            block_pattern = -tf.nn.max_pool(-block_pattern,
                                            ksize=ksize,
                                            strides=[1, 1, 1, 1],
                                            padding='SAME',
                                            data_format='NHWC' if data_format
                                            == 'channels_last' else 'NCHW')

        # Fraction of positions that survive.
        # NOTE(review): if every position is dropped this is 0 and the
        # rescaling below evaluates 0/0 -- confirm whether a floor is wanted.
        percent_ones = (tf.cast(tf.reduce_sum(block_pattern), tf.float32) /
                        tf.cast(tf.size(block_pattern), tf.float32))

        net = net / tf.cast(percent_ones, net.dtype) * tf.cast(
            block_pattern, net.dtype)
        return net