def random_square_crop(image_size, min_scale):
  """Picks a randomly sized, randomly placed square crop window for an image.

  The side of the square is the smaller image dimension multiplied by a factor
  drawn uniformly from [min_scale, 1.0) and converted to int32. The top-left
  corner is then drawn so that the whole square lies inside the image.
  (E.g. for a 480 x 640 image and a min_scale of 0.8, the side of the crop
  lies between 384, which is 480 * 0.8, and 480.)

  Args:
    image_size: a [height, width] tensor.
    min_scale: smallest fraction of the minimum image dimension that the crop
      side may take.

  Returns:
    output_begin, output_size and image_size.
    output_begin is the three element tensor [height_offset, width_offset, 0]
    and output_size is the three element tensor [side, side, -1]; together
    they specify the region to crop using crop_sequence below.
    image_size is the input tensor, returned unchanged.
  """
  shorter_side = tf.reduce_min(image_size[0:2])
  shorter_side_float = tf.to_float(shorter_side)
  scale = tf.random_uniform([], min_scale, 1.0)
  crop_side = tf.to_int32(shorter_side_float * scale)

  # NOTE(review): the trailing -1 presumably means "keep every channel"
  # (tf.slice convention) -- confirm against crop_sequence.
  output_size = tf.stack([crop_side, crop_side, -1])

  def _sample_offset(axis):
    # Exclusive upper bound chosen so that offset + crop_side never exceeds
    # the image extent along this axis.
    upper_bound = image_size[axis] - crop_side + 1
    return tf.random_uniform([], 0, upper_bound, dtype=tf.int32)

  top = _sample_offset(0)
  left = _sample_offset(1)
  output_begin = tf.stack([top, left, 0])
  return output_begin, output_size, image_size
def sample_boxes_by_jittering(boxlist,
                              num_boxes_to_sample,
                              stddev=0.1,
                              scope=None):
  """Draws num_boxes_to_sample boxes by perturbing boxes taken from boxlist.

  Source boxes are picked uniformly at random (with replacement) and each of
  their four corners coordinates is shifted by an independent Gaussian offset.
  A sampled box may end up with size 0; this becomes more likely as stddev
  grows, and is very unlikely for a small stddev such as 0.1.

  Args:
    boxlist: A boxlist containing N boxes in normalized coordinates.
    num_boxes_to_sample: A positive integer containing the number of boxes to
      sample.
    stddev: Standard deviation of the normal distribution the corner offsets
      are drawn from. Each offset is multiplied by the box height or width, so
      larger boxes move further in absolute terms.
    scope: Name scope.

  Returns:
    sampled_boxlist: A boxlist containing num_boxes_to_sample boxes in
      normalized coordinates.
  """
  with tf.name_scope(scope, 'SampleBoxesByJittering'):
    total_boxes = boxlist.num_boxes()
    picked = tf.random_uniform(
        [num_boxes_to_sample], minval=0, maxval=total_boxes, dtype=tf.int32)
    base = tf.gather(boxlist.get(), picked)

    # Columns are [miny, minx, maxy, maxx].
    heights = base[:, 2] - base[:, 0]
    widths = base[:, 3] - base[:, 1]

    def _noise():
      return tf.random_normal([num_boxes_to_sample], stddev=stddev)

    # Drawn in the order miny, minx, maxy, maxx.
    noise_miny, noise_minx, noise_maxy, noise_maxx = (
        _noise() for _ in range(4))

    miny = noise_miny * heights + base[:, 0]
    minx = noise_minx * widths + base[:, 1]
    maxy = noise_maxy * heights + base[:, 2]
    maxx = noise_maxx * widths + base[:, 3]

    # Keep boxes well formed: a max edge may never fall below its min edge.
    maxy = tf.maximum(miny, maxy)
    maxx = tf.maximum(minx, maxx)

    jittered = tf.stack([miny, minx, maxy, maxx], axis=1)
    # Clamp into the normalized [0, 1] coordinate range.
    upper_clamped = tf.minimum(jittered, 1.0)
    clamped = tf.maximum(upper_clamped, 0.0)
    return box_list.BoxList(clamped)
def _align_single_cycle(cycle, embs, cycle_length, num_steps, similarity_type,
                        temperature):
  """Takes a single cycle and returns logits (similarity scores) and labels.

  A random frame of the first sequence in the cycle is used as the query. For
  every subsequent sequence in the cycle the query is replaced by its soft
  (softmax-weighted) nearest neighbour in that sequence. The similarities
  computed against the last sequence of the cycle are returned as logits.

  Args:
    cycle: Indexable collection of sequence indices into embs; entries
      0..cycle_length are read.
    embs: Embeddings tensor. NOTE(review): indexing implies a layout of
      [num_sequences, num_steps, num_channels] -- confirm against caller.
    cycle_length: Python integer >= 1, number of hops taken after the first
      sequence.
    num_steps: Number of frames per sequence.
    similarity_type: Either 'l2' or 'cosine'.
    temperature: Divisor applied to the similarities before the softmax;
      controls how soft/hard the alignment is.

  Returns:
    A tuple (similarity, onehot_labels). similarity holds the num_steps logits
    from the final hop of the cycle and onehot_labels is the one-hot encoding
    (depth num_steps) of the randomly chosen starting frame.

  Raises:
    ValueError: If similarity_type is not 'l2' or 'cosine', or if cycle_length
      is smaller than 1.
  """
  # Validate up front so that no ops are added to the graph on bad input.
  if similarity_type not in ('l2', 'cosine'):
    raise ValueError('similarity_type can either be l2 or cosine.')
  if cycle_length < 1:
    # Without at least one hop there would be no similarity to return.
    raise ValueError('cycle_length must be at least 1.')

  # Choose random frame.
  n_idx = tf.random_uniform((), minval=0, maxval=num_steps, dtype=tf.int32)
  # Create labels.
  onehot_labels = tf.one_hot(n_idx, num_steps)

  # Choose query feats for first frame. The slice keeps the step dimension,
  # so query_feats has a leading dimension of size 1.
  query_feats = embs[cycle[0], n_idx:n_idx + 1]
  num_channels = tf.shape(query_feats)[-1]

  for c in range(1, cycle_length + 1):
    candidate_feats = embs[cycle[c]]

    if similarity_type == 'l2':
      # Squared L2 distance between the query and every candidate frame.
      squared_distance = tf.reduce_sum(
          tf.squared_difference(tf.tile(query_feats, [num_steps, 1]),
                                candidate_feats),
          axis=1)
      # Convert L2 distance to similarity.
      similarity = -squared_distance
    else:
      # Dot product of embeddings. Squeeze only the query axis: an
      # unrestricted squeeze would also drop the step axis when
      # num_steps == 1 and turn the logits into a scalar.
      similarity = tf.squeeze(
          tf.matmul(candidate_feats, query_feats, transpose_b=True), axis=1)

    # Scale the distance by number of channels. This normalization helps with
    # optimization.
    similarity /= tf.cast(num_channels, tf.float32)
    # Scale the distance by a temperature that helps with how soft/hard the
    # alignment should be.
    similarity /= temperature

    beta = tf.nn.softmax(similarity)
    beta = tf.expand_dims(beta, axis=1)
    beta = tf.tile(beta, [1, num_channels])

    # Find weighted nearest neighbour; it becomes the query for the next hop.
    query_feats = tf.reduce_sum(beta * candidate_feats,
                                axis=0, keepdims=True)

  return similarity, onehot_labels
def call(self, net, training):
  """Applies DropBlock regularization to a square feature map.

  Seed positions are sampled at rate gamma inside the region where a full
  block fits, then each seed is grown into a dropblock_size x dropblock_size
  square of zeros with a min-pool. The surviving activations are rescaled by
  the fraction of kept units.

  Args:
    net: 4-D feature tensor, laid out according to self.data_format
      ('channels_last' is handled explicitly; any other value is treated as
      channels-first). The two spatial dimensions must be statically known
      and equal.
    training: If falsy, net is returned unchanged.

  Returns:
    net unchanged when not training or when self.keep_prob is None; otherwise
    a tensor of the same shape with blocks zeroed out and the remaining values
    rescaled.

  Raises:
    ValueError: If the two spatial dimensions of net differ.
  """
  keep_prob = self.keep_prob
  dropblock_size = self.dropblock_size
  data_format = self.data_format

  if not training or keep_prob is None:
    return net

  tf.logging.info(
      'Applying DropBlock: dropblock_size {}, net.shape {}'.format(
          dropblock_size, net.shape))

  if data_format == 'channels_last':
    _, width, height, _ = net.get_shape().as_list()
  else:
    _, _, width, height = net.get_shape().as_list()
  if width != height:
    raise ValueError('Input tensor with width!=height is not supported.')

  dropblock_size = min(dropblock_size, width)
  # seed_drop_rate is the gamma parameter of DropBlock.
  seed_drop_rate = (1.0 - keep_prob) * width**2 / dropblock_size**2 / (
      width - dropblock_size + 1)**2

  # Forces the block to be inside the feature map.
  w_i, h_i = tf.meshgrid(tf.range(width), tf.range(width))
  valid_block_center = tf.logical_and(
      tf.logical_and(w_i >= int(dropblock_size // 2),
                     w_i < width - (dropblock_size - 1) // 2),
      tf.logical_and(h_i >= int(dropblock_size // 2),
                     h_i < width - (dropblock_size - 1) // 2))

  # Add singleton batch and channel axes so the mask broadcasts against net.
  valid_block_center = tf.expand_dims(valid_block_center, 0)
  valid_block_center = tf.expand_dims(
      valid_block_center, -1 if data_format == 'channels_last' else 0)

  # A partially known static shape (e.g. a None batch dimension) cannot be
  # converted to a tensor, so fall back to the dynamic shape in that case.
  # The static shape is still used whenever it is complete, which leaves the
  # graph untouched for fully specified inputs.
  if net.shape.is_fully_defined():
    noise_shape = net.shape
  else:
    noise_shape = tf.shape(net)
  randnoise = tf.random_uniform(noise_shape, dtype=tf.float32)

  # 1 = keep, 0 = seed of a dropped block. Positions outside the valid centre
  # region always evaluate to 1; valid positions become 0 with probability
  # seed_drop_rate.
  block_pattern = (
      1 - tf.cast(valid_block_center, dtype=tf.float32) + tf.cast(
          (1 - seed_drop_rate), dtype=tf.float32) + randnoise) >= 1
  block_pattern = tf.cast(block_pattern, dtype=tf.float32)

  if dropblock_size == width:
    # The block covers the whole map: a single seed drops the entire map.
    block_pattern = tf.reduce_min(
        block_pattern,
        axis=[1, 2] if data_format == 'channels_last' else [2, 3],
        keepdims=True)
  else:
    if data_format == 'channels_last':
      ksize = [1, dropblock_size, dropblock_size, 1]
    else:
      ksize = [1, 1, dropblock_size, dropblock_size]
    # Min-pool, written as a negated max-pool, grows every seed into a block.
    block_pattern = -tf.nn.max_pool(
        -block_pattern,
        ksize=ksize,
        strides=[1, 1, 1, 1],
        padding='SAME',
        data_format='NHWC' if data_format == 'channels_last' else 'NCHW')

  # Fraction of units that survive, used to rescale the kept activations.
  percent_ones = (tf.cast(tf.reduce_sum(block_pattern), tf.float32) /
                  tf.cast(tf.size(block_pattern), tf.float32))

  net = net / tf.cast(percent_ones, net.dtype) * tf.cast(
      block_pattern, net.dtype)
  return net