def testBlockSizeNotDivisibleWidth(self):
  # The block size divides width but not height.
  x_np = [[[[1], [2], [3]],
           [[3], [4], [7]]]]
  block_size = 3
  with self.assertRaises(IndexError):
    _ = tf.space_to_depth(x_np, block_size)

def testInputWrongDimMissingBatch(self):
  # The input is missing the first dimension ("batch").
  x_np = [[[1], [2]],
          [[3], [4]]]
  block_size = 2
  with self.assertRaises(ValueError):
    _ = tf.space_to_depth(x_np, block_size)

def testBasic(self):
  x_np = [[[[1], [2]],
           [[3], [4]]]]
  with self.test_session(use_gpu=False):
    block_size = 2
    out_tf = tf.space_to_depth(x_np, block_size)
    self.assertAllEqual(out_tf.eval(), [[[[1, 2, 3, 4]]]])

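# For reference, a minimal NumPy sketch (not the library implementation) of
# the rearrangement these tests verify, assuming an NHWC array whose spatial
# dims are divisible by the block size: each bs x bs spatial block is
# flattened in row-major order, with the input depth kept innermost.
def _space_to_depth_np(x, bs):
  n, h, w, c = x.shape
  x = x.reshape(n, h // bs, bs, w // bs, bs, c)
  x = x.transpose(0, 1, 3, 2, 4, 5)  # bring the two block axes together
  return x.reshape(n, h // bs, w // bs, c * bs * bs)

# e.g. _space_to_depth_np(np.array([[[[1], [2]], [[3], [4]]]]), 2)
# yields [[[[1, 2, 3, 4]]]], matching testBasic above.
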
def testBlockSizeNotDivisibleBoth(self):
  # The block size divides neither width nor height.
  x_np = [[[[1], [2]],
           [[3], [4]]]]
  block_size = 3
  with self.assertRaises(IndexError):
    _ = tf.space_to_depth(x_np, block_size)

def testDepthInterleaved(self):
  x_np = [[[[1, 10], [2, 20]],
           [[3, 30], [4, 40]]]]
  with self.test_session(use_gpu=False):
    block_size = 2
    out_tf = tf.space_to_depth(x_np, block_size)
    self.assertAllEqual(out_tf.eval(), [[[[1, 10, 2, 20, 3, 30, 4, 40]]]])

def testInputWrongDimMissingDepth(self):
  # The input is missing the last dimension ("depth").
  x_np = [[[1, 2],
           [3, 4]]]
  block_size = 2
  with self.assertRaises(ValueError):
    out_tf = tf.space_to_depth(x_np, block_size)
    out_tf.eval()

def testBlockSize0(self):
  # The block size is 0.
  x_np = [[[[1], [2]],
           [[3], [4]]]]
  block_size = 0
  with self.assertRaises(ValueError):
    out_tf = tf.space_to_depth(x_np, block_size)
    out_tf.eval()

def testBlockSizeOne(self):
  # The block size is 1. The block size needs to be > 1.
  x_np = [[[[1], [2]],
           [[3], [4]]]]
  block_size = 1
  with self.assertRaises(ValueError):
    out_tf = tf.space_to_depth(x_np, block_size)
    out_tf.eval()

def testBlockSizeLarger(self):
  # The block size is too large for this input.
  x_np = [[[[1], [2]],
           [[3], [4]]]]
  block_size = 10
  with self.assertRaises(IndexError):
    out_tf = tf.space_to_depth(x_np, block_size)
    out_tf.eval()

def testBlockSizeNotDivisibleHeight(self):
  # The block size divides height but not width.
  x_np = [[[[1], [2]],
           [[3], [4]],
           [[5], [6]]]]
  block_size = 3
  with self.assertRaises(IndexError):
    _ = tf.space_to_depth(x_np, block_size)

def testDepthInterleavedDepth3(self):
  x_np = [[[[1, 2, 3], [4, 5, 6]],
           [[7, 8, 9], [10, 11, 12]]]]
  with self.test_session(use_gpu=False):
    block_size = 2
    out_tf = tf.space_to_depth(x_np, block_size)
    self.assertAllEqual(out_tf.eval(),
                        [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]])

def testSpaceToDepthTranspose(self):
  x = np.arange(5 * 10 * 16 * 7, dtype=np.float32).reshape([5, 10, 16, 7])
  block_size = 2
  paddings = np.zeros((2, 2), dtype=np.int32)
  y1 = tf.space_to_batch(x, paddings, block_size=block_size)
  y2 = tf.transpose(
      tf.space_to_depth(
          tf.transpose(x, [3, 1, 2, 0]), block_size=block_size),
      [3, 1, 2, 0])
  with self.test_session():
    self.assertAllEqual(y1.eval(), y2.eval())

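# Note: the test above checks a duality between the two ops. Both move
# bs x bs spatial blocks into another dimension (depth in one case, batch in
# the other), so swapping the batch and depth axes should turn space_to_depth
# into space_to_batch with zero padding, and the two paths must agree.
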
def testBlockSizeNotDivisibleDepth(self):
  # For space_to_depth the failure comes from the spatial dims: the 2 x 2
  # height and width are not divisible by the block size of 3.
  x_np = [[[[1, 1, 1, 1],
            [2, 2, 2, 2]],
           [[3, 3, 3, 3],
            [4, 4, 4, 4]]]]
  block_size = 3
  with self.assertRaises(IndexError):
    _ = tf.space_to_depth(x_np, block_size)

def testLargerInput4x4(self):
  x_np = [[[[1], [2], [5], [6]],
           [[3], [4], [7], [8]],
           [[9], [10], [13], [14]],
           [[11], [12], [15], [16]]]]
  with self.test_session(use_gpu=False):
    block_size = 4
    out_tf = tf.space_to_depth(x_np, block_size)
    self.assertAllEqual(
        out_tf.eval(),
        [[[[1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16]]]])

def testDepthInterleavedLarge(self):
  x_np = [[[[1, 10], [2, 20], [5, 50], [6, 60]],
           [[3, 30], [4, 40], [7, 70], [8, 80]],
           [[9, 90], [10, 100], [13, 130], [14, 140]],
           [[11, 110], [12, 120], [15, 150], [16, 160]]]]
  with self.test_session(use_gpu=False):
    block_size = 2
    out_tf = tf.space_to_depth(x_np, block_size)
    self.assertAllEqual(out_tf.eval(),
                        [[[[1, 10, 2, 20, 3, 30, 4, 40],
                           [5, 50, 6, 60, 7, 70, 8, 80]],
                          [[9, 90, 10, 100, 11, 110, 12, 120],
                           [13, 130, 14, 140, 15, 150, 16, 160]]]])

def testNonSquare(self):
  x_np = [[[[1, 10], [2, 20]],
           [[3, 30], [4, 40]],
           [[5, 50], [6, 60]],
           [[7, 70], [8, 80]],
           [[9, 90], [10, 100]],
           [[11, 110], [12, 120]]]]
  with self.test_session(use_gpu=False):
    block_size = 2
    out_tf = tf.space_to_depth(x_np, block_size)
    self.assertAllEqual(out_tf.eval(),
                        [[[[1, 10, 2, 20, 3, 30, 4, 40]],
                          [[5, 50, 6, 60, 7, 70, 8, 80]],
                          [[9, 90, 10, 100, 11, 110, 12, 120]]]])

def _checkGrad(self, x, block_size):
  assert 4 == x.ndim
  with self.test_session():
    tf_x = tf.convert_to_tensor(x)
    tf_y = tf.space_to_depth(tf_x, block_size)
    epsilon = 1e-2
    (x_jacob_t, x_jacob_n) = tf.test.compute_gradient(
        tf_x,
        x.shape,
        tf_y,
        tf_y.get_shape().as_list(),
        x_init_value=x,
        delta=epsilon)
  self.assertAllClose(x_jacob_t, x_jacob_n, rtol=1e-2, atol=epsilon)

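# A hypothetical test built on the _checkGrad helper above: a random NHWC
# input whose spatial dims are divisible by the block size (the shape and
# distribution are illustrative, not from the original suite).
def testSmallGradient(self):
  x = np.random.normal(0, 1, [1, 4, 4, 3]).astype(np.float32)
  self._checkGrad(x, block_size=2)
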
def __call__(self, shape, dtype='float32'):  # tf needs partition_info=None
    shape = list(shape)
    if self.scale == 1:
        return self.initializer(shape)
    new_shape = shape[:3] + [shape[3] // (self.scale ** 2)]
    if isinstance(self.initializer, dict):
        self.initializer = initializers.deserialize(self.initializer)
    var_x = self.initializer(new_shape, dtype)
    var_x = tf.transpose(var_x, perm=[2, 0, 1, 3])
    var_x = tf.image.resize_nearest_neighbor(
        var_x,
        size=(shape[0] * self.scale, shape[1] * self.scale),
        align_corners=True)
    var_x = tf.space_to_depth(var_x, block_size=self.scale,
                              data_format='NHWC')
    var_x = tf.transpose(var_x, perm=[1, 2, 0, 3])
    return var_x

def icnr_keras(shape, dtype=None):
    """ Custom initializer for subpix upscaling
    From https://github.com/kostyaev/ICNR
    Note: upscale factor is fixed to 2, and the base initializer is fixed to
    random normal.
    """
    # TODO Roll this into ICNR_init when porting GAN 2.2
    shape = list(shape)
    scale = 2
    initializer = tf.keras.initializers.RandomNormal(0, 0.02)
    new_shape = shape[:3] + [int(shape[3] / (scale ** 2))]
    var_x = initializer(new_shape, dtype)
    var_x = tf.transpose(var_x, perm=[2, 0, 1, 3])
    var_x = tf.image.resize_nearest_neighbor(
        var_x, size=(shape[0] * scale, shape[1] * scale))
    var_x = tf.space_to_depth(var_x, block_size=scale)
    var_x = tf.transpose(var_x, perm=[1, 2, 0, 3])
    return var_x

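# Both initializers above implement the ICNR scheme (Aitken et al., 2017):
# initialize a sub-pixel (depth_to_space) convolution so that at init it is
# equivalent to nearest-neighbor upsampling, which suppresses checkerboard
# artifacts. space_to_depth folds the upsampled kernel back to the low
# resolution with scale**2 times the channels, replicating each filter across
# the sub-pixel positions.
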
def read_and_batchify_image(image_path, shape, image_type="jpg"):
    """Return the original image as read from image_path and the image split
    into a batch tensor of patches.
    Args:
        image_path: image path
        shape: batch shape, like:
            [no_patches_per_side**2, patch_side, patch_side, 3]
        image_type: image type
    Returns:
        original_image, patches
        where original_image is a tensor in the format [width, height, 3]
        and patches is a tensor of processed images, ready to be classified,
        with size [batch_size, w, h, 3]"""
    original_image = read_image(image_path, 3, image_type)
    # extract values from shape
    patch_side = shape[1]
    no_patches_per_side = int(math.sqrt(shape[0]))
    resized_input_side = patch_side * no_patches_per_side
    resized_image = resize_bl(original_image, resized_input_side)
    resized_image = tf.expand_dims(resized_image, 0)
    patches = tf.space_to_depth(resized_image, patch_side)
    print(patches)
    patches = tf.squeeze(patches, [0])  # 4,4,192*192*3
    print(patches)
    patches = tf.reshape(patches,
                         [no_patches_per_side**2, patch_side, patch_side, 3])
    print(patches)
    patches_a = tf.split(0, no_patches_per_side**2, patches)
    print(patches_a)
    normalized_patches = []
    for patch in patches_a:
        patch_as_input_image = zm_mp(
            tf.reshape(tf.squeeze(patch, [0]), [patch_side, patch_side, 3]))
        print(patch_as_input_image)
        normalized_patches.append(patch_as_input_image)
    # the last patch is not a "patch" but the whole image resized to
    # [patch_side, patch_side, 3], to give a glance at the whole image in
    # parallel with the patch analysis
    normalized_patches.append(zm_mp(resize_bl(original_image, patch_side)))
    batch_of_patches = tf.pack(normalized_patches)
    return (tf.image.convert_image_dtype(original_image, tf.uint8),
            batch_of_patches)

def space_to_depth_x2(x):
    """Thin wrapper for Tensorflow space_to_depth with block_size=2."""
    # Import currently required to make Lambda work.
    # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273
    import tensorflow as tf
    return tf.space_to_depth(x, block_size=2)

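# A hypothetical use of the wrapper above inside a Keras model, where a raw
# TF op must be wrapped in a Lambda layer; `conv_features` is an assumed
# [N, 38, 38, 64] tensor, which the layer repacks to [N, 19, 19, 256]:
from keras.layers import Lambda
passthrough = Lambda(space_to_depth_x2,
                     name='space_to_depth_x2')(conv_features)
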
def dec_down(gs, zs_posterior, training, init=False, dropout_p=0.5,
             n_scales=1, n_residual_blocks=2, activation="elu",
             n_latent_scales=2):
    assert n_residual_blocks % 2 == 0
    gs = list(gs)
    zs_posterior = list(zs_posterior)
    with model_arg_scope(init=init, dropout_p=dropout_p,
                         activation=activation):
        # outputs
        hs = []  # hidden units
        ps = []  # priors
        zs = []  # prior samples

        # prepare input
        n_filters = gs[-1].shape.as_list()[-1]
        h = nn.nin(gs[-1], n_filters)
        for l in range(n_scales):
            # level module
            ## hidden units
            for i in range(n_residual_blocks // 2):
                h = nn.residual_block(h, gs.pop())
                hs.append(h)
            if l < n_latent_scales:
                ## prior
                spatial_shape = h.shape.as_list()[1]
                n_h_channels = h.shape.as_list()[-1]
                if spatial_shape == 1:
                    ### no spatial correlations
                    p = latent_parameters(h)
                    ps.append(p)
                    z_prior = latent_sample(p)
                    zs.append(z_prior)
                else:
                    ### four autoregressively modeled groups
                    if training:
                        z_posterior_groups = nn.split_groups(zs_posterior[0])
                    p_groups = []
                    z_groups = []
                    p_features = tf.space_to_depth(nn.residual_block(h), 2)
                    for i in range(4):
                        p_group = latent_parameters(
                            p_features, num_filters=n_h_channels)
                        p_groups.append(p_group)
                        z_group = latent_sample(p_group)
                        z_groups.append(z_group)
                        # ar feedback sampled from
                        if training:
                            feedback = z_posterior_groups.pop(0)
                        else:
                            feedback = z_group
                        # prepare input for next group
                        if i + 1 < 4:
                            p_features = nn.residual_block(p_features,
                                                           feedback)
                    if training:
                        assert not z_posterior_groups
                    # complete prior parameters
                    p = nn.merge_groups(p_groups)
                    ps.append(p)
                    # complete prior sample
                    z_prior = nn.merge_groups(z_groups)
                    zs.append(z_prior)
                ## vae feedback sampled from
                if training:
                    ## posterior
                    z = zs_posterior.pop(0)
                else:
                    ## prior
                    z = z_prior
                for i in range(n_residual_blocks // 2):
                    n_h_channels = h.shape.as_list()[-1]
                    h = tf.concat([h, z], axis=-1)
                    h = nn.nin(h, n_h_channels)
                    h = nn.residual_block(h, gs.pop())
                    hs.append(h)
            else:
                for i in range(n_residual_blocks // 2):
                    h = nn.residual_block(h, gs.pop())
                    hs.append(h)
            # prepare input to next level
            if l + 1 < n_scales:
                n_filters = gs[-1].shape.as_list()[-1]
                h = nn.upsample(h, n_filters)
    assert not gs
    if training:
        assert not zs_posterior
    return hs, ps, zs

def reorg(x, stride):
    return tf.space_to_depth(x, block_size=stride)

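# Hypothetical shapes for the reorg (passthrough) layer above, as used in
# YOLOv2: a fine-grained [N, 26, 26, 512] feature map becomes
# [N, 13, 13, 2048], so it can be concatenated with the coarse 13x13
# detection head; `conv_features_26x26` is an assumed tensor.
fine_grained = reorg(conv_features_26x26, stride=2)
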
def conv_layers(tensor,
                filters,
                kernels,
                strides=None,
                pool_sizes=None,
                pool_strides=None,
                padding="same",
                activation=tf.nn.relu,
                linear_top_layer=False,
                drop_rates=None,
                drop_type="regular",
                conv_method="conv",
                pool_method="conv",
                pool_activation=None,
                dilations=None,
                batch_norm=False,
                training=False,
                weight_decay=0.0,
                weight_regularizer="l2",
                **kwargs):
  """Builds a stack of convolutional layers with dropout and max pooling."""
  if not filters:
    return tensor
  kernels = _to_array(kernels, len(filters), 1)
  pool_sizes = _to_array(pool_sizes, len(filters), 1)
  pool_strides = _to_array(pool_strides, len(filters), 1)
  strides = _to_array(strides, len(filters), 1)
  drop_rates = _to_array(drop_rates, len(filters), 0.)
  dilations = _to_array(dilations, len(filters), 1)
  conv_method = _to_array(conv_method, len(filters), "conv")
  pool_method = _to_array(pool_method, len(filters), "conv")

  kernel_initializer = tf.glorot_uniform_initializer()
  kernel_regularizer = regularizer_ops.weight_regularizer(
      weight_decay, weight_regularizer)

  conv = {
      "conv": functools.partial(
          tf.keras.layers.Conv2D,
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer),
      "transposed": functools.partial(
          tf.keras.layers.Conv2DTranspose,
          kernel_initializer=kernel_initializer,
          kernel_regularizer=kernel_regularizer),
      "separable": functools.partial(
          tf.keras.layers.SeparableConv2D,
          depthwise_initializer=kernel_initializer,
          pointwise_initializer=kernel_initializer,
          depthwise_regularizer=kernel_regularizer,
          pointwise_regularizer=kernel_regularizer),
  }

  for i, (fs, ks, ss, pz, pr, drp, dl, cm, pm) in enumerate(
      zip(filters, kernels, strides, pool_sizes, pool_strides, drop_rates,
          dilations, conv_method, pool_method)):
    with tf.variable_scope("conv_block_%d" % i):
      if i == len(filters) - 1 and linear_top_layer:
        activation = None
        pool_activation = None
      tensor = noise_ops.dropout(
          tensor, drp, training=training, type=drop_type)
      if dl > 1:
        conv_kwargs = _merge_dicts(kwargs, {"dilation_rate": dl})
      else:
        conv_kwargs = kwargs
      tensor = conv[cm](
          filters=fs,
          kernel_size=ks,
          strides=ss,
          padding=padding,
          use_bias=False,
          name="conv2d",
          **conv_kwargs).apply(tensor)
      if activation:
        if batch_norm:
          tensor = tf.layers.batch_normalization(tensor, training=training)
        tensor = activation(tensor)
      if pz > 1:
        if pm == "max":
          tensor = tf.keras.layers.MaxPool2D(
              pz, pr, padding, name="max_pool").apply(tensor)
        elif pm == "std":
          tensor = tf.space_to_depth(tensor, pz, name="space_to_depth")
        elif pm == "dts":
          tensor = tf.depth_to_space(tensor, pz, name="depth_to_space")
        else:
          tensor = conv["conv"](
              fs, pz, pr, padding,
              use_bias=False,
              name="strided_conv2d",
              **kwargs).apply(tensor)
        if pool_activation:
          if batch_norm:
            tensor = tf.layers.batch_normalization(tensor, training=training)
          tensor = pool_activation(tensor)
  return tensor

def conv_layers(tensor,
                filters,
                kernels,
                strides=None,
                pool_sizes=None,
                pool_strides=None,
                padding="same",
                activation=tf.nn.relu,
                use_bias=False,
                linear_top_layer=False,
                drop_rates=None,
                conv_method="conv",
                pool_method="conv",
                pool_activation=None,
                batch_norm=False,
                training=False,
                weight_decay=0.0002,
                **kwargs):
  """Builds a stack of convolutional layers with dropout and max pooling."""
  if pool_sizes is None:
    pool_sizes = [1] * len(filters)
  if pool_strides is None:
    pool_strides = pool_sizes
  if strides is None:
    strides = [1] * len(filters)
  if drop_rates is None:
    drop_rates = [0.] * len(filters)
  elif isinstance(drop_rates, numbers.Number):
    drop_rates = [drop_rates] * len(filters)

  if conv_method == "conv":
    conv = functools.partial(
        tf.layers.conv2d,
        kernel_initializer=tf.glorot_uniform_initializer(),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(weight_decay))
  elif conv_method == "transposed":
    conv = functools.partial(
        tf.layers.conv2d_transpose,
        kernel_initializer=tf.glorot_uniform_initializer(),
        kernel_regularizer=tf.contrib.layers.l2_regularizer(weight_decay))
  elif conv_method == "separable":
    conv = functools.partial(
        tf.layers.separable_conv2d,
        depthwise_initializer=tf.glorot_uniform_initializer(),
        pointwise_initializer=tf.glorot_uniform_initializer(),
        depthwise_regularizer=tf.contrib.layers.l2_regularizer(weight_decay),
        pointwise_regularizer=tf.contrib.layers.l2_regularizer(weight_decay))

  for i, (fs, ks, ss, pz, pr, drp) in enumerate(
      zip(filters, kernels, strides, pool_sizes, pool_strides, drop_rates)):
    with tf.variable_scope("conv_block_%d" % i):
      if i == len(filters) - 1 and linear_top_layer:
        activation = None
        pool_activation = None
      tensor = tf.layers.dropout(tensor, drp)
      tensor = conv(
          tensor, fs, ks, ss, padding, use_bias=use_bias, name="conv2d",
          **kwargs)
      if activation:
        if batch_norm:
          tensor = batch_normalization(tensor, training=training)
        tensor = activation(tensor)
      if pz > 1:
        if pool_method == "max":
          tensor = tf.layers.max_pooling2d(
              tensor, pz, pr, padding, name="max_pool")
        elif pool_method == "std":
          tensor = tf.space_to_depth(tensor, pz, name="space_to_depth")
        elif pool_method == "dts":
          tensor = tf.depth_to_space(tensor, pz, name="depth_to_space")
        else:
          tensor = conv(
              tensor, fs, pz, pr, padding, use_bias=use_bias,
              name="strided_conv2d", **kwargs)
        if pool_activation:
          if batch_norm:
            tensor = batch_normalization(tensor, training=training)
          tensor = pool_activation(tensor)
  return tensor

def build_graph(parameters):
  input_tensor = tf.placeholder(
      dtype=parameters["dtype"],
      name="input",
      shape=parameters["input_shape"])
  out = tf.space_to_depth(input_tensor, block_size=parameters["block_size"])
  return [input_tensor], [out]

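# A hypothetical parameter set for the graph builder above (the keys mirror
# the ones the function reads; the values are illustrative):
example_parameters = {
    "dtype": tf.float32,
    "input_shape": [1, 4, 4, 3],
    "block_size": 2,
}
inputs, outputs = build_graph(example_parameters)
# outputs[0] has shape [1, 2, 2, 12]: each spatial dim shrinks by block_size
# and depth grows by block_size**2.
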
def forward(self, x, **kwargs):
    return tf.space_to_depth(x, self.block_size), None

def discriminator_simplified_api(inputs, is_train=True, reuse=False):
    df_dim = 64  # Dimension of discrim filters in first conv layer. [64]
    c_dim = FLAGS.c_dim  # n_color 3
    batch_size = FLAGS.batch_size  # 64
    w_init = tf.random_normal_initializer(stddev=0.02)
    gamma_init = tf.random_normal_initializer(1., 0.02)
    with tf.variable_scope("discriminator", reuse=reuse):
        tl.layers.set_name_reuse(reuse)
        net_in = InputLayer(inputs, name='d/in')
        net_h0 = Conv2d(net_in, df_dim, (5, 5),
                        act=lambda x: tl.act.lrelu(x, 0.2),
                        padding='VALID', W_init=w_init, name='d/h0/conv2d')
        net_h1 = Conv2d(net_h0, df_dim * 2, (5, 5), act=None,
                        padding='VALID', W_init=w_init, b_init=None,
                        name='d/h1/conv2d')
        net_h1 = BatchNormLayer(net_h1, act=lambda x: tl.act.lrelu(x, 0.2),
                                is_train=is_train, gamma_init=gamma_init,
                                name='d/h1/batch_norm')
        net_h1.outputs = tf.space_to_depth(net_h1.outputs, 2)
        net_h2 = Conv2d(net_h1, df_dim * 4, (5, 5), act=None,
                        padding='VALID', W_init=w_init, b_init=None,
                        name='d/h2/conv2d')
        net_h2 = BatchNormLayer(net_h2, act=lambda x: tl.act.lrelu(x, 0.2),
                                is_train=is_train, gamma_init=gamma_init,
                                name='d/h2/batch_norm')
        net_h2.outputs = tf.space_to_depth(net_h2.outputs, 2)
        net_h3 = Conv2d(net_h2, df_dim * 4, (5, 5), act=None,
                        padding='VALID', W_init=w_init, b_init=None,
                        name='d/h3/conv2d')
        net_h3 = BatchNormLayer(net_h3, act=lambda x: tl.act.lrelu(x, 0.2),
                                is_train=is_train, gamma_init=gamma_init,
                                name='d/h3/batch_norm')
        net_h3.outputs = tf.space_to_depth(net_h3.outputs, 2)
        # fourth conv block; takes net_h3 from the previous block as input
        net_h3 = Conv2d(net_h3, df_dim * 4, (5, 5), act=None,
                        padding='VALID', W_init=w_init, b_init=None,
                        name='d/h4/conv2d')
        net_h3 = BatchNormLayer(net_h3, act=lambda x: tl.act.lrelu(x, 0.2),
                                is_train=is_train, gamma_init=gamma_init,
                                name='d/h4/batch_norm')
        net_h4 = FlattenLayer(net_h3, name='d/h5/flatten')
        net_h4 = DenseLayer(net_h4, n_units=1, act=tf.identity,
                            W_init=w_init, name='d/h5/lin_sigmoid')
        logits = net_h4.outputs
        net_h4.outputs = tf.nn.sigmoid(net_h4.outputs)
    return net_h4, logits

def _testOne(self, inputs, block_size, outputs):
  for use_gpu in [False, True]:
    with self.test_session(use_gpu=use_gpu):
      x_tf = tf.space_to_depth(tf.to_float(inputs), block_size)
      self.assertAllEqual(x_tf.eval(), outputs)

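# A hypothetical test built on the _testOne helper above, mirroring testBasic
# but exercising both CPU and GPU paths:
def testBasicViaHelper(self):
  x_np = [[[[1], [2]],
           [[3], [4]]]]
  self._testOne(x_np, 2, [[[[1, 2, 3, 4]]]])
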
def testUnknownShape(self):
  t = tf.space_to_depth(tf.placeholder(tf.float32), block_size=4)
  self.assertEqual(4, t.get_shape().ndims)

def position_sensitive_crop_regions(image,
                                    boxes,
                                    box_ind,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool,
                                    extrapolation_value=None):
  """Position-sensitive crop and pool rectangular regions from a feature grid.

  The output crops are split into `spatial_bins_y` vertical bins and
  `spatial_bins_x` horizontal bins. For each intersection of a vertical and a
  horizontal bin the output values are gathered by performing
  `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
  channels of the image. This reduces `depth` by a factor of
  `(spatial_bins_y * spatial_bins_x)`.

  When global_pool is True, this function implements a differentiable version
  of position-sensitive RoI pooling used in
  [R-FCN detection system](https://arxiv.org/abs/1605.06409).

  When global_pool is False, this function implements a differentiable version
  of position-sensitive assembling operation used in
  [instance FCN](https://arxiv.org/abs/1603.08678).

  Args:
    image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
      `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
      A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
      Both `image_height` and `image_width` need to be positive.
    boxes: A `Tensor` of type `float32`.
      A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
      specifies the coordinates of a box in the `box_ind[i]` image and is
      specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
      coordinate value of `y` is mapped to the image coordinate at
      `y * (image_height - 1)`, so the `[0, 1]` interval of normalized image
      height is mapped to `[0, image_height - 1]` in image height
      coordinates. We do allow y1 > y2, in which case the sampled crop is an
      up-down flipped version of the original image. The width dimension is
      treated similarly. Normalized coordinates outside the `[0, 1]` range
      are allowed, in which case we use `extrapolation_value` to extrapolate
      the input image values.
    box_ind: A `Tensor` of type `int32`.
      A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
      The value of `box_ind[i]` specifies the image that the `i`-th box
      refers to.
    crop_size: A list of two integers `[crop_height, crop_width]`. All
      cropped image patches are resized to this size. The aspect ratio of the
      image content is not preserved. Both `crop_height` and `crop_width`
      need to be positive.
    num_spatial_bins: A list of two integers
      `[spatial_bins_y, spatial_bins_x]`. Represents the number of
      position-sensitive bins in y and x directions. Both values should be
      >= 1. `crop_height` should be divisible by `spatial_bins_y`, and
      similarly for width. The number of image channels should be divisible
      by (spatial_bins_y * spatial_bins_x). Suggested value from R-FCN
      paper: [3, 3].
    global_pool: A boolean variable.
      If True, we perform average global pooling on the features assembled
      from the position-sensitive score maps.
      If False, we keep the position-pooled features without global pooling
      over the spatial coordinates.
      Note that using global_pool=True is equivalent to but more efficient
      than running the function with global_pool=False and then performing
      global average pooling.
    extrapolation_value: An optional `float`. Defaults to `0`. Value used for
      extrapolation, when applicable.

  Returns:
    position_sensitive_features: A 4-D tensor of shape
      `[num_boxes, K, K, crop_channels]`, where
      `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`, where
      K = 1 when global_pool is True (Average-pooled cropped regions), and
      K = crop_size when global_pool is False.
  Raises:
    ValueError: Raised in four situations:
      `num_spatial_bins` is not >= 1;
      `num_spatial_bins` does not divide `crop_size`;
      `(spatial_bins_y * spatial_bins_x)` does not divide `depth`;
      `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
  """
  total_bins = 1
  bin_crop_size = []

  for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
    if num_bins < 1:
      raise ValueError('num_spatial_bins should be >= 1')
    if crop_dim % num_bins != 0:
      raise ValueError('crop_size should be divisible by num_spatial_bins')
    total_bins *= num_bins
    bin_crop_size.append(crop_dim // num_bins)

  if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
    raise ValueError('Only support square bin crop size for now.')

  ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
  spatial_bins_y, spatial_bins_x = num_spatial_bins

  # Split each box into spatial_bins_y * spatial_bins_x bins.
  position_sensitive_boxes = []
  for bin_y in range(spatial_bins_y):
    step_y = (ymax - ymin) / spatial_bins_y
    for bin_x in range(spatial_bins_x):
      step_x = (xmax - xmin) / spatial_bins_x
      box_coordinates = [ymin + bin_y * step_y,
                         xmin + bin_x * step_x,
                         ymin + (bin_y + 1) * step_y,
                         xmin + (bin_x + 1) * step_x]
      position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

  image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=3)

  image_crops = []
  for (split, box) in zip(image_splits, position_sensitive_boxes):
    crop = tf.image.crop_and_resize(
        split, box, box_ind, bin_crop_size,
        extrapolation_value=extrapolation_value)
    image_crops.append(crop)

  if global_pool:
    # Average over all bins.
    position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
    # Then average over spatial positions within the bins.
    position_sensitive_features = tf.reduce_mean(
        position_sensitive_features, [1, 2], keep_dims=True)
  else:
    # Reorder height/width to depth channel.
    block_size = bin_crop_size[0]
    if block_size >= 2:
      image_crops = [tf.space_to_depth(crop, block_size=block_size)
                     for crop in image_crops]

    # Pack image_crops so that first dimension is for position-sensitive
    # boxes.
    position_sensitive_features = tf.stack(image_crops, axis=0)

    # Unroll the position-sensitive boxes to spatial positions.
    position_sensitive_features = tf.squeeze(
        tf.batch_to_space_nd(position_sensitive_features,
                             block_shape=[1] + num_spatial_bins,
                             crops=tf.zeros((3, 2), dtype=tf.int32)),
        squeeze_dims=[0])

    # Reorder back the depth channel.
    if block_size >= 2:
      position_sensitive_features = tf.depth_to_space(
          position_sensitive_features, block_size=block_size)

  return position_sensitive_features

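# A hypothetical call of position_sensitive_crop_regions above: a
# [2, 14, 14, 18] score map, two boxes, 3 x 3 bins, R-FCN-style global
# pooling (all values are illustrative).
features = tf.random_normal([2, 14, 14, 18])
boxes = tf.constant([[0.1, 0.1, 0.8, 0.9],
                     [0.2, 0.0, 1.0, 0.7]])
box_ind = tf.constant([0, 1], dtype=tf.int32)
psroi = position_sensitive_crop_regions(
    features, boxes, box_ind, crop_size=[6, 6], num_spatial_bins=[3, 3],
    global_pool=True, extrapolation_value=0.0)
# psroi shape: [2, 1, 1, 2], since depth 18 / (3 * 3) bins = 2 channels.
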
def space_to_depth_x4(x):
    """Thin wrapper for Tensorflow space_to_depth with block_size=4."""
    # Import currently required to make Lambda work.
    import tensorflow as tf
    return tf.space_to_depth(x, block_size=4)

def decompress_seqcnn(x,
                      targets,
                      targets_vocab_size,
                      dilations_and_kernels,
                      block_size,
                      is_2d=False,
                      embedding_var=None,
                      name=None,
                      reuse=None):
  """Decompress x into targets size using a Sequence CNN at every element."""
  with tf.variable_scope(
      name,
      default_name="decompress_batch_seqcnn",
      values=[x, targets],
      reuse=reuse):
    # We assume targets are [batch x block_size * N x block_size * N x C] if
    # is_2d=True or [batch, block_size * N, 1, C] otherwise, and C is static.
    # Let's shift targets to depth and embed.
    targets_shape = tf.shape(targets)
    targets_shape_static = targets.get_shape()
    channels = int(targets_shape_static[-1])
    hidden_size = int(x.get_shape()[-1])
    if is_2d:
      depth_targets = tf.space_to_depth(targets, block_size)
      factor = channels * block_size * block_size
    else:
      depth_targets = tf.reshape(targets, [
          targets_shape[0], targets_shape[1] // block_size, 1,
          channels * block_size
      ])
      factor = channels * block_size
    if embedding_var is None:
      embedding_var = tf.get_variable("targets_embedding",
                                      [targets_vocab_size, hidden_size])
    targets_emb = tf.gather(embedding_var, depth_targets)

    # Flatten x and embedded targets. Flat targets are factor* larger on
    # axis=1.
    flat_x = tf.reshape(x, [-1, 1, 1, hidden_size])
    flat_targets = tf.reshape(targets_emb, [-1, factor, 1, hidden_size])
    shifted_targets = shift_left(flat_targets)

    # Run a SeqCNN large-batch to produce factor outputs out of every target.
    flat_x += tf.zeros_like(shifted_targets)  # Broadcast on axis=1.
    flat_outputs = conv_block(
        tf.concat([flat_x, shifted_targets], axis=3),
        hidden_size,
        dilations_and_kernels,
        padding="LEFT")

    # Reshape back to embedded targets shape.
    outputs = tf.reshape(flat_outputs, [
        tf.shape(targets_emb)[0],
        tf.shape(targets_emb)[1],
        tf.shape(targets_emb)[2], factor * hidden_size
    ])
    # Move depth back to target space.
    if is_2d:
      outputs = tf.depth_to_space(outputs, 2)
    else:
      outputs = tf.reshape(outputs, [
          tf.shape(outputs)[0], block_size * tf.shape(outputs)[1], 1,
          hidden_size
      ])
    # Final reshape before prediction to ensure target size.
    outputs = tf.reshape(outputs, [
        targets_shape[0], targets_shape[1], targets_shape[2], channels,
        hidden_size
    ])
    return tf.layers.dense(outputs, targets_vocab_size)

def forward(self, x):
    dk = 3
    activate = tf.nn.leaky_relu
    mf = self.main_channel_nums
    num_block = self.num_blocks
    n, f1, w, h, c = x.shape
    ki = tf.contrib.layers.xavier_initializer()
    ds = 1
    with tf.variable_scope('nlvsr', reuse=tf.AUTO_REUSE) as scope:
        conv0 = Conv2D(mf, 5, strides=ds, padding='same',
                       activation=activate, kernel_initializer=ki,
                       name='conv0')
        conv1 = [Conv2D(mf, dk, strides=ds, padding='same',
                        activation=activate, kernel_initializer=ki,
                        name='conv1_{}'.format(i))
                 for i in range(num_block)]
        conv10 = [Conv2D(mf, 1, strides=ds, padding='same',
                         activation=activate, kernel_initializer=ki,
                         name='conv10_{}'.format(i))
                  for i in range(num_block)]
        conv2 = [Conv2D(mf, dk, strides=ds, padding='same',
                        activation=activate, kernel_initializer=ki,
                        name='conv2_{}'.format(i))
                 for i in range(num_block)]
        convmerge1 = Conv2D(48, 3, strides=ds, padding='same',
                            activation=activate, kernel_initializer=ki,
                            name='convmerge1')
        convmerge2 = Conv2D(12, 3, strides=ds, padding='same',
                            activation=None, kernel_initializer=ki,
                            name='convmerge2')

        inp0 = [x[:, i, :, :, :] for i in range(f1)]  # list[7]: Tensor[8, 64, 64, 3]
        inp0 = tf.concat(inp0, axis=-1)  # Tensor: [8, 64, 64, 21]
        inp1 = tf.space_to_depth(inp0, 2)  # Tensor: [8, 32, 32, 84]
        with tf.device('/cpu:0'):
            inp1 = NonLocalBlock(inp1, int(c) * self.num_frames * 4,
                                 sub_sample=self.nonLocal_sub_sample_rate,
                                 nltype=1,
                                 scope='nlblock_{}'.format(0))  # Tensor: [8, 32, 32, 84]
        inp1 = tf.depth_to_space(inp1, 2)  # Tensor: [8, 64, 64, 21]
        inp0 += inp1  # Tensor: [8, 64, 64, 21]
        inp0 = tf.split(inp0, num_or_size_splits=self.num_frames,
                        axis=-1)  # list[7]: Tensor[8, 64, 64, 3]
        inp0 = [conv0(f) for f in inp0]  # list[7]: Tensor[8, 64, 64, 64]
        bic = tf.image.resize_images(x[:, self.num_frames // 2, :, :, :],
                                     [w * self.scale, h * self.scale],
                                     method=2)  # Tensor: [8, 256, 256, 3]

        for i in range(num_block):
            inp1 = [conv1[i](f) for f in inp0]
            base = tf.concat(inp1, axis=-1)
            base = conv10[i](base)
            inp2 = [tf.concat([base, f], -1) for f in inp1]
            inp2 = [conv2[i](f) for f in inp2]
            inp0 = [tf.add(inp0[j], inp2[j]) for j in range(f1)]

        # inp0: list[7]: Tensor[8, 64, 64, 64]; merge: Tensor[8, 64, 64, 448 = 7 * 64]
        merge = tf.concat(inp0, axis=-1)
        merge = convmerge1(merge)  # merge: Tensor[8, 64, 64, 48]
        large1 = tf.depth_to_space(merge, 2)  # large1: Tensor[8, 128, 128, 12]
        out1 = convmerge2(large1)  # out1: Tensor[8, 128, 128, 12]
        out = tf.depth_to_space(out1, 2)  # out: Tensor[8, 256, 256, 3]

    return tf.stack([out + bic], axis=1, name='out')

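# One reading of the space_to_depth / depth_to_space pair above: the
# non-local block computes pairwise interactions across all spatial
# positions, so its cost grows quadratically with H * W; packing 2 x 2 blocks
# into depth first quarters the number of positions, and depth_to_space
# restores the original resolution afterwards.
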
def space_to_depth_x2(x):
    return tf.space_to_depth(x, block_size=2)

def descriptor_loss(descriptors, warped_descriptors, homographies,
                    valid_mask=None, **config):
    # Compute the position of the center pixel of every cell in the image
    (batch_size, Hc, Wc) = tf.unstack(tf.to_int32(tf.shape(descriptors)[:3]))
    coord_cells = tf.stack(
        tf.meshgrid(tf.range(Hc), tf.range(Wc), indexing='ij'), axis=-1)
    coord_cells = (coord_cells * config['grid_size']
                   + config['grid_size'] // 2)  # (Hc, Wc, 2)
    # coord_cells is now a grid containing the coordinates of the Hc x Wc
    # center pixels of the 8x8 cells of the image

    # Compute the position of the warped center pixels
    warped_coord_cells = warp_points(
        tf.reshape(coord_cells, [-1, 2]), homographies)
    # warped_coord_cells is now a list of the warped coordinates of all the
    # center pixels of the 8x8 cells of the image, shape (N, Hc x Wc, 2)

    # Compute the pairwise distances and filter the ones less than a threshold
    # The distance is just the pairwise norm of the difference of the two
    # grids. Using shape broadcasting, cell_distances has shape
    # (N, Hc, Wc, Hc, Wc)
    coord_cells = tf.to_float(tf.reshape(coord_cells, [1, Hc, Wc, 1, 1, 2]))
    warped_coord_cells = tf.reshape(warped_coord_cells,
                                    [batch_size, 1, 1, Hc, Wc, 2])
    cell_distances = tf.norm(coord_cells - warped_coord_cells, axis=-1)
    s = tf.to_float(tf.less_equal(cell_distances, config['grid_size']))
    # s[id_batch, h, w, h', w'] == 1 if the point of coordinates (h, w)
    # warped by the homography is at a distance from (h', w') less than
    # config['grid_size'], and 0 otherwise

    # Compute the pairwise dot product between descriptors: d^t * d'
    descriptors = tf.reshape(descriptors, [batch_size, Hc, Wc, 1, 1, -1])
    warped_descriptors = tf.reshape(warped_descriptors,
                                    [batch_size, 1, 1, Hc, Wc, -1])
    dot_product_desc = tf.reduce_sum(descriptors * warped_descriptors, -1)
    # dot_product_desc[id_batch, h, w, h', w'] is the dot product between the
    # descriptor at position (h, w) in the original descriptors map and the
    # descriptor at position (h', w') in the warped image

    # Compute the loss
    positive_dist = tf.maximum(
        0., config['positive_margin'] - dot_product_desc)
    negative_dist = tf.maximum(
        0., dot_product_desc - config['negative_margin'])
    loss = config['lambda_d'] * s * positive_dist + (1 - s) * negative_dist

    # Mask the pixels if bordering artifacts appear
    valid_mask = tf.ones([batch_size, Hc, Wc], tf.float32) \
        if valid_mask is None else valid_mask
    valid_mask = tf.to_float(valid_mask[..., tf.newaxis])  # for GPU
    valid_mask = tf.space_to_depth(valid_mask, config['grid_size'])
    valid_mask = tf.reduce_prod(valid_mask, axis=3)  # AND along the channel dim
    valid_mask = tf.reshape(valid_mask, [batch_size, 1, 1, Hc, Wc])

    normalization = tf.reduce_sum(valid_mask) * tf.to_float(Hc * Wc)
    loss = tf.reduce_sum(valid_mask * loss) / normalization
    return loss