def build_graph(parameters):
    input_tensor = tf.placeholder(
        dtype=parameters["dtype"],
        name="input",
        shape=parameters["input_shape"])
    out = tf.batch_to_space_nd(input_tensor, parameters["block_shape"],
                               parameters["crops"])
    return [input_tensor], [out]
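For reference, a hypothetical parameter dict for this generator could look as follows; the keys are the ones the function reads, and the values are illustrative only.

params = {
    "dtype": tf.float32,
    "input_shape": [4, 2, 2, 1],   # batch must be divisible by prod(block_shape)
    "block_shape": [2, 2],
    "crops": [[0, 0], [0, 0]],
}
inputs, outputs = build_graph(params)  # outputs[0] has shape [1, 4, 4, 1]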
def _testStaticShape(self, input_shape, block_shape, paddings, error):
    block_shape = np.array(block_shape)
    paddings = np.array(paddings)

    # Try with sizes known at graph construction time.
    with self.assertRaises(error):
        _ = tf.batch_to_space_nd(
            np.zeros(input_shape, np.float32), block_shape, paddings)
def _testPad(self, inputs, block_shape, paddings, outputs):
    block_shape = np.array(block_shape)
    paddings = np.array(paddings).reshape((len(block_shape), 2))
    for use_gpu in [False, True]:
        with self.test_session(use_gpu=use_gpu):
            # outputs = space_to_batch(inputs)
            x_tf = tf.space_to_batch_nd(tf.to_float(inputs), block_shape,
                                        paddings)
            self.assertAllEqual(x_tf.eval(), outputs)
            # inputs = batch_to_space(outputs)
            x_tf = tf.batch_to_space_nd(tf.to_float(outputs), block_shape,
                                        paddings)
            self.assertAllEqual(x_tf.eval(), inputs)
def _testDynamicShape(self, input_shape, block_shape, paddings):
    block_shape = np.array(block_shape)
    paddings = np.array(paddings)

    # Try with sizes unknown at graph construction time.
    input_placeholder = tf.placeholder(tf.float32)
    block_shape_placeholder = tf.placeholder(tf.int32,
                                             shape=block_shape.shape)
    paddings_placeholder = tf.placeholder(tf.int32)
    t = tf.batch_to_space_nd(input_placeholder, block_shape_placeholder,
                             paddings_placeholder)
    with self.assertRaises(ValueError):
        _ = t.eval({input_placeholder: np.zeros(input_shape, np.float32),
                    block_shape_placeholder: block_shape,
                    paddings_placeholder: paddings})
def _checkGrad(self, x, block_shape, crops):
    block_shape = np.array(block_shape)
    crops = np.array(crops).reshape((len(block_shape), 2))
    with self.test_session():
        tf_x = tf.convert_to_tensor(x)
        tf_y = tf.batch_to_space_nd(tf_x, block_shape, crops)
        epsilon = 1e-5
        (x_jacob_t, x_jacob_n) = tf.test.compute_gradient(
            tf_x, x.shape, tf_y, tf_y.get_shape().as_list(),
            x_init_value=x, delta=epsilon)
        self.assertAllClose(x_jacob_t, x_jacob_n, rtol=1e-2, atol=epsilon)
def upscale_height(images, scale):
    """Box upscaling along the H (axis=1) dimension.

    Args:
        images: A 4D `Tensor` in NHWC format.
        scale: A positive integer scale.

    Returns:
        A 4D `Tensor` of `images` upscaled by a factor `scale`.

    Raises:
        ValueError: If `scale` is not a positive integer.
    """
    scale = _get_validated_scale(scale)
    if scale == 1:
        return images
    images = tf.batch_to_space_nd(
        tf.tile(images, [scale, 1, 1, 1]),
        block_shape=[scale],
        crops=[[0, 0]])
    return images
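A minimal usage sketch (assuming `_get_validated_scale` from the same module just validates and returns the integer scale): tiling along the batch axis and then applying `batch_to_space_nd` with a length-1 `block_shape` interleaves the copies along H only.

images = tf.placeholder(tf.float32, shape=[4, 8, 8, 3])
taller = upscale_height(images, scale=2)  # shape becomes [4, 16, 8, 3]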
def layer_op(self, input_tensor):
    spatial_rank = layer_util.infer_spatial_rank(input_tensor)
    output_tensor = input_tensor
    if self.func == 'REPLICATE':
        if self.kernel_size != self.stride:
            raise ValueError(
                "`kernel_size` != `stride` is currently not "
                "supported in `REPLICATE` mode. Please "
                "consider using the `CHANNELWISE_DECONV` operation.")
        # Simply replicate input values to local regions of
        # (kernel_size ** spatial_rank) elements.
        kernel_size_all_dims = layer_util.expand_spatial_params(
            self.kernel_size, spatial_rank)
        pixel_num = np.prod(kernel_size_all_dims)
        repmat = np.hstack((pixel_num, [1] * spatial_rank, 1)).flatten()
        output_tensor = tf.tile(input=input_tensor, multiples=repmat)
        output_tensor = tf.batch_to_space_nd(
            input=output_tensor,
            block_shape=kernel_size_all_dims,
            crops=[[0, 0]] * spatial_rank)
    elif self.func == 'CHANNELWISE_DECONV':
        output_tensor = [tf.expand_dims(x, -1)
                         for x in tf.unstack(input_tensor, axis=-1)]
        output_tensor = [DeconvLayer(n_output_chns=1,
                                     kernel_size=self.kernel_size,
                                     stride=self.stride,
                                     padding='SAME',
                                     with_bias=self.with_bias,
                                     w_initializer=self.initializers['w'],
                                     w_regularizer=self.regularizers['w'],
                                     b_initializer=self.initializers['b'],
                                     b_regularizer=self.regularizers['b'],
                                     name='deconv_{}'.format(i))(x)
                         for (i, x) in enumerate(output_tensor)]
        output_tensor = tf.concat(output_tensor, axis=-1)
    return output_tensor
def testUnknownShape(self):
    # Verify that input shape and paddings shape can be unknown.
    _ = tf.batch_to_space_nd(
        tf.placeholder(tf.float32),
        tf.placeholder(tf.int32, shape=(2,)),
        tf.placeholder(tf.int32))

    # Only number of input dimensions is known.
    t = tf.batch_to_space_nd(
        tf.placeholder(tf.float32, shape=(None, None, None, None)),
        tf.placeholder(tf.int32, shape=(2,)),
        tf.placeholder(tf.int32))
    self.assertEqual(4, t.get_shape().ndims)

    # Dimensions are partially known.
    t = tf.batch_to_space_nd(
        tf.placeholder(tf.float32, shape=(None, None, None, 2)),
        tf.placeholder(tf.int32, shape=(2,)),
        tf.placeholder(tf.int32))
    self.assertEqual([None, None, None, 2], t.get_shape().as_list())

    # Dimensions are partially known.
    t = tf.batch_to_space_nd(
        tf.placeholder(tf.float32, shape=(3 * 2 * 3, None, None, 2)),
        [2, 3],
        tf.placeholder(tf.int32))
    self.assertEqual([3, None, None, 2], t.get_shape().as_list())

    # Dimensions are partially known.
    t = tf.batch_to_space_nd(
        tf.placeholder(tf.float32, shape=(3 * 2 * 3, None, 2, 2)),
        [2, 3],
        [[1, 1], [0, 1]])
    self.assertEqual([3, None, 5, 2], t.get_shape().as_list())

    # Dimensions are fully known.
    t = tf.batch_to_space_nd(
        tf.placeholder(tf.float32, shape=(3 * 2 * 3, 2, 1, 2)),
        [2, 3],
        [[1, 1], [0, 0]])
    self.assertEqual([3, 2, 3, 2], t.get_shape().as_list())
def position_sensitive_crop_regions(image,
                                    boxes,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool):
    """Position-sensitive crop and pool rectangular regions from a feature grid.

    The output crops are split into `spatial_bins_y` vertical bins and
    `spatial_bins_x` horizontal bins. For each intersection of a vertical and a
    horizontal bin the output values are gathered by performing
    `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
    channels of the image. This reduces `depth` by a factor of
    `(spatial_bins_y * spatial_bins_x)`.

    When global_pool is True, this function implements a differentiable version
    of position-sensitive RoI pooling used in
    [R-FCN detection system](https://arxiv.org/abs/1605.06409).

    When global_pool is False, this function implements a differentiable version
    of position-sensitive assembling operation used in
    [instance FCN](https://arxiv.org/abs/1603.08678).

    Args:
      image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
        `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
        A 3-D tensor of shape `[image_height, image_width, depth]`.
        Both `image_height` and `image_width` need to be positive.
      boxes: A `Tensor` of type `float32`.
        A 2-D tensor of shape `[num_boxes, 4]`. Each box is specified in
        normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate value
        of `y` is mapped to the image coordinate at `y * (image_height - 1)`, so
        that the `[0, 1]` interval of normalized image height is mapped to
        `[0, image_height - 1]` in image height coordinates. We do allow
        `y1 > y2`, in which case the sampled crop is an up-down flipped version
        of the original image. The width dimension is treated similarly.
      crop_size: A list of two integers `[crop_height, crop_width]`. All
        cropped image patches are resized to this size. The aspect ratio of the
        image content is not preserved. Both `crop_height` and `crop_width` need
        to be positive.
      num_spatial_bins: A list of two integers `[spatial_bins_y,
        spatial_bins_x]`. Represents the number of position-sensitive bins in
        the y and x directions. Both values should be >= 1. `crop_height` should
        be divisible by `spatial_bins_y`, and similarly for width. The number of
        image channels should be divisible by
        (spatial_bins_y * spatial_bins_x). Suggested value from the R-FCN
        paper: [3, 3].
      global_pool: A boolean variable.
        If True, we perform average global pooling on the features assembled
        from the position-sensitive score maps.
        If False, we keep the position-pooled features without global pooling
        over the spatial coordinates.
        Note that using global_pool=True is equivalent to but more efficient
        than running the function with global_pool=False and then performing
        global average pooling.

    Returns:
      position_sensitive_features: A 4-D tensor of shape
        `[num_boxes, K, K, crop_channels]`, where
        `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`, and where
        K = 1 when global_pool is True (average-pooled cropped regions), and
        K = crop_size when global_pool is False.

    Raises:
      ValueError: Raised in four situations:
        `num_spatial_bins` is not >= 1;
        `num_spatial_bins` does not divide `crop_size`;
        `(spatial_bins_y * spatial_bins_x)` does not divide `depth`;
        `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
    """
    total_bins = 1
    bin_crop_size = []

    for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
        if num_bins < 1:
            raise ValueError('num_spatial_bins should be >= 1')

        if crop_dim % num_bins != 0:
            raise ValueError('crop_size should be divisible by '
                             'num_spatial_bins')

        total_bins *= num_bins
        bin_crop_size.append(crop_dim // num_bins)

    if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
        raise ValueError('Only support square bin crop size for now.')

    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    spatial_bins_y, spatial_bins_x = num_spatial_bins

    # Split each box into spatial_bins_y * spatial_bins_x bins.
    position_sensitive_boxes = []
    for bin_y in range(spatial_bins_y):
        step_y = (ymax - ymin) / spatial_bins_y
        for bin_x in range(spatial_bins_x):
            step_x = (xmax - xmin) / spatial_bins_x
            box_coordinates = [ymin + bin_y * step_y,
                               xmin + bin_x * step_x,
                               ymin + (bin_y + 1) * step_y,
                               xmin + (bin_x + 1) * step_x]
            position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

    image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=2)

    image_crops = []
    for (split, box) in zip(image_splits, position_sensitive_boxes):
        if split.shape.is_fully_defined() and box.shape.is_fully_defined():
            crop = tf.squeeze(
                matmul_crop_and_resize(
                    tf.expand_dims(split, axis=0),
                    tf.expand_dims(box, axis=0),
                    bin_crop_size),
                axis=0)
        else:
            crop = tf.image.crop_and_resize(
                tf.expand_dims(split, 0), box,
                tf.zeros(tf.shape(boxes)[0], dtype=tf.int32), bin_crop_size)
        image_crops.append(crop)

    if global_pool:
        # Average over all bins.
        position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
        # Then average over spatial positions within the bins.
        position_sensitive_features = tf.reduce_mean(
            position_sensitive_features, [1, 2], keep_dims=True)
    else:
        # Reorder height/width to depth channel.
        block_size = bin_crop_size[0]
        if block_size >= 2:
            image_crops = [tf.space_to_depth(crop, block_size=block_size)
                           for crop in image_crops]

        # Pack image_crops so that the first dimension is for
        # position-sensitive boxes.
        position_sensitive_features = tf.stack(image_crops, axis=0)

        # Unroll the position-sensitive boxes to spatial positions.
        position_sensitive_features = tf.squeeze(
            tf.batch_to_space_nd(position_sensitive_features,
                                 block_shape=[1] + num_spatial_bins,
                                 crops=tf.zeros((3, 2), dtype=tf.int32)),
            squeeze_dims=[0])

        # Reorder back the depth channel.
        if block_size >= 2:
            position_sensitive_features = tf.depth_to_space(
                position_sensitive_features, block_size=block_size)

    return position_sensitive_features
def sub_pixel_1D(self, act, expansion=2):
    up_sampled = tf.transpose(act, [2, 1, 0])
    up_sampled = tf.batch_to_space_nd(up_sampled, [expansion], [[0, 0]])
    return tf.transpose(up_sampled, [2, 1, 0])
def _PS(self, I, r):
    X = tf.transpose(I, [2, 1, 0])              # (r, w, b)
    X = tf.batch_to_space_nd(X, [r], [[0, 0]])  # (1, r*w, b)
    X = tf.transpose(X, [2, 1, 0])
    return X
def batch_to_space_nd(self, block_shape, crops):
    value = tf.batch_to_space_nd(self.value, block_shape, crops)
    return DenseTensor(value)
def position_sensitive_crop_regions(image,
                                    boxes,
                                    box_ind,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool,
                                    extrapolation_value=None):
    """Position-sensitive crop and pool rectangular regions from a feature grid.

    The output crops are split into `spatial_bins_y` vertical bins and
    `spatial_bins_x` horizontal bins. For each intersection of a vertical and a
    horizontal bin the output values are gathered by performing
    `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
    channels of the image. This reduces `depth` by a factor of
    `(spatial_bins_y * spatial_bins_x)`.

    When global_pool is True, this function implements a differentiable version
    of position-sensitive RoI pooling used in
    [R-FCN detection system](https://arxiv.org/abs/1605.06409).

    When global_pool is False, this function implements a differentiable version
    of position-sensitive assembling operation used in
    [instance FCN](https://arxiv.org/abs/1603.08678).

    Args:
      image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
        `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
        A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
        Both `image_height` and `image_width` need to be positive.
      boxes: A `Tensor` of type `float32`.
        A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
        specifies the coordinates of a box in the `box_ind[i]` image and is
        specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
        coordinate value of `y` is mapped to the image coordinate at
        `y * (image_height - 1)`, so that the `[0, 1]` interval of normalized
        image height is mapped to `[0, image_height - 1]` in image height
        coordinates. We do allow `y1 > y2`, in which case the sampled crop is
        an up-down flipped version of the original image. The width dimension
        is treated similarly. Normalized coordinates outside the `[0, 1]` range
        are allowed, in which case we use `extrapolation_value` to extrapolate
        the input image values.
      box_ind: A `Tensor` of type `int32`.
        A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
        The value of `box_ind[i]` specifies the image that the `i`-th box
        refers to.
      crop_size: A list of two integers `[crop_height, crop_width]`. All
        cropped image patches are resized to this size. The aspect ratio of the
        image content is not preserved. Both `crop_height` and `crop_width` need
        to be positive.
      num_spatial_bins: A list of two integers `[spatial_bins_y,
        spatial_bins_x]`. Represents the number of position-sensitive bins in
        the y and x directions. Both values should be >= 1. `crop_height` should
        be divisible by `spatial_bins_y`, and similarly for width. The number of
        image channels should be divisible by
        (spatial_bins_y * spatial_bins_x). Suggested value from the R-FCN
        paper: [3, 3].
      global_pool: A boolean variable.
        If True, we perform average global pooling on the features assembled
        from the position-sensitive score maps.
        If False, we keep the position-pooled features without global pooling
        over the spatial coordinates.
        Note that using global_pool=True is equivalent to but more efficient
        than running the function with global_pool=False and then performing
        global average pooling.
      extrapolation_value: An optional `float`. Defaults to `0`. Value used for
        extrapolation, when applicable.

    Returns:
      position_sensitive_features: A 4-D tensor of shape
        `[num_boxes, K, K, crop_channels]`, where
        `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`, and where
        K = 1 when global_pool is True (average-pooled cropped regions), and
        K = crop_size when global_pool is False.

    Raises:
      ValueError: Raised in four situations:
        `num_spatial_bins` is not >= 1;
        `num_spatial_bins` does not divide `crop_size`;
        `(spatial_bins_y * spatial_bins_x)` does not divide `depth`;
        `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
    """
    total_bins = 1
    bin_crop_size = []

    for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
        if num_bins < 1:
            raise ValueError('num_spatial_bins should be >= 1')

        if crop_dim % num_bins != 0:
            raise ValueError('crop_size should be divisible by '
                             'num_spatial_bins')

        total_bins *= num_bins
        bin_crop_size.append(crop_dim // num_bins)

    if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
        raise ValueError('Only support square bin crop size for now.')

    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    spatial_bins_y, spatial_bins_x = num_spatial_bins

    # Split each box into spatial_bins_y * spatial_bins_x bins.
    position_sensitive_boxes = []
    for bin_y in range(spatial_bins_y):
        step_y = (ymax - ymin) / spatial_bins_y
        for bin_x in range(spatial_bins_x):
            step_x = (xmax - xmin) / spatial_bins_x
            box_coordinates = [ymin + bin_y * step_y,
                               xmin + bin_x * step_x,
                               ymin + (bin_y + 1) * step_y,
                               xmin + (bin_x + 1) * step_x]
            position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

    image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=3)

    image_crops = []
    for (split, box) in zip(image_splits, position_sensitive_boxes):
        crop = tf.image.crop_and_resize(
            split, box, box_ind, bin_crop_size,
            extrapolation_value=extrapolation_value)
        image_crops.append(crop)

    if global_pool:
        # Average over all bins.
        position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
        # Then average over spatial positions within the bins.
        position_sensitive_features = tf.reduce_mean(
            position_sensitive_features, [1, 2], keep_dims=True)
    else:
        # Reorder height/width to depth channel.
        block_size = bin_crop_size[0]
        if block_size >= 2:
            image_crops = [tf.space_to_depth(crop, block_size=block_size)
                           for crop in image_crops]

        # Pack image_crops so that the first dimension is for
        # position-sensitive boxes.
        position_sensitive_features = tf.stack(image_crops, axis=0)

        # Unroll the position-sensitive boxes to spatial positions.
        position_sensitive_features = tf.squeeze(
            tf.batch_to_space_nd(position_sensitive_features,
                                 block_shape=[1] + num_spatial_bins,
                                 crops=tf.zeros((3, 2), dtype=tf.int32)),
            squeeze_dims=[0])

        # Reorder back the depth channel.
        if block_size >= 2:
            position_sensitive_features = tf.depth_to_space(
                position_sensitive_features, block_size=block_size)

    return position_sensitive_features
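A corresponding sketch for this batched variant (illustrative shapes; `box_ind` assigns each box to an image in the batch):

features = tf.placeholder(tf.float32, shape=[2, 32, 32, 9 * 8])  # NHWC
rois = tf.placeholder(tf.float32, shape=[5, 4])
roi_batch_ind = tf.placeholder(tf.int32, shape=[5])
assembled = position_sensitive_crop_regions(
    features, rois, roi_batch_ind, crop_size=[6, 6],
    num_spatial_bins=[3, 3], global_pool=False)  # -> [5, 6, 6, 8]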
def srvc_base(lr, scale, F, block_h, block_w):
    est = lr
    patches = tf.space_to_batch_nd(lr, block_shape=[block_h, block_w],
                                   paddings=[[0, 0], [0, 0]])
    features = tf.layers.conv2d(patches, 256, 3, strides=(1, 1),
                                padding='valid', data_format='channels_last',
                                dilation_rate=(1, 1), activation=tf.nn.relu,
                                use_bias=True)
    kernel = tf.layers.conv2d(features, 3 * 3 * 3 * F, 3, strides=(1, 1),
                              padding='valid', data_format='channels_last',
                              dilation_rate=(1, 1), activation=None,
                              use_bias=True)
    bias = tf.layers.conv2d(features, F, 3, strides=(1, 1), padding='valid',
                            data_format='channels_last', dilation_rate=(1, 1),
                            activation=None, use_bias=True)
    kernel = tf.reshape(kernel, [-1, 1, 1, 3 * 3 * 3, F])
    bias = tf.reshape(bias, [-1, 1, 1, F])

    patches = tf.image.extract_patches(patches, sizes=[1, 3, 3, 1],
                                       strides=[1, 1, 1, 1],
                                       rates=[1, 1, 1, 1], padding='SAME')
    patches = tf.expand_dims(patches, axis=3)
    patches = tf.matmul(patches, kernel)
    patches = tf.squeeze(patches, axis=3) + bias
    patches = tf.nn.relu(patches)

    est = tf.batch_to_space_nd(patches, block_shape=[block_h, block_w],
                               crops=[[0, 0], [0, 0]])
    est = tf.layers.conv2d(est, 128, 5, strides=(1, 1), padding='same',
                           data_format='channels_last', dilation_rate=(1, 1),
                           activation=tf.nn.relu, use_bias=True)
    est = tf.layers.conv2d(est, 32, 3, strides=(1, 1), padding='same',
                           data_format='channels_last', dilation_rate=(1, 1),
                           activation=tf.nn.relu, use_bias=True)
    est = tf.layers.conv2d(est, 3 * scale * scale, 3, strides=(1, 1),
                           padding='same', data_format='channels_last',
                           dilation_rate=(1, 1), activation=None,
                           use_bias=True)
    est = tf.nn.depth_to_space(est, scale, data_format='NHWC')
    indepth_est = est
    return indepth_est
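The patch-tiling trick above is easiest to see in isolation. A minimal sketch (shapes illustrative): `space_to_batch_nd` moves each `block_h x block_w` grid cell into the batch dimension, and `batch_to_space_nd` inverts it exactly.

lr = tf.placeholder(tf.float32, shape=[1, 64, 64, 3])
tiled = tf.space_to_batch_nd(lr, block_shape=[16, 16],
                             paddings=[[0, 0], [0, 0]])   # -> [256, 4, 4, 3]
restored = tf.batch_to_space_nd(tiled, block_shape=[16, 16],
                                crops=[[0, 0], [0, 0]])   # -> [1, 64, 64, 3]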
def batch_to_space_nd(self, block_shape, crops):
    backing = tf.batch_to_space_nd(self.value, block_shape, crops)
    return int64factory.tensor(backing)
def test_fold_old_batch_norms_with_batch_to_space(self):
    """
    Python port of TestFoldFusedBatchNormsWithBatchToSpace() in the TF Graph
    Transform Tool tests.
    """
    input_data = np.array(
        [1., 4., 2., 5., 3., 6., -1., -4., -2., -5., -3., -6.],
        dtype=np.float32).reshape([2, 1, 3, 2])
    weights_data = np.array(
        [1., 2., 3., 4., 0.1, 0.2, 0.3, 0.4],
        dtype=np.float32).reshape([1, 2, 2, 2])
    mean_data = np.array([10., 20.], dtype=np.float32).reshape([2])
    variance_data = np.array([0.25, 0.5], dtype=np.float32).reshape([2])
    beta_data = np.array([0.1, 0.6], dtype=np.float32).reshape([2])
    gamma_data = np.array([1., 2.], dtype=np.float32).reshape([2])
    block_shape_data = np.array([1, 2]).reshape([2])
    crops_data = np.array([0, 0, 0, 1]).reshape([2, 2])

    # Create the parts below the batch norm using TF APIs:
    # (input, weights) --> Conv2D --> BatchToSpaceND --> [...],
    # plus inputs to [...]
    tf_g = tf.Graph()
    with tf_g.as_default():
        in_t = tf.constant(input_data, name="input_op")
        weights_t = tf.constant(weights_data, name="weights_op")
        conv_t = tf.nn.conv2d(in_t, weights_t, [1, 1, 1, 1], "VALID",
                              name="conv_op")
        batch_to_space_t = tf.batch_to_space_nd(conv_t, block_shape_data,
                                                crops_data,
                                                name="batch_to_space_op")
        mean_t = tf.constant(mean_data, name="mean_op")
        variance_t = tf.constant(variance_data, name="variance_op")
        beta_t = tf.constant(beta_data, name="beta_op")
        gamma_t = tf.constant(gamma_data, name="gamma_op")

    g = gde.Graph(tf_g)

    # Now add the FusedBatchNorm node directly, since there's no TF API to
    # create that op.
    batch_norm_node = g.add_node("output", "FusedBatchNorm")
    batch_norm_node.set_inputs([
        g[batch_to_space_t.name], g[gamma_t.name], g[beta_t.name],
        g[mean_t.name], g[variance_t.name]
    ])
    batch_norm_node.add_attr("T", tf.float32)
    batch_norm_node.add_attr("epsilon", 0.00001)
    batch_norm_node.add_attr("is_training", False)
    batch_norm_node.infer_outputs()

    # Run the graph before and after the rewrite and compare results.
    with tf.Session(graph=g.to_tf_graph()) as sess:
        original_outputs = sess.run("output:0")
    gde.rewrite.fold_old_batch_norms(g)
    with tf.Session(graph=g.to_tf_graph()) as sess:
        fused_outputs = sess.run("output:0")
    self.assertClose(original_outputs, fused_outputs, delta=1e-5)

    # Make sure the rewrite happened.
    for n in g.nodes:
        self.assertNotEqual(n.op_type, "FusedBatchNorm")
def _PS(I, r):
    X = tf.transpose(I, [2, 1, 0])              # (r, w, b)
    X = tf.batch_to_space_nd(X, [r], [[0, 0]])  # (1, r*w, b)
    X = tf.transpose(X, [2, 1, 0])
    return X
def conv1d(x, kernel_size, num_filters, causal=False, dilation_rate=1,
           padding='SAME', stride=1, weight_init=None, name=''):
    '''
    Wrapper for a 1D convolutional layer (with both dilation and causal
    options).

    :param: tensor: x: Input to convolutional layer.
    :param: int: kernel_size: Size of kernel for convolution.
    :param: int: num_filters: Number of filters (a.k.a. kernels/channels) in
        the output.
    :param: bool: causal: True for causal convolutions. Uses causal padding
        with zeros like in Keras:
        [1] https://github.com/keras-team/keras/blob/master/keras/backend/tensorflow_backend.py
    :param: dilation_rate: Rate for dilated convolution. If dilation_rate=1, we
        just have normal convolution.
    :param: padding: 'VALID' or 'SAME'. (Note the default is 'SAME', and for
        causal convolution, padding will be overwritten as 'VALID'.)
    :param: weight_init: Weight initializer. Default is
        'tf.glorot_uniform_initializer'
        (see https://www.tensorflow.org/api_docs/python/tf/get_variable).
    :param: stride: Stride of the convolution kernel. Default is 1.
    :param: name: Name of the operation in the tensorflow graph.
    '''
    # Infer number of channels
    input_channels = int(x.get_shape()[-1])

    # Generate weights
    weights, biases = generate_weights(
        [kernel_size, input_channels, num_filters], weight_init, name,
        bias_shape=[num_filters])

    if causal:
        # Left padding for causal convolutions (see ref [1])
        causal_padding = dilation_rate * (kernel_size - 1)
        x = tf.pad(x, [[0, 0], [causal_padding, 0], [0, 0]])
        padding = 'VALID'

    if dilation_rate > 1:
        # Dilated convolution.
        # TODO: Need to put in a check to see if input % dilation_rate == 0
        # Docs under: 'tf.manip.space_to_batch_nd' for r1.11
        stb = tf.space_to_batch_nd(x, paddings=[[0, 0], [0, 0]],
                                   block_shape=[dilation_rate, 1],
                                   name='stb_{}'.format(name))
        conv_1d = tf.nn.conv1d(stb, weights, stride=stride, padding=padding,
                               name='op_{}'.format(name))
        # conv_1d = tf.nn.bias_add(conv_1d, biases)
        conv_1d = tf.batch_to_space_nd(conv_1d, crops=[[0, 0], [0, 0]],
                                       block_shape=[dilation_rate, 1],
                                       name='bts_{}'.format(name))
        conv_1d = tf.nn.bias_add(conv_1d, biases,
                                 name='bias_add_{}'.format(name))
    else:
        conv_1d = tf.nn.conv1d(x, weights, stride=stride, padding=padding,
                               name='op_{}'.format(name))
        conv_1d = tf.nn.bias_add(conv_1d, biases,
                                 name='bias_add_{}'.format(name))
    return conv_1d
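A hypothetical call (assumes `generate_weights` from the same module is in scope). With `dilation_rate=4` and `kernel_size=3`, the causal branch left-pads by 4 * (3 - 1) = 8 samples, so the output keeps the input length and never sees future timesteps.

x = tf.placeholder(tf.float32, shape=[8, 128, 16])  # (batch, time, channels)
y = conv1d(x, kernel_size=3, num_filters=32, causal=True,
           dilation_rate=4, name='causal_dilated')   # -> (8, 128, 32)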
import tensorflow as tf

x = tf.placeholder(tf.float32, shape=[None, 28, 28, 3], name='x')
t1 = tf.space_to_batch_nd(x, [2, 2], [[0, 0], [0, 0]])
t2 = tf.batch_to_space_nd(t1, [2, 2], [[0, 0], [0, 0]])

sess = tf.Session()
sess.run(tf.global_variables_initializer())

c_t1 = tf.contrib.lite.TFLiteConverter.from_session(sess, [x], [t2])
c_t1_lm = c_t1.convert()
open('batch_space.tflite', 'wb').write(c_t1_lm)

sess.close()
import numpy as np
import tensorflow as tf

c = 3
h = 1024
p = 128

image = tf.random_normal([1, h, h, c])

# Image to patches conversion
pad = [[0, 0], [0, 0]]
patches = tf.space_to_batch_nd(image, [p, p], pad)
patches = tf.split(patches, p * p, 0)
patches = tf.stack(patches, 3)
patches = tf.reshape(patches, [int((h / p) ** 2), p, p, c])

# Do processing on patches.
# Using patches here to reconstruct.
patches_proc = tf.reshape(patches, [1, int(h / p), int(h / p), int(p * p), c])
patches_proc = tf.split(patches_proc, p * p, 3)
patches_proc = tf.stack(patches_proc, axis=0)
patches_proc = tf.reshape(patches_proc, [p * p, int(h / p), int(h / p), c])
reconstructed = tf.batch_to_space_nd(patches_proc, [p, p], pad)

sess = tf.Session()
I, P, R_n = sess.run([image, patches, reconstructed])
print(I.shape)
print(P.shape)
print(R_n.shape)
err = np.sum((R_n - I) ** 2)
print(err)
def batch_to_space(x):
    if dilation == 1:
        return x
    return tf.batch_to_space_nd(x, [dilation, dilation],
                                tf.zeros([2, 2], dtype=tf.int32))
def SubPixel1D(I, r):
    with tf.name_scope('subpixel'):
        X = tf.transpose(I, [2, 1, 0])              # (r, w, b)
        X = tf.batch_to_space_nd(X, [r], [[0, 0]])  # (1, r*w, b)
        X = tf.transpose(X, [2, 1, 0])
        return X
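Shape sketch for the subpixel helpers above: for an input of shape `(batch, width, r * channels)`, the transpose / batch_to_space_nd / transpose sequence yields `(batch, r * width, channels)`.

I = tf.placeholder(tf.float32, shape=[8, 100, 4])
up = SubPixel1D(I, r=2)  # -> [8, 200, 2]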
# -*- coding: utf-8 -*-
import tensorflow as tf
import numpy as np

np.random.seed(1)

b = np.linspace(1, 36, num=36).reshape((1, 6, 6, 1))
a = tf.constant(b, dtype=tf.int32)
c = tf.space_to_batch_nd(a, block_shape=[3, 3], paddings=[[0, 0], [0, 0]])
d = tf.batch_to_space_nd(c, block_shape=[3, 3], crops=[[0, 0], [0, 0]])

with tf.Session() as sess:
    print(sess.run(a))
    print(sess.run(c).shape)
    print(sess.run(c))
    print(sess.run(d).shape)
    print(sess.run(d))
def _phase_shift(I, r=2):
    X = tf.transpose(I, [2, 1, 0])              # (r, w, b)
    X = tf.batch_to_space_nd(X, [r], [[0, 0]])  # (1, r*w, b)
    X = tf.transpose(X, [2, 1, 0])
    return X
def __exit__(self, *args):
    if self.dilation_factor > 1:
        self._tensor = tf.batch_to_space_nd(self._tensor,
                                            self.block_shape,
                                            self.zero_paddings,
                                            name='de-dilate')
def _construct_batch_to_space_nd(input_shape):
    a = tf.placeholder(tf.float32, shape=input_shape, name="input")
    block_shape = tf.constant([2, 2], dtype=tf.int32)
    crops = tf.constant([[0, 0], [2, 0]], dtype=tf.int32)
    x = tf.batch_to_space_nd(a, block_shape=block_shape, crops=crops)
    return x, a
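Shape check for the graph above (hypothetical input shape): with `block_shape=[2, 2]` the batch must be divisible by 4, and `crops=[[0, 0], [2, 0]]` trims two rows from the top of the expanded height.

out, inp = _construct_batch_to_space_nd([4, 5, 5, 1])
print(out.get_shape().as_list())  # [1, 8, 10, 1]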
def apply_dna_kernels_dilated(image, kernels, dilation_rate=(1, 1)):
    dilation_rate = list(dilation_rate) if isinstance(
        dilation_rate, (tuple, list)) else [dilation_rate] * 2
    batch_size, height, width, color_channels = image.get_shape().as_list()
    # `kernels` is assumed 6-D:
    # [batch, kernel_height, kernel_width, ksize_y, ksize_x,
    #  num_transformed_images]; the two kernel dimensions are recombined into
    # a `kernel_size` list, which the code below indexes.
    (batch_size, kernel_height, kernel_width, ksize_y, ksize_x,
     num_transformed_images) = kernels.get_shape().as_list()
    kernel_size = [ksize_y, ksize_x]

    # Flatten the spatial dimensions of the kernel.
    kernels_reshaped = tf.reshape(kernels, [
        batch_size, kernel_height, kernel_width,
        kernel_size[0] * kernel_size[1], num_transformed_images])
    image_padded = pad2d(image, kernel_size, rate=dilation_rate,
                         padding='SAME', mode='SYMMETRIC')
    # For dilation_rate = [2, 2], this is equivalent to:
    # small_images = [image[:, 0::2, 0::2, :], image[:, 0::2, 1::2, :],
    #                 image[:, 1::2, 0::2, :], image[:, 1::2, 1::2, :]]
    small_images = tf.space_to_batch_nd(image_padded, dilation_rate,
                                        paddings=[[0, 0]] * 2)
    small_images = tf.reshape(small_images, [
        dilation_rate[0] * dilation_rate[1], batch_size,
        image_padded.get_shape().as_list()[1] // dilation_rate[0],
        image_padded.get_shape().as_list()[2] // dilation_rate[1],
        color_channels])
    small_images = tf.unstack(small_images, axis=0)
    small_outputs = []
    for small_image in small_images:
        # Combine channel and batch dimensions into the first dimension.
        image_transposed = tf.transpose(small_image, [3, 0, 1, 2])
        image_reshaped = flatten(image_transposed, 0, 1)[..., None]
        patches_reshaped = tf.extract_image_patches(
            image_reshaped, ksizes=[1] + kernel_size + [1],
            strides=[1] * 4, rates=[1] * 4, padding='VALID')
        # Separate channel and batch dimensions.
        patches = tf.reshape(patches_reshaped, [
            color_channels, batch_size,
            height // dilation_rate[0], width // dilation_rate[1],
            kernel_size[0] * kernel_size[1]])
        # Reduce along the spatial dimensions of the kernel.
        outputs = tf.reduce_sum(
            patches[..., None] * kernels_reshaped[None, ...], axis=-2)
        # Swap channel and transformation dimensions.
        outputs = tf.transpose(outputs, [4, 1, 2, 3, 0])
        outputs = tf.unstack(outputs, axis=0)
        small_outputs.append(outputs)
    small_outputs = list(zip(*small_outputs))
    small_outputs = [
        tf.reshape(small_output, [
            dilation_rate[0] * dilation_rate[1] * batch_size,
            height // dilation_rate[0], width // dilation_rate[1],
            color_channels])
        for small_output in small_outputs]
    outputs = [
        tf.batch_to_space_nd(small_output, dilation_rate, crops=[[0, 0]] * 2)
        for small_output in small_outputs]
    return outputs
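The dilation decomposition used above can be sketched standalone: `space_to_batch_nd` with `block_shape=[2, 2]` splits an image into its four dilated sub-grids (stacked along the batch axis), and `batch_to_space_nd` re-interleaves them exactly.

img = tf.placeholder(tf.float32, shape=[3, 8, 8, 1])
subgrids = tf.space_to_batch_nd(img, [2, 2], paddings=[[0, 0]] * 2)  # [12, 4, 4, 1]
back = tf.batch_to_space_nd(subgrids, [2, 2], crops=[[0, 0]] * 2)    # [3, 8, 8, 1]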