def testDepthInterleavedDepth3(self):
    x_np = [[[[1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12]]]]
    block_size = 2
    with self.test_session(use_gpu=False):
        x_tf = tf.depth_to_space(x_np, block_size)
        self.assertAllEqual(x_tf.eval(),
                            [[[[1, 2, 3], [4, 5, 6]],
                              [[7, 8, 9], [10, 11, 12]]]])
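The test above pins down the channel-to-block ordering of tf.depth_to_space: output[b, h*r + i, w*r + j, c] comes from input channel (i*r + j)*C' + c. For reference, the same rearrangement in plain NumPy (a minimal sketch, not taken from any snippet here; NHWC layout assumed):

import numpy as np

def depth_to_space_ref(x, block_size):
    # x: [N, H, W, C] with C divisible by block_size**2 (NHWC layout)
    n, h, w, c = x.shape
    r = block_size
    x = x.reshape(n, h, w, r, r, c // (r * r))  # split depth into an r*r block grid
    x = x.transpose(0, 1, 3, 2, 4, 5)           # interleave block rows/cols with H/W
    return x.reshape(n, h * r, w * r, c // (r * r))

# Matches testDepthInterleavedDepth3 above:
x = np.arange(1, 13).reshape(1, 1, 1, 12)
assert depth_to_space_ref(x, 2).tolist() == [[[[1, 2, 3], [4, 5, 6]],
                                              [[7, 8, 9], [10, 11, 12]]]]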
def SubpixelConv2D(*args, **kwargs):
    kwargs['output_dim'] = 4 * kwargs['output_dim']
    output = lib.ops.conv2d.Conv2D(*args, **kwargs)
    output = tf.transpose(output, [0, 2, 3, 1])
    output = tf.depth_to_space(output, 2)
    output = tf.transpose(output, [0, 3, 1, 2])
    return output
def testDepthInterleaved(self):
    x_np = [[[[1, 10, 2, 20, 3, 30, 4, 40]]]]
    block_size = 2
    with self.test_session(use_gpu=False):
        x_tf = tf.depth_to_space(x_np, block_size)
        self.assertAllEqual(x_tf.eval(),
                            [[[[1, 10], [2, 20]],
                              [[3, 30], [4, 40]]]])
def depth_to_space(input, scale, data_format=None):
    '''Uses the phase shift algorithm to trade channels/depth for spatial resolution.'''
    if data_format is None:
        data_format = 'NHWC'
    out = tf.depth_to_space(input, scale, data_format=data_format)
    return out
def testBlockSize0(self):
    x_np = [[[[1], [2]], [[3], [4]]]]
    block_size = 0
    with self.assertRaises(ValueError):
        out_tf = tf.depth_to_space(x_np, block_size)
        out_tf.eval()
def deconv2d(cur, i):
    thicker = conv(
        cur,
        output_filters * 4, (1, 1),
        padding="SAME",
        activation=tf.nn.relu,
        name="deconv2d" + str(i))
    return tf.depth_to_space(thicker, 2)
def UpsampleConv(name, input_dim, output_dim, filter_size, inputs, he_init=True, biases=True):
    output = inputs
    output = tf.concat([output, output, output, output], axis=1)
    output = tf.transpose(output, [0, 2, 3, 1])
    output = tf.depth_to_space(output, 2)
    output = tf.transpose(output, [0, 3, 1, 2])
    output = lib.ops.conv2d.Conv2D(name, input_dim, output_dim, filter_size, output,
                                   he_init=he_init, biases=biases)
    return output
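The concat-then-shuffle prelude in UpsampleConv (and its siblings below) is exactly nearest-neighbor 2x upsampling: the four stacked copies land on the four positions of each 2x2 output block. A standalone sketch of that equivalence, assuming NHWC input to skip the transposes:

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.rand(1, 3, 3, 2), dtype=tf.float32)  # NHWC

# Four stacked copies + depth_to_space(2) ...
four = tf.concat([x, x, x, x], axis=-1)
shuffled = tf.depth_to_space(four, 2)

# ... equals nearest-neighbor 2x upsampling.
nearest = tf.image.resize_images(
    x, [6, 6], tf.image.ResizeMethod.NEAREST_NEIGHBOR)

with tf.Session() as sess:
    a, b = sess.run([shuffled, nearest])
    np.testing.assert_allclose(a, b)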
def testBlockSizeOne(self):
    x_np = [[[[1, 1, 1, 1], [2, 2, 2, 2]],
             [[3, 3, 3, 3], [4, 4, 4, 4]]]]
    block_size = 1
    with self.assertRaises(ValueError):
        out_tf = tf.depth_to_space(x_np, block_size)
        out_tf.eval()
def testBlockSize4FlatInput(self):
    x_np = [[[[1, 2, 5, 6, 3, 4, 7, 8, 9, 10, 13, 14, 11, 12, 15, 16]]]]
    block_size = 4
    with self.test_session(use_gpu=False):
        x_tf = tf.depth_to_space(x_np, block_size)
        self.assertAllEqual(x_tf.eval(),
                            [[[[1], [2], [5], [6]],
                              [[3], [4], [7], [8]],
                              [[9], [10], [13], [14]],
                              [[11], [12], [15], [16]]]])
def depth_to_space(cls, ipt, scale, data_format=None):
    """Uses the phase shift algorithm to trade channels/depth for spatial resolution."""
    if data_format is None:
        data_format = K.image_data_format()
    data_format = data_format.lower()
    ipt = cls._preprocess_conv2d_input(ipt, data_format)
    out = tf.depth_to_space(ipt, scale)
    out = cls._postprocess_conv2d_output(out, data_format)
    return out
def testDepthToSpaceTranspose(self):
    x = np.arange(20 * 5 * 8 * 7, dtype=np.float32).reshape([20, 5, 8, 7])
    block_size = 2
    crops = np.zeros((2, 2), dtype=np.int32)
    y1 = tf.batch_to_space(x, crops, block_size=block_size)
    y2 = tf.transpose(
        tf.depth_to_space(
            tf.transpose(x, [3, 1, 2, 0]), block_size=block_size),
        [3, 1, 2, 0])
    with self.test_session():
        self.assertAllEqual(y1.eval(), y2.eval())
def testBlockSizeTooLarge(self):
    x_np = [[[[1, 2, 3, 4], [5, 6, 7, 8]],
             [[9, 10, 11, 12], [13, 14, 15, 16]]]]
    block_size = 4
    # Raises an exception, since the depth is only 4 and needs to be
    # divisible by 16.
    with self.assertRaises(IndexError):
        out_tf = tf.depth_to_space(x_np, block_size)
        out_tf.eval()
def phase_shift(x, upsampling_factor=2, data_format="NCHW", name="PhaseShift"):
    if data_format == "NCHW":
        x = tf.transpose(x, [0, 2, 3, 1])
    x = tf.depth_to_space(x, upsampling_factor, name=name)
    if data_format == "NCHW":
        x = tf.transpose(x, [0, 3, 1, 2])
    return x
def decompress_step(source, hparams, first_relu, is_2d, name):
    """Decompression function."""
    with tf.variable_scope(name):
        shape = common_layers.shape_list(source)
        multiplier = 4 if is_2d else 2
        kernel = (1, 1) if is_2d else (1, 1)
        thicker = common_layers.conv_block(
            source,
            hparams.hidden_size * multiplier, [((1, 1), kernel)],
            first_relu=first_relu,
            name="decompress_conv")
        if is_2d:
            return tf.depth_to_space(thicker, 2)
        return tf.reshape(thicker,
                          [shape[0], shape[1] * 2, 1, hparams.hidden_size])
def testNonSquare(self):
    x_np = [[[[1, 10, 2, 20, 3, 30, 4, 40]],
             [[5, 50, 6, 60, 7, 70, 8, 80]],
             [[9, 90, 10, 100, 11, 110, 12, 120]]]]
    block_size = 2
    with self.test_session(use_gpu=False):
        x_tf = tf.depth_to_space(x_np, block_size)
        self.assertAllEqual(x_tf.eval(),
                            [[[[1, 10], [2, 20]],
                              [[3, 30], [4, 40]],
                              [[5, 50], [6, 60]],
                              [[7, 70], [8, 80]],
                              [[9, 90], [10, 100]],
                              [[11, 110], [12, 120]]]])
def testDepthInterleavedLarger(self):
    x_np = [[[[1, 10, 2, 20, 3, 30, 4, 40],
              [5, 50, 6, 60, 7, 70, 8, 80]],
             [[9, 90, 10, 100, 11, 110, 12, 120],
              [13, 130, 14, 140, 15, 150, 16, 160]]]]
    block_size = 2
    with self.test_session(use_gpu=False):
        x_tf = tf.depth_to_space(x_np, block_size)
        self.assertAllEqual(x_tf.eval(),
                            [[[[1, 10], [2, 20], [5, 50], [6, 60]],
                              [[3, 30], [4, 40], [7, 70], [8, 80]],
                              [[9, 90], [10, 100], [13, 130], [14, 140]],
                              [[11, 110], [12, 120], [15, 150], [16, 160]]]])
def _PS(X, r, n_out_channels):
    if n_out_channels >= 1:
        assert int(X.get_shape()[-1]) == (r ** 2) * n_out_channels, _err_log
        # bsize, a, b, c = X.get_shape().as_list()
        # bsize = tf.shape(X)[0]  # Handling Dimension(None) type for undefined batch dim
        # Xs = tf.split(X, r, 3)  # b*h*w*r*r
        # Xr = tf.concat(Xs, 2)  # b*h*(r*w)*r
        # X = tf.reshape(Xr, (bsize, r*a, r*b, n_out_channel))  # b*(r*h)*(r*w)*c
        X = tf.depth_to_space(X, r)
    else:
        logging.info(_err_log)
    return X
def _checkGrad(self, x, block_size):
    assert 4 == x.ndim
    with self.test_session():
        tf_x = tf.convert_to_tensor(x)
        tf_y = tf.depth_to_space(tf_x, block_size)
        epsilon = 1e-2
        (x_jacob_t, x_jacob_n) = tf.test.compute_gradient(
            tf_x,
            x.shape,
            tf_y,
            tf_y.get_shape().as_list(),
            x_init_value=x,
            delta=epsilon)
        self.assertAllClose(x_jacob_t, x_jacob_n, rtol=1e-2, atol=epsilon)
def UpsampleConv(name, input_dim, output_dim, filter_size, inputs, he_init=True, biases=True):
    output = inputs
    output = tf.concat([output, output, output, output], axis=1)
    output = tf.transpose(output, [0, 2, 3, 1])
    output = tf.depth_to_space(output, 2)
    output = tf.transpose(output, [0, 3, 1, 2])
    output = Conv2D(name, input_dim, output_dim, filter_size, output,
                    he_init=he_init, biases=biases, cpu=CPU)
    return output
def build_pixel_shuffler_layer(self, name, h, scale, input_filters, output_filters,
                               activator=None):
    with tf.variable_scope(name):
        self.build_conv(name + "_CNN", h, self.cnn_size, input_filters,
                        scale * scale * output_filters,
                        use_batch_norm=False, use_bias=True)
        self.H.append(tf.depth_to_space(self.H[-1], scale))
        self.build_activator(self.H[-1], output_filters, activator, base_name=name)
def UpsampleConv(name, input_dim, output_dim, filter_size, inputs, he_init=True, biases=True):
    output = inputs
    output = tf.concat([output, output, output, output], 1)
    # Reorder the tensor's dimensions according to the permutation list `perm`.
    output = tf.transpose(output, [0, 2, 3, 1])
    # Trade channels for spatial resolution: fewer channels, larger feature map.
    output = tf.depth_to_space(output, 2)
    output = tf.transpose(output, [0, 3, 1, 2])
    output = lib.ops.conv2d.Conv2D(name, input_dim, output_dim, filter_size, output,
                                   he_init=he_init, biases=biases)
    return output
def G(x, is_train):
    # shape of x: [B,T_in,H,W,C]
    # Generate filters and residual
    # Fx: [B,1,H,W,1*5*5,R*R]
    # Rx: [B,1,H,W,3*R*R]
    Fx, Rx = FR(x, is_train, uf=R)
    x_c = []
    for c in range(3):
        t = DynFilter3D(x[:, T_in // 2:T_in // 2 + 1, :, :, c],
                        Fx[:, 0, :, :, :, :], [1, 5, 5])  # [B,H,W,R*R]
        t = tf.depth_to_space(t, R)  # [B,H*R,W*R,1]
        x_c += [t]
    x = tf.concat(x_c, axis=3)  # [B,H*R,W*R,3]
    x = tf.expand_dims(x, axis=1)
    Rx = depth_to_space_3D(Rx, R)  # [B,1,H*R,W*R,3]
    x += Rx
    return x
def P_Conv2(net, mask, n_filter=32, filter_size=3, stride=1, name=''):
    img_patch = tf.extract_image_patches(
        net.outputs,
        ksizes=[1, filter_size, filter_size, 1],
        strides=[1, stride, stride, 1],
        rates=[1, 1, 1, 1],
        padding='SAME')
    img_patch = tf.depth_to_space(img_patch, filter_size)
    img_patch = tf.multiply(img_patch, mask)
    n = InputLayer(img_patch, name=name + '_input')
    n = Conv2d(n, n_filter=n_filter, filter_size=(filter_size, filter_size),
               padding='VALID', W_init=w_init, b_init=b_init,
               strides=(filter_size, filter_size), name=name + '_depth')
    return n
def ScaledUpsampleConv(name, input_dim, output_dim, filter_size, inputs,
                       he_init=True, biases=True):
    output = inputs
    output = lib.ops.concat.concat([output, output, output, output], axis=1)
    output = tf.transpose(output, [0, 2, 3, 1])
    output = tf.depth_to_space(output, 2)
    output = tf.transpose(output, [0, 3, 1, 2])
    output = lib.ops.conv2d.Conv2D(name, input_dim, output_dim, filter_size, output,
                                   he_init=he_init, biases=biases, gain=0.5)
    return output
def layer_conv_dts(self, net, args, options):
    options = hc.Config(options)
    config = self.config
    ops = self.ops
    self.ops.activation_name = options.activation_name
    activation_s = options.activation or self.ops.config_option("activation")
    activation = self.ops.lookup(activation_s)

    stride = options.stride or self.ops.config_option("stride", [1, 1])[0]
    stride = int(stride)
    fltr = options.filter or self.ops.config_option("filter", [3, 3])
    if type(fltr) == type(""):
        fltr = [int(fltr), int(fltr)]
    depth = int(args[0])

    initializer = None  # default to global
    trainable = True
    if options.trainable == 'false':
        trainable = False
    bias = True
    if options.bias == 'false':
        bias = False

    net = ops.conv2d(net, fltr[0], fltr[1], stride, stride, depth * 4,
                     initializer=initializer, trainable=trainable, bias=bias)
    s = ops.shape(net)
    net = tf.depth_to_space(net, 2)
    if activation:
        # net = self.layer_regularizer(net)
        net = activation(net)

    avg_pool = options.avg_pool or self.ops.config_option("avg_pool")
    if type(avg_pool) == type(""):
        avg_pool = [int(avg_pool), int(avg_pool)]
    if avg_pool:
        ksize = [1, avg_pool[0], avg_pool[1], 1]
        stride = ksize
        net = tf.nn.avg_pool(net, ksize=ksize, strides=stride, padding='SAME')

    return net
def aspp_features(hlist, num_classes=19, alpha=1.0):
    '''
    Args:
        hlist: list of three features, [1/8, 1/16, 1/32].
    '''
    h0, h1, h2 = hlist
    shape_h0 = combined_static_and_dynamic_shape(h0)
    shape_h1 = combined_static_and_dynamic_shape(h1)

    with ssdnet_argscope():
        # merge h1 and h2, create 1/16 feature
        h2 = tf.depth_to_space(h2, 2)
        h12 = tf.concat([h1, h2], axis=-1)  # 128
        h12 = Conv2D('h12', h12, 256, 1, activation=BNReLU)

        with tf.variable_scope('top'):
            feat = Conv2D('conv1', h12, 256, 1, activation=BNReLU)
            with tf.variable_scope('se'):
                s = AvgPooling('avgpool', h12, 49, strides=(16, 20), padding='same')
                s = Conv2D('conv1', s, 256, 1, activation=None, use_bias=True)
                s = tf.sigmoid(s, name='sigmoid')
                s = tf.image.resize_bilinear(s, shape_h1[1:3], align_corners=True)
            feat = tf.multiply(feat, s)
            feat = tf.image.resize_bilinear(feat, shape_h0[1:3], align_corners=True)
            feat = DWConv('convd', feat, 5)

        feat_l = Conv2D('conv_h0', h0, 128, 1, activation=BNReLU)

        with argscope([Conv2D], use_bias=True):
            feat = Conv2D('logit_up', feat, num_classes, 1)
            feat_l = Conv2D('logit_h0', feat_l, num_classes, 1)
        out = tf.add(feat, alpha * feat_l, name='cls_logit')
    return out
def subpixel_LR2HR(image_LR):
    c = image_LR.shape[3]
    # depth_to_space consumes channels in groups per 2x2 block, so each
    # 4-channel group is shuffled separately and the results re-concatenated.
    if c == 4:  # 64*50*50*4
        img_HR = tf.depth_to_space(image_LR, 2)
        return img_HR
    elif c == 8:  # 64*50*50*8
        img_HR1 = tf.depth_to_space(image_LR[:, :, :, 0:4], 2)
        img_HR2 = tf.depth_to_space(image_LR[:, :, :, 4:8], 2)
        return tf.concat([img_HR1, img_HR2], 3)
    elif c == 12:  # 64*50*50*12
        img_HR1 = tf.depth_to_space(image_LR[:, :, :, 0:4], 2)
        img_HR2 = tf.depth_to_space(image_LR[:, :, :, 4:8], 2)
        img_HR3 = tf.depth_to_space(image_LR[:, :, :, 8:12], 2)
        return tf.concat([img_HR1, img_HR2, img_HR3], 3)
    else:
        raise ValueError('unsupported channel count: %s' % c)
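The per-group slicing above matters: shuffling all 12 channels at once would interleave values from different groups into each output pixel. A small sketch of the difference (standalone, values assumed):

import tensorflow as tf

x = tf.reshape(tf.range(12, dtype=tf.float32), [1, 1, 1, 12])

grouped = tf.concat([tf.depth_to_space(x[..., 4 * i:4 * (i + 1)], 2)
                     for i in range(3)], axis=3)
at_once = tf.depth_to_space(x, 2)

with tf.Session() as sess:
    g, a = sess.run([grouped, at_once])
    print(g[0, 0, 0])  # [0. 4. 8.]  -- one value per 4-channel group
    print(a[0, 0, 0])  # [0. 1. 2.]  -- first three channels mixed together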
def model(self):
    d, m, b = self.model_params
    size = self.padding + 1
    features = tf.contrib.layers.conv2d(self.images, d, size, 1, 'VALID', 'NHWC',
                                        activation_fn=None, scope='features')
    conv = tf.contrib.layers.conv2d(features, d, 3, 1, 'SAME', 'NHWC',
                                    activation_fn=None, scope='conv1')
    shortcuts = conv
    for i in range(1, m + 1):
        with tf.variable_scope("FMM{}".format(i)) as scope:
            for bi in range(1, b + 1):
                res = tf.contrib.layers.conv2d(conv, d * 6, 1, 1, 'SAME', 'NHWC',
                                               activation_fn=tf.nn.leaky_relu,
                                               scope='widen{}'.format(bi))
                res = tf.contrib.layers.conv2d(res, d, 1, 1, 'SAME', 'NHWC',
                                               activation_fn=None,
                                               scope='shrink{}'.format(bi))
                res = tf.contrib.layers.conv2d(res, d, 3, 1, 'SAME', 'NHWC',
                                               activation_fn=None,
                                               scope='embedding{}'.format(bi))
                sa = tf.contrib.layers.separable_conv2d(res, None, 3, 1, 1, 'SAME', 'NHWC',
                                                        activation_fn=None,
                                                        scope='sa{}'.format(bi))
                ca = tf.reduce_mean(tf.square(res), [1, 2], True) - \
                     tf.square(tf.reduce_mean(res, [1, 2], True))
                ca = tf.contrib.layers.conv2d(ca, max(d // 16, 4), 1, 1, 'SAME', 'NHWC',
                                              activation_fn=tf.nn.leaky_relu,
                                              scope='ca_shrink{}'.format(bi))
                ca = tf.contrib.layers.conv2d(ca, d, 1, 1, 'SAME', 'NHWC',
                                              activation_fn=None,
                                              scope='ca{}'.format(bi))
                conv = tf.add(conv, tf.add(res, tf.multiply(res, tf.sigmoid(tf.add(sa, ca)))))
            conv = tf.concat([conv, shortcuts], -1)
            conv = tf.contrib.layers.conv2d(conv, d, 1, 1, 'SAME', 'NHWC',
                                            activation_fn=None, scope='GF{}'.format(i))
            shortcuts = tf.concat([conv, shortcuts], -1)
    conv = tf.contrib.layers.conv2d(conv, d, 3, 1, 'SAME', 'NHWC',
                                    activation_fn=None, scope='res')
    conv = tf.add(conv, features)
    with tf.variable_scope("upscaling"):
        conv = tf.nn.leaky_relu(conv)
        conv = tf.contrib.layers.conv2d(conv, d * self.scale ** 2, 3, 1, 'SAME', 'NHWC',
                                        activation_fn=None, scope='sub-pixel_conv')
        conv = tf.depth_to_space(conv, self.scale, name='pixel_shuffle', data_format='NHWC')
        conv = tf.contrib.layers.conv2d(conv, 1, 3, 1, 'SAME', 'NHWC',
                                        activation_fn=None, scope='final')
    return conv
def local_head(features, config):
    bn_params = {
        'center': True,
        'scale': True
    }
    conv_params = {
        'normalizer_fn': slim.batch_norm,
        'activation_fn': tf.nn.relu6,
        'stride': 1,
        'padding': 'SAME',
        'kernel_size': [3, 3],
    }
    last_conv_params = {
        'normalizer_fn': None,
        'activation_fn': None,
        'stride': 1,
        'padding': 'SAME',
        'kernel_size': [1, 1],
    }

    with tf.variable_scope('descriptor', reuse=tf.AUTO_REUSE):
        with slim.arg_scope([slim.conv2d], **conv_params), \
             slim.arg_scope([slim.batch_norm], **bn_params):
            desc = slim.conv2d(features, config['descriptor_dim'])
        with slim.arg_scope([slim.conv2d], **last_conv_params):
            desc = slim.conv2d(desc, config['descriptor_dim'])
        desc = tf.nn.l2_normalize(desc, -1)

    with tf.variable_scope('detector', reuse=tf.AUTO_REUSE):
        with slim.arg_scope([slim.conv2d], **conv_params), \
             slim.arg_scope([slim.batch_norm], **bn_params):
            logits = slim.conv2d(features, 128)
        with slim.arg_scope([slim.conv2d], **last_conv_params):
            logits = slim.conv2d(logits, 1 + pow(config['detector_grid'], 2))
        prob_full = tf.nn.softmax(logits, axis=-1)
        prob = prob_full[:, :, :, :-1]  # Strip the "no interest point" dustbin
        prob = tf.depth_to_space(prob, config['detector_grid'])
        prob = tf.squeeze(prob, axis=-1)

    return {'local_descriptor_map': desc,
            'logits': logits,
            'prob_full': prob_full,
            'scores_dense': prob}
def dark_network(input):
    with tf.variable_scope("DARK") as vs:
        conv1 = slim.conv2d(input, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv1_1')
        conv1 = slim.conv2d(conv1, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv1_2')
        pool1 = slim.max_pool2d(conv1, [2, 2], padding='SAME')

        conv2 = slim.conv2d(pool1, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv2_1')
        conv2 = slim.conv2d(conv2, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv2_2')
        pool2 = slim.max_pool2d(conv2, [2, 2], padding='SAME')

        conv3 = slim.conv2d(pool2, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv3_1')
        conv3 = slim.conv2d(conv3, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv3_2')
        pool3 = slim.max_pool2d(conv3, [2, 2], padding='SAME')

        conv4 = slim.conv2d(pool3, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv4_1')
        conv4 = slim.conv2d(conv4, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv4_2')
        pool4 = slim.max_pool2d(conv4, [2, 2], padding='SAME')

        conv5 = slim.conv2d(pool4, 512, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv5_1')
        conv5 = slim.conv2d(conv5, 512, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv5_2')

        up6 = upsample_and_concat(conv5, conv4, 256, 512)
        conv6 = slim.conv2d(up6, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv6_1')
        conv6 = slim.conv2d(conv6, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv6_2')

        up7 = upsample_and_concat(conv6, conv3, 128, 256)
        conv7 = slim.conv2d(up7, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv7_1')
        conv7 = slim.conv2d(conv7, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv7_2')

        up8 = upsample_and_concat(conv7, conv2, 64, 128)
        conv8 = slim.conv2d(up8, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv8_1')
        conv8 = slim.conv2d(conv8, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv8_2')

        up9 = upsample_and_concat(conv8, conv1, 32, 64)
        conv9 = slim.conv2d(up9, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv9_1')
        conv9 = slim.conv2d(conv9, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv9_2')

        conv10 = slim.conv2d(conv9, 12, [1, 1], rate=1, activation_fn=None, scope='g_conv10')
        out = tf.depth_to_space(conv10, 2)
    return out
def branch_1_1(self, fcin1, helper1):
    ############################################################################
    # Branch_1_0: Input: RawImage.  Output: fc1, the bottle layer output just
    # before deconv; helper1, the concat of feature maps of size H*W for the
    # final deconv.
    ############################################################################
    with tf.variable_scope('branch_1_1', reuse=self.reuse):
        deconv_1_1_0 = slim.conv2d_transpose(
            inputs=fcin1, num_outputs=128, kernel_size=[4, 4], stride=2,
            reuse=self.reuse, scope='deconv_1_1_0',
            activation_fn=model_tools.lrelu)
        conv_1_1_0 = slim.conv2d(
            inputs=deconv_1_1_0, num_outputs=128, kernel_size=self.kernel_size,
            scope='conv_1_1_0', reuse=self.reuse,
            activation_fn=model_tools.lrelu)
        dense_1_0, next_in_0 = model_tools.block(
            conv_1_1_0, self.growth_rate, self.layers_per_block,
            self.kernel_size_dense, self.reuse, 'dense_1_0')
        bottle_1_1 = slim.conv2d(
            inputs=tf.concat([helper1, conv_1_1_0, dense_1_0], axis=3),
            num_outputs=self.bottle_output, kernel_size=1,
            scope='bottle_1_1', reuse=self.reuse,
            activation_fn=model_tools.lrelu)
        # conv_1_1_1 = bottle_1_1
        conv_1_1_1 = slim.conv2d(
            inputs=bottle_1_1, num_outputs=48, kernel_size=self.kernel_size,
            scope='conv_1_1_1', reuse=self.reuse,
            activation_fn=model_tools.lrelu)
        # Two successive 2x shuffles turn each 16-channel slice into one
        # 4x-upsampled color plane.
        conv_r = tf.depth_to_space(
            tf.depth_to_space(conv_1_1_1[:, :, :, :16], 2), 2)
        conv_g = tf.depth_to_space(
            tf.depth_to_space(conv_1_1_1[:, :, :, 16:32], 2), 2)
        conv_b = tf.depth_to_space(
            tf.depth_to_space(conv_1_1_1[:, :, :, 32:], 2), 2)
        rgb = tf.concat([conv_r, conv_g, conv_b], axis=3)
    return rgb
def ResNet1(input, reuse=False):
    current = input
    act = lrelu
    with tf.variable_scope('ResNet'):
        with tf.variable_scope('Restore-Net'):
            current = slim.conv2d(current, 64, [3, 3],
                                  activation_fn=act, scope='conv_init', reuse=reuse)
            for j in range(10):
                add = current
                current = slim.conv2d(current, 64, [3, 3],
                                      activation_fn=act, scope='conv_%d0' % (j), reuse=reuse)
                current = slim.conv2d(current, 64, [3, 3],
                                      activation_fn=act, scope='conv_%d1' % (j), reuse=reuse)
                current = current + add
            current = slim.conv2d(current, 64, [3, 3],
                                  activation_fn=act, scope='conv_final', reuse=reuse)
            restore = slim.conv2d(current, 12, [1, 1],
                                  activation_fn=None, scope='conv_subpixel', reuse=reuse)
            restore = tf.depth_to_space(restore, 2)
        with tf.variable_scope('Enhance-Net'):
            current = slim.conv2d(restore, 32, [3, 3],
                                  activation_fn=act, scope='conv_init', reuse=reuse)
            for j in range(6):
                current = slim.conv2d(current, 32, [3, 3], rate=2 ** j,
                                      activation_fn=act, scope='conv_%d0' % (j), reuse=reuse)
            current = slim.conv2d(current, 32, [3, 3],
                                  activation_fn=act, scope='conv_final0', reuse=reuse)
            enhance = slim.conv2d(current, 3, [1, 1],
                                  activation_fn=None, scope='conv_final1', reuse=reuse)
    return enhance
def network(input_image):
    # Unit 1
    c1 = sl.conv2d(input_image, 32, [3, 3], activation_fn=leaky_relu)
    c1 = sl.conv2d(c1, 32, [3, 3], activation_fn=leaky_relu)
    p1 = sl.max_pool2d(c1, [2, 2], padding='SAME')
    # Unit 2
    c2 = sl.conv2d(p1, 64, [3, 3], activation_fn=leaky_relu)
    c2 = sl.conv2d(c2, 64, [3, 3], activation_fn=leaky_relu)
    p2 = sl.max_pool2d(c2, [2, 2], padding='SAME')
    # Unit 3
    c3 = sl.conv2d(p2, 128, [3, 3], activation_fn=leaky_relu)
    c3 = sl.conv2d(c3, 128, [3, 3], activation_fn=leaky_relu)
    p3 = sl.max_pool2d(c3, [2, 2], padding='SAME')
    # Unit 4
    c4 = sl.conv2d(p3, 256, [3, 3], activation_fn=leaky_relu)
    c4 = sl.conv2d(c4, 256, [3, 3], activation_fn=leaky_relu)
    p4 = sl.max_pool2d(c4, [2, 2], padding='SAME')
    # Unit 5
    c5 = sl.conv2d(p4, 512, [3, 3], activation_fn=leaky_relu)
    c5 = sl.conv2d(c5, 512, [3, 3], activation_fn=leaky_relu)
    # Unit 6
    uc6 = upsample_and_concat(c5, c4, 256, 512)
    c6 = sl.conv2d(uc6, 256, [3, 3], activation_fn=leaky_relu)
    c6 = sl.conv2d(c6, 256, [3, 3], activation_fn=leaky_relu)
    # Unit 7
    uc7 = upsample_and_concat(c6, c3, 128, 256)
    c7 = sl.conv2d(uc7, 128, [3, 3], activation_fn=leaky_relu)
    c7 = sl.conv2d(c7, 128, [3, 3], activation_fn=leaky_relu)
    # Unit 8
    uc8 = upsample_and_concat(c7, c2, 64, 128)
    c8 = sl.conv2d(uc8, 64, [3, 3], activation_fn=leaky_relu)
    c8 = sl.conv2d(c8, 64, [3, 3], activation_fn=leaky_relu)
    # Unit 9
    uc9 = upsample_and_concat(c8, c1, 32, 64)
    c9 = sl.conv2d(uc9, 32, [3, 3], activation_fn=leaky_relu)
    c9 = sl.conv2d(c9, 32, [3, 3], activation_fn=leaky_relu)
    # Final Unit
    c10 = sl.conv2d(c9, 12, [1, 1], activation_fn=None)
    output_image = tf.depth_to_space(c10, 2)
    return output_image
def _upscale(self, inputs, scale):
    # Sub-pixel convolution, per https://arxiv.org/pdf/1609.05158.pdf
    conv = self._conv2d_layer(inputs, filters_size=[5, 5, 64, 64], add_bias=True,
                              name="upscale_{}_0".format(scale), activation=tf.nn.relu)
    conv = self._conv2d_layer(conv, filters_size=[3, 3, 64, 32], add_bias=True,
                              name="upscale_{}_1".format(scale), activation=tf.nn.relu)
    conv = self._conv2d_layer(
        conv,
        filters_size=[3, 3, 32, self.n_channel * np.power(scale, 2)],
        add_bias=True,
        name="upscale_{}_2".format(scale))
    upscaled_conv = tf.depth_to_space(conv, scale)
    return upscaled_conv
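This ESPCN-style layout works only because the last convolution emits exactly n_channel * scale**2 maps; depth_to_space then folds them into an image scale times larger on each side. A quick shape check (standalone sketch, shapes assumed):

import tensorflow as tf

x = tf.zeros([8, 25, 25, 3 * 4 ** 2])  # n_channel=3, scale=4: C = 3 * scale**2
y = tf.depth_to_space(x, 4)
print(y.shape)  # (8, 100, 100, 3)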
def u_net(input):
    c1 = layer.conv2d(input, 32, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv1_1')
    c1 = layer.conv2d(c1, 32, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv1_2')
    p1 = layer.max_pool2d(c1, [2, 2], padding='SAME')

    c2 = layer.conv2d(p1, 64, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv2_1')
    c2 = layer.conv2d(c2, 64, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv2_2')
    p2 = layer.max_pool2d(c2, [2, 2], padding='SAME')

    c3 = layer.conv2d(p2, 128, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv3_1')
    c3 = layer.conv2d(c3, 128, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv3_2')
    p3 = layer.max_pool2d(c3, [2, 2], padding='SAME')

    c4 = layer.conv2d(p3, 256, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv4_1')
    c4 = layer.conv2d(c4, 256, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv4_2')
    p4 = layer.max_pool2d(c4, [2, 2], padding='SAME')

    c5 = layer.conv2d(p4, 512, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv5_1')
    c5 = layer.conv2d(c5, 512, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv5_2')

    up6 = upsample_merge(c5, c4, 256, 512)
    c6 = layer.conv2d(up6, 256, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv6_1')
    c6 = layer.conv2d(c6, 256, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv6_2')

    up7 = upsample_merge(c6, c3, 128, 256)
    c7 = layer.conv2d(up7, 128, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv7_1')
    c7 = layer.conv2d(c7, 128, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv7_2')

    up8 = upsample_merge(c7, c2, 64, 128)
    c8 = layer.conv2d(up8, 64, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv8_1')
    c8 = layer.conv2d(c8, 64, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv8_2')

    up9 = upsample_merge(c8, c1, 32, 64)
    c9 = layer.conv2d(up9, 32, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv9_1')
    c9 = layer.conv2d(c9, 32, [3, 3], rate=1, activation_fn=tf.nn.leaky_relu, scope='g_conv9_2')

    c10 = layer.conv2d(c9, 12, [1, 1], rate=1, activation_fn=None, scope='g_conv10')
    out_image = tf.depth_to_space(c10, 2)
    return out_image
def network(input):
    # Unet
    conv1 = slim.conv2d(input, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv1_1')
    conv1 = slim.conv2d(conv1, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv1_2')
    pool1 = slim.max_pool2d(conv1, [2, 2], padding='SAME')

    conv2 = slim.conv2d(pool1, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv2_1')
    conv2 = slim.conv2d(conv2, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv2_2')
    pool2 = slim.max_pool2d(conv2, [2, 2], padding='SAME')

    conv3 = slim.conv2d(pool2, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv3_1')
    conv3 = slim.conv2d(conv3, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv3_2')
    pool3 = slim.max_pool2d(conv3, [2, 2], padding='SAME')

    conv4 = slim.conv2d(pool3, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv4_1')
    conv4 = slim.conv2d(conv4, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv4_2')
    pool4 = slim.max_pool2d(conv4, [2, 2], padding='SAME')

    conv5 = slim.conv2d(pool4, 512, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv5_1')
    conv5 = slim.conv2d(conv5, 512, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv5_2')

    up6 = upsample_and_concat(conv5, conv4, 256, 512)
    conv6 = slim.conv2d(up6, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv6_1')
    conv6 = slim.conv2d(conv6, 256, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv6_2')

    up7 = upsample_and_concat(conv6, conv3, 128, 256)
    conv7 = slim.conv2d(up7, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv7_1')
    conv7 = slim.conv2d(conv7, 128, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv7_2')

    up8 = upsample_and_concat(conv7, conv2, 64, 128)
    conv8 = slim.conv2d(up8, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv8_1')
    conv8 = slim.conv2d(conv8, 64, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv8_2')

    up9 = upsample_and_concat(conv8, conv1, 32, 64)
    conv9 = slim.conv2d(up9, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv9_1')
    conv9 = slim.conv2d(conv9, 32, [3, 3], rate=1, activation_fn=lrelu, scope='g_conv9_2')

    conv10 = slim.conv2d(conv9, 27, [1, 1], rate=1, activation_fn=None, scope='g_conv10')
    out = tf.depth_to_space(conv10, 3)
    return out
def upsample(x, num_units, method="subpixel"):
    """
    2D upsampling layer.

    Parameters
    ----------
    x: tensor
        input
    num_units: int
        number of feature maps in the output
    method: str
        upsampling method. A `string` from: `"conv_transposed",
        "nearest_neighbor", "linear", "subpixel"`. Subpixel means that
        every upsampled pixel gets its own filter.

    Returns
    -------
    upsampled input
    """
    xs = x.shape.as_list()
    if method == "conv_transposed":
        return deconv2d(x, num_units, stride=[2, 2])
    elif method == "subpixel":
        x = conv2d(x, 4 * num_units)
        x = tf.depth_to_space(x, 2)
        return x
    elif method == "nearest_neighbor":
        bs, h, w, c = x.shape.as_list()
        x = tf.image.resize_images(x, [2 * h, 2 * w],
                                   tf.image.ResizeMethod.NEAREST_NEIGHBOR)
        return x
    elif method == "linear":
        bs, h, w, c = xs[:4]
        x = tf.image.resize_images(x, [2 * h, 2 * w],
                                   tf.image.ResizeMethod.BILINEAR)
        return x
    else:
        raise NotImplementedError(method)
def build_network(input, num_blocks, first_channels, alpha, name='Default'):
    def conv_layer(input, shp, name, strides=[1, 1, 1, 1], padding='SAME'):
        filters = tf.get_variable(
            name + '/filters', shp,
            initializer=tf.truncated_normal_initializer(stddev=0.05))
        biases = tf.get_variable(
            name + '/biases', [shp[-1]],
            initializer=tf.constant_initializer(0))
        output = tf.nn.conv2d(input, filters, strides=strides, padding=padding) + biases
        return output

    def residual_block(input, in_channels, out_channels, name):
        conv1 = conv_layer(input, [3, 3, in_channels, out_channels], name + '/conv1')
        relu1 = tf.nn.relu(conv1)
        conv2 = conv_layer(relu1, [3, 3, out_channels, out_channels], name + '/conv2')
        relu2 = tf.nn.relu(conv2)
        if in_channels != out_channels:
            # Zero-pad the shortcut's channels so it can be added to relu2.
            tmp = tf.pad(input,
                         [[0, 0], [0, 0], [0, 0], [0, out_channels - in_channels]],
                         "CONSTANT")
        else:
            tmp = input
        output = tmp + relu2
        print(in_channels, '->', out_channels)
        return output

    current = conv_layer(input, [3, 3, 3, first_channels], name + '/first_layer')
    pre = first_channels
    for i in range(num_blocks):
        cur = pre + int((float(i + 1) * alpha) / num_blocks + 0.5)
        current = residual_block(current, pre, cur, name + '/residual_block' + str(i))
        pre = cur
    current = conv_layer(current, [3, 3, pre, 75], name + '/last_layer')
    current = tf.depth_to_space(current, 5)
    output = conv_layer(current, [3, 3, 3, 3], name + '/down_sampling_layer',
                        strides=(1, 2, 2, 1))
    return output
def forward(self, x, is_train):
    # shape of x: [B,T_in,H,W,C]
    # Generate filters and residual
    # Fx: [B,1,H,W,1*5*5,R*R]
    # Rx: [B,1,H,W,3*R*R]
    with tf.variable_scope('G', reuse=tf.AUTO_REUSE) as scope:
        Fx, Rx = FR_52L(x, is_train)
        x_c = []
        for c in range(3):
            t = DynFilter3D(x[:, self.num_frames // 2:self.num_frames // 2 + 1, :, :, c],
                            Fx[:, 0, :, :, :, :], [1, 5, 5])  # [B,H,W,R*R]
            t = tf.depth_to_space(t, self.scale)  # [B,H*R,W*R,1]
            x_c += [t]
        x = tf.concat(x_c, axis=3)  # [B,H*R,W*R,3]
        x = tf.expand_dims(x, axis=1)
        Rx = depth_to_space_3D(Rx, self.scale)  # [B,1,H*R,W*R,3]
        x += Rx
    return x
def generator(z, label):
    with tf.variable_scope('generator', reuse=None):
        d = 16
        z = tf.concat([z, label], axis=1)
        h0 = tf.layers.dense(z, units=d * d * 64)
        h0 = tf.reshape(h0, shape=[-1, d, d, 64])
        h0 = tf.nn.relu(batch_norm(h0))
        shortcut = h0

        for i in range(16):
            h0 = g_block(h0)
        h0 = tf.nn.relu(batch_norm(h0))
        h0 = tf.add(h0, shortcut)

        for i in range(3):
            h0 = conv2d(h0, 3, 256, 1, use_bias=False)
            h0 = tf.depth_to_space(h0, 2)
            h0 = tf.nn.relu(batch_norm(h0))

        h0 = tf.layers.conv2d(h0, kernel_size=9, filters=3, strides=1,
                              padding='same', activation=tf.nn.tanh,
                              name='g', use_bias=True)
    return h0
def construct_model(self):
    with self.graph.as_default():
        x = tf.split(self.x, 4, axis=3)
        x1, x2, x3, x4 = x[0], x[1], x[2], x[3]

        conv4_1 = self.CNN(x1)
        conv4_2 = self.CNN(x2, True)
        conv4_3 = self.CNN(x3, True)
        conv4_4 = self.CNN(x4, True)

        f1 = self.SPP(conv4_1)
        f2 = self.SPP(conv4_2, True)
        f3 = self.SPP(conv4_3, True)
        f4 = self.SPP(conv4_4, True)

        cost_vol = self.cost_vol(f1, f2, f3, f4)
        output = self.CNN3D(cost_vol, type="hourglass")

        with tf.name_scope('{}'.format(self.scoped_name)):
            self.yy = tf.depth_to_space(output, 2)
            self.y = tf.clip_by_value(self.yy, 0, 1,
                                      name='{}/y'.format(self.scoped_name))
def subpixel_upsample(in_, scale=4, n_feature=256):
    with slim.arg_scope([slim.conv2d], kernel_size=3, stride=1,
                        padding='SAME', activation_fn=None):
        if scale == 4:
            x = slim.conv2d(in_, n_feature * 2 ** 2)
            x = tf.depth_to_space(x, 2)
            x = slim.conv2d(x, n_feature * 2 ** 2)
            x = tf.depth_to_space(x, 2)
        elif scale == 8:
            x = slim.conv2d(in_, n_feature * 2 ** 2)
            x = tf.depth_to_space(x, 2)
            x = slim.conv2d(x, n_feature * 2 ** 2)
            x = tf.depth_to_space(x, 2)
            x = slim.conv2d(x, n_feature * 2 ** 2)
            x = tf.depth_to_space(x, 2)
        else:
            # Single-shot shuffle: the conv emits n_feature * scale**2 channels,
            # so the block size must be `scale` for the depths to divide evenly.
            x = slim.conv2d(in_, n_feature * scale ** 2)
            x = tf.depth_to_space(x, scale)
    return x
def generator_block(self, inputs, out_dim, name='generator_block'):
    """
    Args:
        inputs:
        out_dim:
        name:
    Return:
    """
    with tf.variable_scope(name):
        output = tf.concat(values=[inputs, inputs, inputs, inputs], axis=3)
        output = tf.depth_to_space(output, block_size=2)
        output = lib.ops.conv2d.Conv2D(output, output.shape.as_list()[-1], out_dim, 3, 1,
                                       'Conv.1', inputs_norm=self.inputs_norm,
                                       he_init=True, biases=True)
        output = lib.ops.pixelnorm.Pixelnorm(output)
        output = lrelu(output)
        output = lib.ops.conv2d.Conv2D(output, output.shape.as_list()[-1], out_dim, 3, 1,
                                       'Conv.2', inputs_norm=self.inputs_norm,
                                       he_init=True, biases=True)
        output = lib.ops.pixelnorm.Pixelnorm(output)
        output = lrelu(output)
    return output
def UpsampleConv(name, input_dim, output_dim, filter_size, inputs, biases=True,
                 with_sn=False, with_learnable_sn_scale=False, update_collection=None):
    output = inputs
    output = tf.concat([output, output, output, output], axis=1)
    output = tf.transpose(output, [0, 2, 3, 1])
    output = tf.depth_to_space(output, 2)
    output = tf.transpose(output, [0, 3, 1, 2])
    output = conv2d.Conv2D(name, input_dim, output_dim, filter_size, output,
                           biases=biases, with_sn=with_sn,
                           with_learnable_sn_scale=with_learnable_sn_scale,
                           update_collection=update_collection)
    return output
def depth_to_space(input, block_size, channels_last=True, name=None):
    """
    Wraps :func:`tf.depth_to_space`, to support tensors higher than 4-d.

    Args:
        input: The input tensor, at least 4-d.
        block_size (int): An int >= 2, the size of the spatial block.
        channels_last (bool): Whether or not the channels axis is the last
            axis in the input tensor?

    Returns:
        tf.Tensor: The output tensor.

    See Also:
        :func:`tf.depth_to_space`
    """
    block_size = int(block_size)
    data_format = 'NHWC' if channels_last else 'NCHW'
    input = tf.convert_to_tensor(input)
    with tf.name_scope(name or 'depth_to_space', values=[input]):
        output, s1, s2 = flatten_to_ndims(input, ndims=4)
        output = tf.depth_to_space(output, block_size, data_format=data_format)
        output = unflatten_from_ndims(output, s1, s2)
        return output
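A hedged usage sketch for the wrapper above. It leans on the library's external flatten_to_ndims/unflatten_from_ndims helpers (import path assumed), which fold the extra leading axes into the batch axis around the 4-d op:

import tensorflow as tf
# Assumed available alongside the wrapper above, e.g.:
# from tfsnippet.utils import flatten_to_ndims, unflatten_from_ndims

x = tf.zeros([3, 7, 5, 5, 16])       # [batch, group, H, W, C]
y = depth_to_space(x, block_size=2)  # leading axes folded into one, then restored
print(y.shape)                       # expected: (3, 7, 10, 10, 4)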
def unet(net, inp, nch, inch, nlev, pfx='u', _ch=None):
    out = inp
    if _ch is None:
        out = conv(net, pfx + 'u1_%d' % nlev, out, [3, nch], 0, True)
    else:
        out = conv(net, pfx + 'u1_%d' % nlev, out, [3, nch, _ch], 0, True)
    out = conv(net, 'u2_%d' % nlev, out, [3, nch], 0, True)
    if nlev == 1:
        return out

    out1 = tf.nn.max_pool(out, [1, 2, 2, 1], [1, 2, 2, 1], 'VALID')
    out1 = unet(net, out1, nch + inch, inch, nlev - 1, pfx)
    out1 = conv(net, pfx + 'u3_%d' % nlev, out1, [1, nch * 4], 0, True)
    out1 = tf.depth_to_space(out1, 2)

    out = tf.concat([out, out1], 3)
    out = conv(net, 'u4_%d' % nlev, out, [3, nch], 0, True)
    out = conv(net, pfx + 'u5_%d' % nlev, out, [3, nch], 0, True)
    return out
def UpsampleConv(opts, input, input_dim, output_dim, filter_size, scope=None,
                 init='he', biases=True):
    output = input
    output = tf.concat([output, output, output, output], axis=-1)  # concat along channel axis
    # output = tf.concat([output, output, output, output], axis=1)
    # output = tf.transpose(output, [0,2,3,1])
    output = tf.depth_to_space(output, 2)
    # output = tf.transpose(output, [0,3,1,2])
    output = Conv2d(opts, output, input_dim, output_dim, filter_size,
                    scope=scope, init=init, biases=biases)
    return output
def UpsampleConv(name, input_dim, output_dim, filter_size, inputs, he_init=True,
                 biases=True, spectralnorm=False, update_collection=None):
    output = inputs
    output = tf.concat([output, output, output, output], axis=1)
    output = tf.transpose(output, [0, 2, 3, 1])
    output = tf.depth_to_space(output, 2)
    output = tf.transpose(output, [0, 3, 1, 2])
    output = lib.ops.conv2d.Conv2D(name, input_dim, output_dim, filter_size, output,
                                   he_init=he_init, biases=biases,
                                   spectralnorm=spectralnorm,
                                   update_collection=update_collection)
    return output
def depth_to_space(inputs, block_size, name='d2s', data_format='channels_last'):
    """
    1d, 2d and 3d depth_to_space transformation.

    Parameters
    ----------
    inputs : tf.Tensor
        a tensor to resize
    block_size : int
        an int that is >= 2: the size of the spatial block
    name : str
        scope name
    data_format : {'channels_last', 'channels_first'}
        position of the channels dimension

    Returns
    -------
    tf.Tensor

    See also
    --------
    `tf.depth_to_space <https://www.tensorflow.org/api_docs/python/tf/depth_to_space>`_
    """
    dim = inputs.shape.ndims - 2
    if dim == 2:
        dafo = 'NHWC' if data_format == 'channels_last' else 'NCHW'
        return tf.depth_to_space(inputs, block_size, name, data_format=dafo)
    if data_format == 'channels_first':
        inputs = tf.transpose(inputs, [0] + list(range(2, dim + 2)) + [1])
    x = _depth_to_space(inputs, block_size, name)
    if data_format == 'channels_first':
        x = tf.transpose(x, [0, dim + 1] + list(range(1, dim + 1)))
    return x
def phase_shift(x, upsampling_factor=2, name="PhaseShift"):
    return tf.depth_to_space(x, upsampling_factor, name=name)
def _testOne(self, inputs, block_size, outputs):
    for use_gpu in [False, True]:
        with self.test_session(use_gpu=use_gpu):
            x_tf = tf.depth_to_space(tf.to_float(inputs), block_size)
            self.assertAllEqual(x_tf.eval(), outputs)
def decompress_seqcnn(x,
                      targets,
                      targets_vocab_size,
                      dilations_and_kernels,
                      block_size,
                      is_2d=False,
                      embedding_var=None,
                      name=None,
                      reuse=None):
    """Decompress x into targets size using a Sequence CNN at every element."""
    with tf.variable_scope(
        name,
        default_name="decompress_batch_seqcnn",
        values=[x, targets],
        reuse=reuse):
        # We assume targets are [batch x block_size * N x block_size * N x C] if
        # is_2d=True or [batch, block_size * N, 1, C] otherwise, and C is static.
        # Let's shift targets to depth and embed.
        targets_shape, targets_shape_static = tf.shape(targets), targets.get_shape()
        channels = int(targets_shape_static[-1])
        hidden_size = int(x.get_shape()[-1])
        if is_2d:
            depth_targets = tf.space_to_depth(targets, block_size)
            factor = channels * block_size * block_size
        else:
            depth_targets = tf.reshape(targets, [
                targets_shape[0], targets_shape[1] // block_size, 1,
                channels * block_size
            ])
            factor = channels * block_size
        if embedding_var is None:
            embedding_var = tf.get_variable("targets_embedding",
                                            [targets_vocab_size, hidden_size])
        targets_emb = tf.gather(embedding_var, depth_targets)

        # Flatten x and embedded targets. Flat targets are factor* larger on axis=1.
        flat_x = tf.reshape(x, [-1, 1, 1, hidden_size])
        flat_targets = tf.reshape(targets_emb, [-1, factor, 1, hidden_size])
        shifted_targets = shift_left(flat_targets)

        # Run a SeqCNN large-batch to produce factor outputs out of every target.
        flat_x += tf.zeros_like(shifted_targets)  # Broadcast on axis=1.
        flat_outputs = conv_block(
            tf.concat([flat_x, shifted_targets], axis=3),
            hidden_size,
            dilations_and_kernels,
            padding="LEFT")

        # Reshape back to embedded targets shape.
        outputs = tf.reshape(flat_outputs, [
            tf.shape(targets_emb)[0],
            tf.shape(targets_emb)[1],
            tf.shape(targets_emb)[2], factor * hidden_size
        ])
        # Move depth back to target space.
        if is_2d:
            outputs = tf.depth_to_space(outputs, 2)
        else:
            outputs = tf.reshape(outputs, [
                tf.shape(outputs)[0], block_size * tf.shape(outputs)[1], 1,
                hidden_size
            ])
        # Final reshape before prediction to ensure target size.
        outputs = tf.reshape(outputs, [
            targets_shape[0], targets_shape[1], targets_shape[2], channels,
            hidden_size
        ])
        return tf.layers.dense(outputs, targets_vocab_size)
def apply_depth_to_space(input_node, train, parameters, options):
    """Construct a TensorFlow depth_to_space node."""
    return tf.depth_to_space(input_node, options["block_size"])
def call(self, x, mask=None):
    # data_format must be passed by keyword: the third positional argument
    # of tf.depth_to_space is `name`.
    y = tf.depth_to_space(x, self.scale_factor, data_format=self.data_format)
    return y
def position_sensitive_crop_regions(image,
                                    boxes,
                                    box_ind,
                                    crop_size,
                                    num_spatial_bins,
                                    global_pool,
                                    extrapolation_value=None):
    """Position-sensitive crop and pool rectangular regions from a feature grid.

    The output crops are split into `spatial_bins_y` vertical bins and
    `spatial_bins_x` horizontal bins. For each intersection of a vertical and a
    horizontal bin the output values are gathered by performing
    `tf.image.crop_and_resize` (bilinear resampling) on a separate subset of
    channels of the image. This reduces `depth` by a factor of
    `(spatial_bins_y * spatial_bins_x)`.

    When global_pool is True, this function implements a differentiable version
    of position-sensitive RoI pooling used in
    [R-FCN detection system](https://arxiv.org/abs/1605.06409).

    When global_pool is False, this function implements a differentiable version
    of position-sensitive assembling operation used in
    [instance FCN](https://arxiv.org/abs/1603.08678).

    Args:
      image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
        `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
        A 4-D tensor of shape `[batch, image_height, image_width, depth]`.
        Both `image_height` and `image_width` need to be positive.
      boxes: A `Tensor` of type `float32`.
        A 2-D tensor of shape `[num_boxes, 4]`. The `i`-th row of the tensor
        specifies the coordinates of a box in the `box_ind[i]` image and is
        specified in normalized coordinates `[y1, x1, y2, x2]`. A normalized
        coordinate value of `y` is mapped to the image coordinate at
        `y * (image_height - 1)`, so as the `[0, 1]` interval of normalized
        image height is mapped to `[0, image_height - 1]` in image height
        coordinates. We do allow y1 > y2, in which case the sampled crop is an
        up-down flipped version of the original image. The width dimension is
        treated similarly. Normalized coordinates outside the `[0, 1]` range
        are allowed, in which case we use `extrapolation_value` to extrapolate
        the input image values.
      box_ind: A `Tensor` of type `int32`.
        A 1-D tensor of shape `[num_boxes]` with int32 values in `[0, batch)`.
        The value of `box_ind[i]` specifies the image that the `i`-th box
        refers to.
      crop_size: A list of two integers `[crop_height, crop_width]`. All
        cropped image patches are resized to this size. The aspect ratio of
        the image content is not preserved. Both `crop_height` and `crop_width`
        need to be positive.
      num_spatial_bins: A list of two integers `[spatial_bins_y, spatial_bins_x]`.
        Represents the number of position-sensitive bins in y and x directions.
        Both values should be >= 1. `crop_height` should be divisible by
        `spatial_bins_y`, and similarly for width. The number of image channels
        should be divisible by (spatial_bins_y * spatial_bins_x).
        Suggested value from R-FCN paper: [3, 3].
      global_pool: A boolean variable.
        If True, we perform average global pooling on the features assembled
        from the position-sensitive score maps.
        If False, we keep the position-pooled features without global pooling
        over the spatial coordinates.
        Note that using global_pool=True is equivalent to but more efficient
        than running the function with global_pool=False and then performing
        global average pooling.
      extrapolation_value: An optional `float`. Defaults to `0`. Value used for
        extrapolation, when applicable.

    Returns:
      position_sensitive_features: A 4-D tensor of shape
        `[num_boxes, K, K, crop_channels]`, where
        `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`, where K = 1
        when global_pool is True (average-pooled cropped regions), and
        K = crop_size when global_pool is False.

    Raises:
      ValueError: Raised in four situations:
        `num_spatial_bins` is not >= 1;
        `num_spatial_bins` does not divide `crop_size`;
        `(spatial_bins_y * spatial_bins_x)` does not divide `depth`;
        `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
    """
    total_bins = 1
    bin_crop_size = []

    for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
        if num_bins < 1:
            raise ValueError('num_spatial_bins should be >= 1')

        if crop_dim % num_bins != 0:
            raise ValueError('crop_size should be divisible by num_spatial_bins')

        total_bins *= num_bins
        bin_crop_size.append(crop_dim // num_bins)

    if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
        raise ValueError('Only support square bin crop size for now.')

    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    spatial_bins_y, spatial_bins_x = num_spatial_bins

    # Split each box into spatial_bins_y * spatial_bins_x bins.
    position_sensitive_boxes = []
    for bin_y in range(spatial_bins_y):
        step_y = (ymax - ymin) / spatial_bins_y
        for bin_x in range(spatial_bins_x):
            step_x = (xmax - xmin) / spatial_bins_x
            box_coordinates = [ymin + bin_y * step_y,
                               xmin + bin_x * step_x,
                               ymin + (bin_y + 1) * step_y,
                               xmin + (bin_x + 1) * step_x]
            position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

    image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=3)

    image_crops = []
    for (split, box) in zip(image_splits, position_sensitive_boxes):
        crop = tf.image.crop_and_resize(split, box, box_ind, bin_crop_size,
                                        extrapolation_value=extrapolation_value)
        image_crops.append(crop)

    if global_pool:
        # Average over all bins.
        position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
        # Then average over spatial positions within the bins.
        position_sensitive_features = tf.reduce_mean(
            position_sensitive_features, [1, 2], keep_dims=True)
    else:
        # Reorder height/width to depth channel.
        block_size = bin_crop_size[0]
        if block_size >= 2:
            image_crops = [tf.space_to_depth(crop, block_size=block_size)
                           for crop in image_crops]

        # Pack image_crops so that first dimension is for position-sensitive boxes.
        position_sensitive_features = tf.stack(image_crops, axis=0)

        # Unroll the position-sensitive boxes to spatial positions.
        position_sensitive_features = tf.squeeze(
            tf.batch_to_space_nd(position_sensitive_features,
                                 block_shape=[1] + num_spatial_bins,
                                 crops=tf.zeros((3, 2), dtype=tf.int32)),
            squeeze_dims=[0])

        # Reorder back the depth channel.
        if block_size >= 2:
            position_sensitive_features = tf.depth_to_space(
                position_sensitive_features, block_size=block_size)

    return position_sensitive_features
def testBasic(self):
    x_np = [[[[1, 2, 3, 4]]]]
    with self.test_session(use_gpu=False):
        block_size = 2
        x_tf = tf.depth_to_space(x_np, block_size)
        self.assertAllEqual(x_tf.eval(), [[[[1], [2]], [[3], [4]]]])
def _testOne(self, inputs, block_size, outputs):
    with self.test_session():
        x_tf = tf.depth_to_space(tf.to_float(inputs), block_size)
        self.assertAllEqual(x_tf.eval(), outputs)
def testUnknownShape(self):
    t = tf.depth_to_space(tf.placeholder(tf.float32), block_size=4)
    self.assertEqual(4, t.get_shape().ndims)