def testInputWrongDimMissingBatch(self):
    # The input is missing the first ("batch") dimension.
    x_np = [[[1], [2]], [[3], [4]]]
    crops = np.zeros((2, 2), dtype=np.int32)
    block_size = 2
    with self.assertRaises(ValueError):
        _ = tf.batch_to_space(x_np, crops, block_size)
def testBlockSizeSquaredNotDivisibleBatch(self):
    # The block size squared does not divide the batch.
    x_np = [[[[1], [2], [3]], [[3], [4], [7]]]]
    crops = np.zeros((2, 2), dtype=np.int32)
    block_size = 3
    with self.assertRaises(IndexError):
        _ = tf.batch_to_space(x_np, crops, block_size)
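For contrast with the failing cases above, a minimal sketch of a valid call, assuming the TF 1.x signature tf.batch_to_space(input, crops, block_size): the batch must be divisible by block_size**2, and each group of block_size**2 batch entries folds into one spatial block.

import numpy as np
import tensorflow as tf  # TF 1.x assumed, matching the tests above

# batch = 4 = block_size**2, so the op can fold the batch into a 2x2 block.
x = np.arange(4, dtype=np.float32).reshape([4, 1, 1, 1])
crops = np.zeros((2, 2), dtype=np.int32)
y = tf.batch_to_space(x, crops, block_size=2)
with tf.Session() as sess:
    print(sess.run(y).shape)  # (1, 2, 2, 1)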
def group_batch_images(x):
    sz = x.get_shape().as_list()
    num_cols = int(math.sqrt(sz[0]))
    img = tf.slice(x, [0, 0, 0, 0], [num_cols ** 2, -1, -1, -1])
    img = tf.batch_to_space(img, [[0, 0], [0, 0]], num_cols)
    return img
def testBlockSizeOne(self):
    # The block size is 1. The block size needs to be > 1.
    x_np = [[[[1], [2]], [[3], [4]]]]
    crops = np.zeros((2, 2), dtype=np.int32)
    block_size = 1
    with self.assertRaises(ValueError):
        out_tf = tf.batch_to_space(x_np, crops, block_size)
        out_tf.eval()
def testBlockSizeLarger(self):
    # The block size is too large for this input.
    x_np = [[[[1], [2]], [[3], [4]]]]
    crops = np.zeros((2, 2), dtype=np.int32)
    block_size = 10
    with self.assertRaises(IndexError):
        out_tf = tf.batch_to_space(x_np, crops, block_size)
        out_tf.eval()
def testAtrousSequence(self):
    """Tests optimization of sequence of atrous convolutions.

    Verifies that a sequence of `atrous_conv2d` operations with identical
    `rate` parameters, 'SAME' `padding`, and `filters` with odd heights/widths:

        net = atrous_conv2d(net, filters1, rate, padding="SAME")
        net = atrous_conv2d(net, filters2, rate, padding="SAME")
        ...
        net = atrous_conv2d(net, filtersK, rate, padding="SAME")

    is equivalent to:

        pad = ...  # padding so that the input dims are multiples of rate
        net = space_to_batch(net, paddings=pad, block_size=rate)
        net = conv2d(net, filters1, strides=[1, 1, 1, 1], padding="SAME")
        net = conv2d(net, filters2, strides=[1, 1, 1, 1], padding="SAME")
        ...
        net = conv2d(net, filtersK, strides=[1, 1, 1, 1], padding="SAME")
        net = batch_to_space(net, crops=pad, block_size=rate)
    """
    padding = "SAME"  # The padding needs to be "SAME".
    np.random.seed(1)  # Make it reproducible.
    default_graph_controller = env.g.as_default()
    default_graph_controller.__enter__()
    with self.test_session():
        # Input: [batch, height, width, input_depth]
        # for height in range(15, 17):
        for height in range(15, 16):
            # for width in range(15, 17):
            for width in range(15, 16):
                x_shape = [3, height, width, 2]
                x = np.random.random_sample(x_shape).astype(np.float32)
                for kernel in [1]:  # The kernel size needs to be odd.
                # for kernel in [1, 3, 5]:  # The kernel size needs to be odd.
                    # Filter: [kernel_height, kernel_width, input_depth, output_depth]
                    f_shape = [kernel, kernel, 2, 2]
                    f = 1e-2 * np.random.random_sample(f_shape).astype(np.float32)
                    for rate in range(2, 3):
                    # for rate in range(2, 4):
                        # y1: three atrous_conv2d in a row.
                        y1 = tf.nn.atrous_conv2d(x, f, rate, padding=padding)
                        y1 = tf.nn.atrous_conv2d(y1, f, rate, padding=padding)
                        y1 = tf.nn.atrous_conv2d(y1, f, rate, padding=padding)
                        # y2: space_to_batch, three conv2d in a row, batch_to_space.
                        pad_bottom = 0 if height % rate == 0 else rate - height % rate
                        pad_right = 0 if width % rate == 0 else rate - width % rate
                        pad = [[0, pad_bottom], [0, pad_right]]
                        y2 = tf.space_to_batch(x, paddings=pad, block_size=rate)
                        y2 = tf.nn.conv2d(y2, f, strides=[1, 1, 1, 1], padding=padding)
                        y2 = tf.nn.conv2d(y2, f, strides=[1, 1, 1, 1], padding=padding)
                        y2 = tf.nn.conv2d(y2, f, strides=[1, 1, 1, 1], padding=padding)
                        y2 = tf.batch_to_space(y2, crops=pad, block_size=rate)
                        self.assertAllClose(y1.eval(), y2.eval(), rtol=1e-2, atol=1e-2)
def makeImageSummary(tag, image, opt):
    with tf.name_scope("imageSummary"):
        blockSize = opt.visBlockSize
        imageSlice = tf.slice(image, [0, 0, 0, 0], [blockSize ** 2, -1, -1, -1])
        imageOne = tf.batch_to_space(imageSlice, crops=[[0, 0], [0, 0]],
                                     block_size=blockSize)
        imagePermute = tf.reshape(imageOne, [opt.H, blockSize, opt.W, blockSize, 1])
        imageTransp = tf.transpose(imagePermute, [1, 0, 3, 2, 4])
        imageBlocks = tf.reshape(imageTransp,
                                 [1, opt.H * blockSize, opt.W * blockSize, 1])
        tf.summary.image(tag, imageBlocks)
def _testPad(self, inputs, paddings, block_size, outputs):
    for use_gpu in [False, True]:
        with self.test_session(use_gpu=use_gpu):
            # outputs = space_to_batch(inputs)
            x_tf = tf.space_to_batch(tf.to_float(inputs), paddings,
                                     block_size=block_size)
            self.assertAllEqual(x_tf.eval(), outputs)
            # inputs = batch_to_space(outputs)
            x_tf = tf.batch_to_space(tf.to_float(outputs), paddings,
                                     block_size=block_size)
            self.assertAllEqual(x_tf.eval(), inputs)
def testDepthToSpaceTranspose(self):
    x = np.arange(20 * 5 * 8 * 7, dtype=np.float32).reshape([20, 5, 8, 7])
    block_size = 2
    crops = np.zeros((2, 2), dtype=np.int32)
    y1 = tf.batch_to_space(x, crops, block_size=block_size)
    y2 = tf.transpose(
        tf.depth_to_space(tf.transpose(x, [3, 1, 2, 0]), block_size=block_size),
        [3, 1, 2, 0])
    with self.test_session():
        self.assertAllEqual(y1.eval(), y2.eval())
def upscale2d(x, n):
    """Box upscaling (also called nearest neighbors).

    Args:
        x: 4D tensor in NHWC format.
        n: integer scale (must be a power of 2).

    Returns:
        4D tensor up scaled by a factor n.
    """
    if n == 1:
        return x
    return tf.batch_to_space(tf.tile(x, [n**2, 1, 1, 1]), [[0, 0], [0, 0]], n)
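The tile-then-batch_to_space pattern above is exactly nearest-neighbor upsampling: batch_to_space distributes the n**2 identical copies into each n x n output block. A minimal check against np.repeat, a sketch assuming TF 1.x and a single-image batch:

import numpy as np
import tensorflow as tf  # TF 1.x assumed

x = np.arange(8, dtype=np.float32).reshape([1, 2, 2, 2])  # NHWC
n = 2
up = tf.batch_to_space(tf.tile(x, [n**2, 1, 1, 1]), [[0, 0], [0, 0]], n)
with tf.Session() as sess:
    out = sess.run(up)  # shape (1, 4, 4, 2)
# Nearest-neighbor reference: repeat each pixel n times along H and W.
ref = np.repeat(np.repeat(x, n, axis=1), n, axis=2)
assert np.array_equal(out, ref)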
def _checkGrad(self, x, crops, block_size):
    assert 4 == x.ndim
    with self.test_session():
        tf_x = tf.convert_to_tensor(x)
        tf_y = tf.batch_to_space(tf_x, crops, block_size)
        epsilon = 1e-5
        x_jacob_t, x_jacob_n = tf.test.compute_gradient(
            tf_x,
            x.shape,
            tf_y,
            tf_y.get_shape().as_list(),
            x_init_value=x,
            delta=epsilon)
    self.assertAllClose(x_jacob_t, x_jacob_n, rtol=1e-2, atol=epsilon)
def atrous_conv2d(value, filters, rate, name):
    """Returns the result of a convolution with holes from `value` and `filters`.

    We do not use the TensorFlow implementation because of issues with the
    shape definition of the result. The semantics are the same. Only "VALID"
    padding is used.

    Warning: this implementation is PGNet-specific. It is used only to define
    the last convolutional layer, and therefore depends on PGNet constants.
    """
    pad_top = 0
    pad_bottom = 0
    pad_left = 0
    pad_right = 0
    in_height = value.get_shape()[1].value + pad_top + pad_bottom
    in_width = value.get_shape()[2].value + pad_left + pad_right
    # More padding so that rate divides the height and width of the input.
    pad_bottom_extra = (rate - in_height % rate) % rate
    pad_right_extra = (rate - in_width % rate) % rate
    # The paddings argument to space_to_batch includes both padding components.
    space_to_batch_pad = ((pad_top, pad_bottom + pad_bottom_extra),
                          (pad_left, pad_right + pad_right_extra))
    value = tf.space_to_batch(
        input=value, paddings=space_to_batch_pad, block_size=rate)
    value = tf.nn.conv2d(
        input=value,
        filter=filters,
        strides=(1, LAST_CONV_OUTPUT_STRIDE, LAST_CONV_OUTPUT_STRIDE, 1),
        padding="VALID",
        name=name)
    # The crops argument to batch_to_space is just the extra padding component.
    batch_to_space_crop = ((0, pad_bottom_extra), (0, pad_right_extra))
    value = tf.batch_to_space(
        input=value, crops=batch_to_space_crop, block_size=rate)
    return value
def upscale(images, scale):
    """Box upscaling (also called nearest neighbors) of images.

    Args:
        images: A 4D `Tensor` in NHWC format.
        scale: A positive integer scale.

    Returns:
        A 4D `Tensor` of `images` up scaled by a factor `scale`.

    Raises:
        ValueError: If `scale` is not a positive integer.
    """
    scale = _get_validated_scale(scale)
    if scale == 1:
        return images
    return tf.batch_to_space(tf.tile(images, [scale**2, 1, 1, 1]),
                             crops=[[0, 0], [0, 0]], block_size=scale)
def _smoothed_dilated_conv2d_GI(x, kernel_size, num_o, dilation_factor, name,
                                top_scope, biased=False):
    """Smoothed dilated conv2d via the Group Interaction (GI) layer, without BN or relu."""
    # Padding so that the input dims are multiples of dilation_factor.
    H = tf.shape(x)[1]
    W = tf.shape(x)[2]
    pad_bottom = (dilation_factor - H % dilation_factor) if H % dilation_factor != 0 else 0
    pad_right = (dilation_factor - W % dilation_factor) if W % dilation_factor != 0 else 0
    pad = [[0, pad_bottom], [0, pad_right]]
    # Decomposition to smaller-sized feature maps:
    # [N, H, W, C] -> [N*d*d, H/d, W/d, C]
    o = tf.space_to_batch(x, paddings=pad, block_size=dilation_factor)
    # Perform regular conv2d.
    num_x = x.shape[3].value
    with tf.variable_scope(name) as scope:
        w = tf.get_variable('weights', shape=[kernel_size, kernel_size, num_x, num_o])
        s = [1, 1, 1, 1]
        o = tf.nn.conv2d(o, w, s, padding='SAME')
        fix_w = tf.Variable(tf.eye(dilation_factor * dilation_factor), name='fix_w')
        l = tf.split(o, dilation_factor * dilation_factor, axis=0)
        os = []
        for i in six.moves.range(0, dilation_factor * dilation_factor):
            os.append(fix_w[0, i] * l[i])
            for j in six.moves.range(1, dilation_factor * dilation_factor):
                os[i] += fix_w[j, i] * l[j]
        o = tf.concat(os, axis=0)
        if biased:
            b = tf.get_variable('biases', shape=[num_o])
            o = tf.nn.bias_add(o, b)
    o = tf.batch_to_space(o, crops=pad, block_size=dilation_factor)
    return o
def forward_decoder(self, h, vars):
    """
    :param h:
    :return:
    """
    idx = 0
    op = h
    # layer1/2/3, factor=2,1,0
    for idx in range(0, self.factor * 4, 4):  # step=4
        op = tf.nn.conv2d(op, vars[idx], strides=(1, 1, 1, 1), padding='SAME')
        # print(vars[idx].name, vars[idx + 1].name)
        op = tf.nn.bias_add(op, vars[idx + 1])
        op = tf.nn.leaky_relu(op)
        op = tf.nn.conv2d(op, vars[idx + 2], strides=(1, 1, 1, 1), padding='SAME')
        op = tf.nn.bias_add(op, vars[idx + 3])
        op = tf.nn.leaky_relu(op)
        op = tf.batch_to_space(tf.tile(op, [2 ** 2, 1, 1, 1]), [[0, 0], [0, 0]], 2)
    # update variable pointer
    idx = self.factor * 4
    # layer4
    op = tf.nn.conv2d(op, vars[idx + 0], strides=(1, 1, 1, 1), padding='SAME')
    op = tf.nn.bias_add(op, vars[idx + 1])
    op = tf.nn.leaky_relu(op)
    idx += 2
    # layer5
    op = tf.nn.conv2d(op, vars[idx + 0], strides=(1, 1, 1, 1), padding='SAME')
    op = tf.nn.bias_add(op, vars[idx + 1])
    # op = tf.nn.sigmoid(op)
    idx += 2
    assert idx == len(vars)
    return op
def DilatedConv_GI(x, k, num_out, factor, name, biased=False):
    H = tf.shape(x)[1]
    W = tf.shape(x)[2]
    pad_bottom = (factor - H % factor) if H % factor != 0 else 0
    pad_right = (factor - W % factor) if W % factor != 0 else 0
    pad = [[0, pad_bottom], [0, pad_right]]
    # Decompose to smaller feature maps before the convolution, mirroring
    # _smoothed_dilated_conv2d_GI above; the original snippet omitted this
    # line and used `output` before assignment.
    output = tf.space_to_batch(x, paddings=pad, block_size=factor)
    num_input = x.shape[3].value
    with tf.variable_scope(name) as scope:
        w = tf.get_variable('weights', shape=[k, k, num_input, num_out])
        s = [1, 1, 1, 1]
        output = tf.nn.conv2d(output, w, s, padding='SAME')
        fix_w = tf.Variable(tf.eye(factor * factor), name='fix_w')
        l = tf.split(output, factor * factor, axis=0)
        os = []
        for i in six.moves.range(0, factor * factor):
            os.append(fix_w[0, i] * l[i])
            for j in six.moves.range(1, factor * factor):
                os[i] += fix_w[j, i] * l[j]
        output = tf.concat(os, axis=0)
        if biased:
            bias = tf.get_variable('biases', shape=[num_out])
            output = tf.nn.bias_add(output, bias)
    output = tf.batch_to_space(output, crops=pad, block_size=factor)
    return output
def vol3d_encoder(self, x, name='Vol3D_Encoder'):
    with argscope([Conv3D], kernel_shape=3, padding='SAME', use_bias=False,
                  nl=tf.nn.leaky_relu):
        # x = x - VGG19_MEAN_TENSOR
        x = tf_2tanh(x)
        # x = x/255.0
        x = tf.expand_dims(x, axis=0)  # to 1 256 256 256 3
        x = tf.transpose(x, [4, 1, 2, 3, 0])
        # x = (LinearWrap(x)
        #      .Conv3D('conv1a', 16, strides=2, padding='SAME')
        #      .Conv3D('conv2a', 32, strides=2, padding='SAME')
        #      .Conv3D('conv3a', 64, strides=2, padding='SAME')
        #      .Conv3D('conv4a', 128, strides=2, padding='SAME')
        #      .Conv3D('conv5a', 256, strides=2, padding='SAME')
        #      .Conv3D('conv6a', 1024, strides=2, padding='SAME',
        #              use_bias=True, nl=tf.tanh)  # 4x4x4x1024
        #      ())
        x = tf.transpose(x, [4, 1, 2, 3, 0])
        x = tf.reshape(x, [-1, 4, 4, 3])
        x = tf.batch_to_space(x, crops=[[0, 0], [0, 0]], block_size=64, name='b2s')
        # x = x*255.0
        x = tf_2imag(x)
        x = INLReLU(x)
        # x = x + VGG19_MEAN_TENSOR
        return x
def tf_batch_to_space(inputs, crops=[[0, 0], [0, 0]], block_size=2, name=None):
    return tf.batch_to_space(inputs, crops, block_size, name)
def pixel_shuffle(x, scale):
    x = tf.transpose(x, [2, 1, 0])
    x = tf.batch_to_space(x, [scale], [[0, 0]])
    x = tf.transpose(x, [2, 1, 0])
    return x
import tensorflow as tf

in_ = tf.compat.v1.placeholder(tf.float32, shape=[4, 1, 1, 1], name="Hole")
cr_ = tf.constant([[0, 0], [0, 0]], name="Hole")
op_ = tf.batch_to_space(in_, cr_, 2)
def _PS(self, I, r):
    X = tf.transpose(a=I, perm=[2, 1, 0])  # (r, w, b)
    X = tf.batch_to_space(X, [r], [[0, 0]])  # (1, r*w, b)
    X = tf.transpose(a=X, perm=[2, 1, 0])
    return X
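Both 1-D phase-shift helpers above rely on batch_to_space interleaving the r sub-pixel channels into the width axis. A concrete sketch, assuming TF 2.x eager execution and the N-D signature tf.batch_to_space(input, block_shape, crops):

import numpy as np
import tensorflow as tf

I = tf.constant(np.arange(6, dtype=np.float32).reshape([1, 3, 2]))  # (b, w, r)
X = tf.transpose(I, perm=[2, 1, 0])      # (r, w, b) = (2, 3, 1)
X = tf.batch_to_space(X, [2], [[0, 0]])  # (1, r*w, b) = (1, 6, 1)
X = tf.transpose(X, perm=[2, 1, 0])      # (b, r*w, 1) = (1, 6, 1)
# Width positions now alternate between the two original channels:
# [I[0,0,0], I[0,0,1], I[0,1,0], I[0,1,1], I[0,2,0], I[0,2,1]]
print(X.numpy().ravel())  # [0. 1. 2. 3. 4. 5.]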
tf.constant()
tf.convert_to_tensor()
tf.convert_to_tensor_or_indexed_slices()
tf.convert_to_tensor_or_sparse_tensor()
tf.decode_base64()
tf.decode_csv()
tf.decode_json_example()
tf.decode_raw()
tf.device()
tf.diag()
tf.diag_part()
tf.div()
tf.divide()
tf.batch_to_space_nd()
tf.space_to_batch_nd()
tf.batch_to_space()
tf.space_to_batch()
tf.depth_to_space()
tf.space_to_depth()
tf.dtypes
tf.get_collection()
tf.get_collection_ref()
tf.get_default_session()
tf.get_local_variable
tf.get_seed()
tf.get_session_handle()
tf.get_session_tensor()
tf.get_default_graph()
def batch_to_space_execution():
    with tf.Graph().as_default(), tf.Session():
        a = tf.constant([[[[1]]], [[[2]]], [[[3]]], [[[4]]]])
        bts = tf.batch_to_space(a, crops=[[0, 0], [0, 0]], block_size=2)
        print(bts.eval())
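For reference, the four batch entries above assemble into a single 2x2 spatial block, so the snippet prints a tensor of shape [1, 2, 2, 1]:

# Expected output of batch_to_space_execution():
# [[[[1]
#    [2]]
#
#   [[3]
#    [4]]]]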
def testUnknownShape(self):
    t = tf.batch_to_space(tf.placeholder(tf.float32),
                          tf.placeholder(tf.int32),
                          block_size=4)
    self.assertEqual(4, t.get_shape().ndims)
def build(self, rgb, train=False, num_classes=20, random_init_fc8=False,
          debug=False, use_dilated=False):
    """
    Build the VGG model using loaded weights

    Parameters
    ----------
    rgb: image batch tensor
        Image in rgb shape. Scaled to interval [0, 255]
    train: bool
        Whether to build train or inference graph
    num_classes: int
        How many classes should be predicted (by fc8)
    random_init_fc8 : bool
        Whether to initialize fc8 layer randomly.
        Finetuning is required in this case.
    debug: bool
        Whether to print additional Debug Information.
    """
    # Convert RGB to BGR
    with tf.name_scope('Processing'):
        red, green, blue = tf.split(rgb, 3, 3)
        # assert red.get_shape().as_list()[1:] == [224, 224, 1]
        # assert green.get_shape().as_list()[1:] == [224, 224, 1]
        # assert blue.get_shape().as_list()[1:] == [224, 224, 1]
        bgr = tf.concat([
            blue - VGG_MEAN[0],
            green - VGG_MEAN[1],
            red - VGG_MEAN[2],
        ], 3)

        if debug:
            bgr = tf.Print(bgr, [tf.shape(bgr)],
                           message='Shape of input image: ',
                           summarize=4, first_n=1)

    self.conv1_1 = self._conv_layer(bgr, "conv1_1")
    self.conv1_2 = self._conv_layer(self.conv1_1, "conv1_2")
    self.pool1 = self._max_pool(self.conv1_2, 'pool1', debug)

    self.conv2_1 = self._conv_layer(self.pool1, "conv2_1")
    self.conv2_2 = self._conv_layer(self.conv2_1, "conv2_2")
    self.pool2 = self._max_pool(self.conv2_2, 'pool2', debug)

    self.conv3_1 = self._conv_layer(self.pool2, "conv3_1")
    self.conv3_2 = self._conv_layer(self.conv3_1, "conv3_2")
    self.conv3_3 = self._conv_layer(self.conv3_2, "conv3_3")
    self.pool3 = self._max_pool(self.conv3_3, 'pool3', debug)

    self.conv4_1 = self._conv_layer(self.pool3, "conv4_1")
    self.conv4_2 = self._conv_layer(self.conv4_1, "conv4_2")
    self.conv4_3 = self._conv_layer(self.conv4_2, "conv4_3")

    if use_dilated:
        pad = [[0, 0], [0, 0]]
        self.pool4 = tf.nn.max_pool(self.conv4_3, ksize=[1, 2, 2, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='SAME', name='pool4')
        self.pool4 = tf.space_to_batch(self.pool4, paddings=pad, block_size=2)
    else:
        self.pool4 = self._max_pool(self.conv4_3, 'pool4', debug)

    self.conv5_1 = self._conv_layer(self.pool4, "conv5_1")
    self.conv5_2 = self._conv_layer(self.conv5_1, "conv5_2")
    self.conv5_3 = self._conv_layer(self.conv5_2, "conv5_3")
    if use_dilated:
        pad = [[0, 0], [0, 0]]
        self.pool5 = tf.nn.max_pool(self.conv5_3, ksize=[1, 2, 2, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='SAME', name='pool5')
        self.pool5 = tf.space_to_batch(self.pool5, paddings=pad, block_size=2)
    else:
        self.pool5 = self._max_pool(self.conv5_3, 'pool5', debug)

    self.fc6 = self._fc_layer(self.pool5, "fc6")
    if train:
        self.fc6 = tf.nn.dropout(self.fc6, 0.5)

    self.fc7 = self._fc_layer(self.fc6, "fc7")
    if train:
        self.fc7 = tf.nn.dropout(self.fc7, 0.5)

    if use_dilated:
        self.pool5 = tf.batch_to_space(self.pool5, crops=pad, block_size=2)
        self.pool5 = tf.batch_to_space(self.pool5, crops=pad, block_size=2)
        self.fc7 = tf.batch_to_space(self.fc7, crops=pad, block_size=2)
        self.fc7 = tf.batch_to_space(self.fc7, crops=pad, block_size=2)
        return

    if random_init_fc8:
        self.score_fr = self._score_layer(self.fc7, "score_fr", num_classes)
    else:
        self.score_fr = self._fc_layer(self.fc7, "score_fr",
                                       num_classes=num_classes, relu=False)

    self.pred = tf.argmax(self.score_fr, dimension=3)

    self.upscore2 = self._upscore_layer(self.score_fr,
                                        shape=tf.shape(self.pool4),
                                        num_classes=num_classes,
                                        debug=debug, name='upscore2',
                                        ksize=4, stride=2)
    self.score_pool4 = self._score_layer(self.pool4, "score_pool4",
                                         num_classes=num_classes)
    self.fuse_pool4 = tf.add(self.upscore2, self.score_pool4)

    self.upscore4 = self._upscore_layer(self.fuse_pool4,
                                        shape=tf.shape(self.pool3),
                                        num_classes=num_classes,
                                        debug=debug, name='upscore4',
                                        ksize=4, stride=2)
    self.score_pool3 = self._score_layer(self.pool3, "score_pool3",
                                         num_classes=num_classes)
    self.fuse_pool3 = tf.add(self.upscore4, self.score_pool3)

    self.upscore32 = self._upscore_layer(self.fuse_pool3,
                                         shape=tf.shape(bgr),
                                         num_classes=num_classes,
                                         debug=debug, name='upscore32',
                                         ksize=16, stride=8)

    self.pred_up = tf.argmax(self.upscore32, dimension=3)
def build(self, rgb, keep_prob, is_CAF, Netvlad_K=32, Netvlad_centers=None,
          is_training=False, num_classes=20, random_init_fc8=False,
          debug=False, use_dilated=False):
    """
    Build the VGG model using loaded weights

    Parameters
    ----------
    rgb: image batch tensor
        Image in rgb shape. Scaled to interval [0, 255]
    train: bool
        Whether to build train or inference graph
    num_classes: int
        How many classes should be predicted (by fc8)
    random_init_fc8 : bool
        Whether to initialize fc8 layer randomly.
        Finetuning is required in this case.
    debug: bool
        Whether to print additional Debug Information.
    """
    # Convert RGB to BGR
    with tf.name_scope('Processing'):
        red, green, blue = tf.split(rgb, 3, 3)
        bgr = tf.concat([blue, green, red], 3)
        if debug:
            bgr = tf.Print(bgr, [tf.shape(bgr)],
                           message='Shape of input image: ',
                           summarize=4, first_n=1)

    with tf.variable_scope('vgg_16'):
        with tf.variable_scope('conv1'):
            self.conv1_1 = self._conv_layer(bgr, "conv1_1")
            self.conv1_2 = self._conv_layer(self.conv1_1, "conv1_2")
            self.pool1 = self._max_pool(self.conv1_2, 'pool1', debug)
        with tf.variable_scope('conv2'):
            self.conv2_1 = self._conv_layer(self.pool1, "conv2_1")
            self.conv2_2 = self._conv_layer(self.conv2_1, "conv2_2")
            self.pool2 = self._max_pool(self.conv2_2, 'pool2', debug)
        with tf.variable_scope('conv3'):
            self.conv3_1 = self._conv_layer(self.pool2, "conv3_1")
            self.conv3_2 = self._conv_layer(self.conv3_1, "conv3_2")
            self.conv3_3 = self._conv_layer(self.conv3_2, "conv3_3")
            self.pool3 = self._max_pool(self.conv3_3, 'pool3', debug)
        with tf.variable_scope('conv4'):
            self.conv4_1 = self._conv_layer(self.pool3, "conv4_1")
            self.conv4_2 = self._conv_layer(self.conv4_1, "conv4_2")
            self.conv4_3 = self._conv_layer(self.conv4_2, "conv4_3")
            if use_dilated:
                pad = [[0, 0], [0, 0]]
                self.pool4 = tf.nn.max_pool(self.conv4_3, ksize=[1, 2, 2, 1],
                                            strides=[1, 1, 1, 1],
                                            padding='SAME', name='pool4')
                self.pool4 = tf.space_to_batch(self.pool4,
                                               paddings=pad, block_size=2)
            else:
                self.pool4 = self._max_pool(self.conv4_3, 'pool4', debug)
        with tf.variable_scope('conv5'):
            self.conv5_1 = self._conv_layer(self.pool4, "conv5_1")
            self.conv5_2 = self._conv_layer(self.conv5_1, "conv5_2")
            self.conv5_3 = self._conv_layer(self.conv5_2, "conv5_3")
            if use_dilated:
                pad = [[0, 0], [0, 0]]
                self.pool5 = tf.nn.max_pool(self.conv5_3, ksize=[1, 2, 2, 1],
                                            strides=[1, 1, 1, 1],
                                            padding='SAME', name='pool5')
                self.pool5 = tf.space_to_batch(self.pool5,
                                               paddings=pad, block_size=2)
            else:
                self.pool5 = self._max_pool(self.conv5_3, 'pool5', debug)

        self.fc6 = self._fc_layer(self.pool5, "fc6")
        if is_training:
            self.fc6 = tf.nn.dropout(self.fc6, keep_prob)

        self.fc7 = self._fc_layer(self.fc6, "fc7")
        if is_training:
            self.fc7 = tf.nn.dropout(self.fc7, keep_prob)

    if use_dilated:
        self.pool5 = tf.batch_to_space(self.pool5, crops=pad, block_size=2)
        self.pool5 = tf.batch_to_space(self.pool5, crops=pad, block_size=2)
        self.fc7 = tf.batch_to_space(self.fc7, crops=pad, block_size=2)
        self.fc7 = tf.batch_to_space(self.fc7, crops=pad, block_size=2)
        return

    if is_training:
        variables_to_restore = tf.contrib.slim.get_variables_to_restore()
        print('-----------------------------------------------')
        print('Assigning VGG checkpoint.')
        tf.train.init_from_checkpoint(
            self.vgg16_ckpt_path,
            {v.name.split(':')[0]: v for v in variables_to_restore})
        print('-----------------------------------------------')

    if random_init_fc8:
        self.score_fr = self._score_layer(self.fc7, "score_fr", num_classes)
    else:
        self.score_fr = self._fc_layer(self.fc7, "score_fr",
                                       num_classes=num_classes, relu=False)

    self.pred = tf.argmax(self.score_fr, axis=3)

    self.upscore2 = self._upscore_layer(self.score_fr,
                                        shape=tf.shape(self.pool4),
                                        num_classes=num_classes,
                                        debug=debug, name='upscore2',
                                        ksize=4, stride=2)
    self.score_pool4 = self._score_layer(self.pool4, "score_pool4",
                                         num_classes=num_classes)
    self.fuse_pool4 = tf.add(self.upscore2, self.score_pool4)

    self.upscore4 = self._upscore_layer(self.fuse_pool4,
                                        shape=tf.shape(self.pool3),
                                        num_classes=num_classes,
                                        debug=debug, name='upscore4',
                                        ksize=4, stride=2)
    self.score_pool3 = self._score_layer(self.pool3, "score_pool3",
                                         num_classes=num_classes)
    self.fuse_pool3 = tf.add(self.upscore4, self.score_pool3)

    self.upscore_input = self.fuse_pool3
    if is_CAF:
        caf_input = self.upscore_input
        # print('caf_input', caf_input)
        caf_size = tf.shape(caf_input)[1:3]
        caf_n_out = num_classes
        if Netvlad_centers is None:
            centers = None
        else:
            centers = np.load(Netvlad_centers)['centers'].T
        with tf.variable_scope("vlad"):
            vlad_features = netvlad.vlad_pooling_layer(
                inputs=caf_input, K=Netvlad_K, centers=centers)
            n_flat = vlad_features.get_shape().as_list()[1]
            weights = tf.Variable(
                tf.truncated_normal(shape=[n_flat, caf_n_out],
                                    mean=0.0, stddev=0.01),
                name='weights')
            biases = tf.Variable(tf.constant(0.01, shape=[caf_n_out]),
                                 name='biases')
            caf_features = tf.matmul(vlad_features, weights)
            caf_features = tf.nn.bias_add(caf_features, biases, name='fc')
            caf_features = tf.expand_dims(caf_features, axis=1)
            caf_features = tf.expand_dims(caf_features, axis=1)
            caf_features = tf.image.resize_bilinear(caf_features, caf_size,
                                                    name='upsample')
        # print('caf_features', caf_features)
        self.fuse_pool3_caf = tf.concat([caf_input, caf_features], axis=3)
        # print('fuse_pool3_caf', self.fuse_pool3_caf)
        self.upscore_input = self.fuse_pool3_caf

    self.upscore = self._upscore_layer(self.upscore_input,
                                       shape=tf.shape(bgr),
                                       num_classes=num_classes,
                                       debug=debug, name='upscore32',
                                       ksize=16, stride=8)
    self.pred_up = tf.argmax(self.upscore, axis=3)
# Assumed setup, not shown in the original snippet: a TF 1.x session and an input.
sess = tf.Session()
value = tf.ones([1, 4, 4, 3], tf.float32)  # [batch, height, width, in_channels]

filters = tf.zeros([2, 2, 3, 3], tf.float32)  # [filter_height, filter_width, in_channels, out_channels];
                                              # filters' in_channels must match that of value
rate = 2  # dilation rate
padding = 'SAME'  # padding algorithm
conv_layer = tf.nn.atrous_conv2d(value=value, filters=filters, rate=rate,
                                 padding=padding)
res1 = sess.run(conv_layer)

# 2 create equivalent atrous convolution layer
pad = [[0, 0], [0, 0]]  # padding so that the input dims are multiples of rate
stb = tf.space_to_batch(value, paddings=pad, block_size=rate)
conv = tf.nn.conv2d(stb, filter=filters, strides=[1, 1, 1, 1], padding="SAME")
bts = tf.batch_to_space(conv, crops=pad, block_size=rate)
res2 = sess.run(bts)

# 3 create my space to batch
def my_space_to_batch():
    # 1 - Zero-pad
    # 2 - Reshape
    # 3 - Permute
    # 4 - Reshape
    pass  # filled in by the sketch below
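A minimal way to fill in those four steps for a 4-D NHWC input, sketched under the same TF 1.x assumptions and checked against tf.space_to_batch:

import numpy as np
import tensorflow as tf  # TF 1.x assumed, matching the snippet above

def my_space_to_batch(x, block_size, paddings):
    """Manual space_to_batch: zero-pad, reshape, permute, reshape."""
    b = block_size
    # 1 - Zero-pad height and width so the block size divides them.
    x = tf.pad(x, [[0, 0], paddings[0], paddings[1], [0, 0]])
    n, h, w, c = x.get_shape().as_list()
    # 2 - Reshape: split H and W into (block index, offset within block).
    x = tf.reshape(x, [n, h // b, b, w // b, b, c])
    # 3 - Permute: move the two block offsets in front of the batch.
    x = tf.transpose(x, [2, 4, 0, 1, 3, 5])
    # 4 - Reshape: fold the offsets into the batch dimension.
    return tf.reshape(x, [b * b * n, h // b, w // b, c])

x = tf.constant(np.arange(16, dtype=np.float32).reshape([1, 4, 4, 1]))
mine = my_space_to_batch(x, 2, [[0, 0], [0, 0]])
ref = tf.space_to_batch(x, paddings=[[0, 0], [0, 0]], block_size=2)
with tf.Session() as check_sess:
    assert np.array_equal(*check_sess.run([mine, ref]))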
def build(self, rgb, train=False, num_classes=20, random_init_fc8=False,
          num_obj_classes=20, use_dilated=False, is_recognition=False,
          is_shallow=False, debug=False):
    """
    Build the VGG model using loaded weights

    Parameters
    ----------
    rgb: image batch tensor
        Image in rgb shape. Scaled to interval [0, 255]
    train: bool
        Whether to build train or inference graph
    num_classes: int
        How many classes should be predicted (by fc8)
    random_init_fc8 : bool
        Whether to initialize fc8 layer randomly.
        Finetuning is required in this case.
    debug: bool
        Whether to print additional Debug Information.
    """
    # Convert RGB to BGR
    with tf.name_scope('Processing'):
        red, green, blue = tf.split(rgb, 3, 3)
        # assert red.get_shape().as_list()[1:] == [224, 224, 1]
        # assert green.get_shape().as_list()[1:] == [224, 224, 1]
        # assert blue.get_shape().as_list()[1:] == [224, 224, 1]
        bgr = tf.concat([
            blue - VGG_MEAN[0],
            green - VGG_MEAN[1],
            red - VGG_MEAN[2],
        ], 3)

        if debug:
            bgr = tf.Print(bgr, [tf.shape(bgr)],
                           message='Shape of input image: ',
                           summarize=4, first_n=1)

    self.conv1_1 = self._conv_layer(bgr, "conv1_1")
    self.conv1_2 = self._conv_layer(self.conv1_1, "conv1_2")
    self.pool1 = self._max_pool(self.conv1_2, 'pool1', debug)

    self.conv2_1 = self._conv_layer(self.pool1, "conv2_1")
    self.conv2_2 = self._conv_layer(self.conv2_1, "conv2_2")
    self.pool2 = self._max_pool(self.conv2_2, 'pool2', debug)

    self.conv3_1 = self._conv_layer(self.pool2, "conv3_1")
    self.conv3_2 = self._conv_layer(self.conv3_1, "conv3_2")
    self.conv3_3 = self._conv_layer(self.conv3_2, "conv3_3")
    self.pool3 = self._max_pool(self.conv3_3, 'pool3', debug)

    self.conv4_1 = self._conv_layer(self.pool3, "conv4_1")
    self.conv4_2 = self._conv_layer(self.conv4_1, "conv4_2")
    self.conv4_3 = self._conv_layer(self.conv4_2, "conv4_3")

    if use_dilated:
        pad = [[0, 0], [0, 0]]
        self.pool4 = tf.nn.max_pool(self.conv4_3, ksize=[1, 2, 2, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='SAME', name='pool4')
        self.pool4 = tf.space_to_batch(self.pool4, paddings=pad, block_size=2)
    else:
        self.pool4 = self._max_pool(self.conv4_3, 'pool4', debug)

    self.conv5_1 = self._conv_layer(self.pool4, "conv5_1")
    self.conv5_2 = self._conv_layer(self.conv5_1, "conv5_2")
    self.conv5_3 = self._conv_layer(self.conv5_2, "conv5_3")
    if use_dilated:
        pad = [[0, 0], [0, 0]]
        self.pool5 = tf.nn.max_pool(self.conv5_3, ksize=[1, 2, 2, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='SAME', name='pool5')
        self.pool5 = tf.space_to_batch(self.pool5, paddings=pad, block_size=2)
    else:
        self.pool5 = self._max_pool(self.conv5_3, 'pool5', debug)

    self.fc6 = self._fc_layer(self.pool5, "fc6")
    if train:
        self.fc6 = tf.nn.dropout(self.fc6, 0.5)

    self.fc7 = self._fc_layer(self.fc6, "fc7")
    if train:
        self.fc7 = tf.nn.dropout(self.fc7, 0.5)

    if use_dilated:
        self.pool5 = tf.batch_to_space(self.pool5, crops=pad, block_size=2)
        self.pool5 = tf.batch_to_space(self.pool5, crops=pad, block_size=2)
        self.fc7 = tf.batch_to_space(self.fc7, crops=pad, block_size=2)
        self.fc7 = tf.batch_to_space(self.fc7, crops=pad, block_size=2)
        return

    if random_init_fc8:
        self.score_fr = self._score_layer(self.fc7, "score_fr", num_classes)
    else:
        self.score_fr = self._fc_layer(self.fc7, "score_fr",
                                       num_classes=num_classes, relu=False)

    # object classification
    # add flatten layer
    if is_recognition:
        if is_shallow:
            self.flatten = tf.layers.flatten(self.fc7, name="flatten")
            print('Layer name: %s' % (self.flatten.name))
            print('Layer shape: %s' % str(self.flatten.shape))
            # fc layer for object prediction
            self.pred = tf.layers.dense(inputs=self.flatten,
                                        units=num_obj_classes,
                                        trainable=train, name="pred")
            print('Layer name: %s' % (self.pred.name))
            print('Layer shape: %s' % str(self.pred.shape))
        else:
            self.flatten = tf.layers.flatten(self.pool5, name="flatten")
            print('Layer name: %s' % (self.flatten.name))
            print('Layer shape: %s' % str(self.flatten.shape))
            self.dense6 = tf.layers.dense(inputs=self.flatten, units=4096,
                                          trainable=train, name="dense6")
            print('Layer name: %s' % (self.dense6.name))
            print('Layer shape: %s' % str(self.dense6.shape))
            self.dense7 = tf.layers.dense(inputs=self.flatten, units=512,
                                          trainable=train, name="dense7")
            print('Layer name: %s' % (self.dense7.name))
            print('Layer shape: %s' % str(self.dense7.shape))
            # fc layer for object prediction
            self.pred = tf.layers.dense(inputs=self.dense7,
                                        units=num_obj_classes,
                                        trainable=train, name="pred")
            print('Layer name: %s' % (self.pred.name))
            print('Layer shape: %s' % str(self.pred.shape))
        # self.pred = self._fc_layer(self.fc7, "fc8",
        #                            num_classes=num_obj_classes,
        #                            relu=False)
        # self.pred = tf.argmax(self.score_fr, dimension=3)
        # self.pred = tf.argmax(self.fc8, dimension=3)

    self.upscore2 = self._upscore_layer(self.score_fr,
                                        shape=tf.shape(self.pool4),
                                        num_classes=num_classes,
                                        debug=debug, name='upscore2',
                                        ksize=4, stride=2)
    self.score_pool4 = self._score_layer(self.pool4, "score_pool4",
                                         num_classes=num_classes)
    self.fuse_pool4 = tf.add(self.upscore2, self.score_pool4)

    self.upscore4 = self._upscore_layer(self.fuse_pool4,
                                        shape=tf.shape(self.pool3),
                                        num_classes=num_classes,
                                        debug=debug, name='upscore4',
                                        ksize=4, stride=2)
    self.score_pool3 = self._score_layer(self.pool3, "score_pool3",
                                         num_classes=num_classes)
    self.fuse_pool3 = tf.add(self.upscore4, self.score_pool3)

    self.upscore32 = self._upscore_layer(self.fuse_pool3,
                                         shape=tf.shape(bgr),
                                         num_classes=num_classes,
                                         debug=debug, name='upscore32',
                                         ksize=16, stride=8)
def build(self, rgb, label_num, train_mode=None, last_layer_type="softmax"):
    """
    load variable from npy to build the Resnet or Generate a new one
    :param rgb: rgb image [batch, height, width, 3] values scaled [0, 1]
    :param train_mode: a bool tensor, usually a placeholder:
        if True, dropout will be turned on
    """
    self.train_mode = train_mode

    red, green, blue = tf.split(axis=3, num_or_size_splits=3, value=rgb)
    assert red.get_shape().as_list()[1:] == [224, 224, 1]
    assert green.get_shape().as_list()[1:] == [224, 224, 1]
    assert blue.get_shape().as_list()[1:] == [224, 224, 1]
    bgr = tf.concat(axis=3, values=[
        blue - configs['VGG_MEAN'][0],
        green - configs['VGG_MEAN'][1],
        red - configs['VGG_MEAN'][2],
    ])
    print(bgr.get_shape().as_list())
    assert bgr.get_shape().as_list()[1:] == [224, 224, 3]

    self.conv1_1 = self.Conv_Relu(name="conv1_1", bottom=bgr,
                                  out_channels=64, kernel_size=3, stride=1)
    self.conv1_2 = self.Conv_Relu(name="conv1_2", bottom=self.conv1_1,
                                  out_channels=64, kernel_size=3, stride=1)
    # 224 224
    self.pool1 = self.max_pool(self.conv1_2, kernel_size=2, stride=2,
                               name="pool1")

    self.conv2_1 = self.Conv_Relu(name="conv2_1", bottom=self.pool1,
                                  out_channels=128, kernel_size=3, stride=1)
    self.conv2_2 = self.Conv_Relu(name="conv2_2", bottom=self.conv2_1,
                                  out_channels=128, kernel_size=3, stride=1)
    # 112 112
    self.pool2 = self.max_pool(self.conv2_2, kernel_size=2, stride=2,
                               name="pool2")

    self.conv3_1 = self.Conv_Relu(name="conv3_1", bottom=self.pool2,
                                  out_channels=256, kernel_size=3, stride=1)
    self.conv3_2 = self.Conv_Relu(name="conv3_2", bottom=self.conv3_1,
                                  out_channels=256, kernel_size=3, stride=1)
    self.conv3_3 = self.Conv_Relu(name="conv3_3", bottom=self.conv3_2,
                                  out_channels=256, kernel_size=3, stride=1)
    # 56 56
    self.pool3 = self.max_pool(self.conv3_3, kernel_size=2, stride=2,
                               name="pool3")

    self.conv4_1 = self.Conv_Relu(name="conv4_1", bottom=self.pool3,
                                  out_channels=512, kernel_size=3, stride=1)
    self.conv4_2 = self.Conv_Relu(name="conv4_2", bottom=self.conv4_1,
                                  out_channels=512, kernel_size=3, stride=1)
    self.conv4_3 = self.Conv_Relu(name="conv4_3", bottom=self.conv4_2,
                                  out_channels=512, kernel_size=3, stride=1)
    # 28 28
    self.pool4 = self.max_pool(self.conv4_3, kernel_size=2, stride=2,
                               name="pool4")

    self.conv5_1 = self.Conv_Relu(name="conv5_1", bottom=self.pool4,
                                  out_channels=512, kernel_size=3, stride=1)
    self.conv5_2 = self.Conv_Relu(name="conv5_2", bottom=self.conv5_1,
                                  out_channels=512, kernel_size=3, stride=1)
    self.conv5_3 = self.Conv_Relu(name="conv5_3", bottom=self.conv5_2,
                                  out_channels=512, kernel_size=3, stride=1)
    # 14 14
    self.pool5 = self.max_pool(self.conv5_3, kernel_size=2, stride=2,
                               name="pool5")
    # 512 7 7

    def fc_layer(self, bottom, out_channels, relu):
        input_shape = bottom.get_shape().as_list()
        conv = self.conv_layer(bottom=bottom, kernel_size=kernel_size,
                               in_channels=input_shape[-1],
                               out_channels=output_channels,
                               stride=stride, name=name)
        if relu == True:
            relu = tf.nn.relu(conv)
        else:
            relu = conv
        return relu

    self.fc6 = self._fc_layer(self.pool5, "fc6")
    if train:
        self.fc6 = tf.nn.dropout(self.fc6, 0.5)

    self.fc7 = self._fc_layer(self.fc6, "fc7")
    if train:
        self.fc7 = tf.nn.dropout(self.fc7, 0.5)

    if use_dilated:
        self.pool5 = tf.batch_to_space(self.pool5, crops=pad, block_size=2)
        self.pool5 = tf.batch_to_space(self.pool5, crops=pad, block_size=2)
        self.fc7 = tf.batch_to_space(self.fc7, crops=pad, block_size=2)
        self.fc7 = tf.batch_to_space(self.fc7, crops=pad, block_size=2)
        return

    if random_init_fc8:
        self.score_fr = self._score_layer(self.fc7, "score_fr", num_classes)
    else:
        self.score_fr = self._fc_layer(self.fc7, "score_fr",
                                       num_classes=num_classes, relu=False)

    self.pred = tf.argmax(self.score_fr, dimension=3)

    self.upscore2 = self._upscore_layer(self.score_fr,
                                        shape=tf.shape(self.pool4),
                                        num_classes=num_classes,
                                        debug=debug, name='upscore2',
                                        ksize=4, stride=2)
    self.score_pool4 = self._score_layer(self.pool4, "score_pool4",
                                         num_classes=num_classes)
    self.fuse_pool4 = tf.add(self.upscore2, self.score_pool4)

    self.upscore4 = self._upscore_layer(self.fuse_pool4,
                                        shape=tf.shape(self.pool3),
                                        num_classes=num_classes,
                                        debug=debug, name='upscore4',
                                        ksize=4, stride=2)
    self.score_pool3 = self._score_layer(self.pool3, "score_pool3",
                                         num_classes=num_classes)
    self.fuse_pool3 = tf.add(self.upscore4, self.score_pool3)

    self.upscore32 = self._upscore_layer(self.fuse_pool3,
                                         shape=tf.shape(bgr),
                                         num_classes=num_classes,
                                         debug=debug, name='upscore32',
                                         ksize=16, stride=8)
    self.pred_up = tf.argmax(self.upscore32, dimension=3)

    self.y_soft = tf.nn.softmax(self.final_layer)
    output_shape = self.final_layer.get_shape().as_list()
    self.logits = tf.reshape(self.final_layer, [output_shape[0], -1, 21])
    # self.logits = tf.reshape(self.final_layer, (-1, 3))
    self.pred = tf.argmax(self.y_soft, axis=3)
'''
n = 10
# images is a 1 x 10 x 10 x 1 array that contains the numbers 1 through 100 in order
images = [[[[x * n + y + 1] for y in range(n)] for x in range(n)]]
'''
images = [[[[1]]], [[[2]]], [[[3]]], [[[4]]]]
const1 = tf.constant(np.array(images), dtype=tf.int32)
print(const1)
print(tf.shape(const1))
tfutil.print_constant(const1)

crops = [[0, 0], [0, 0]]
block_size = 2
bts_const1 = tf.batch_to_space(const1, crops, block_size)
print(bts_const1)
print(tf.shape(bts_const1))
tfutil.print_operation_value(bts_const1)

x = [[[1, 2, 3]], [[4, 5, 6]], [[7, 8, 9]], [[10, 11, 12]]]
const2 = tf.constant(np.array(x), dtype=tf.int32)
print(const2)
print(tf.shape(const2))
tfutil.print_constant(const2)

crops = [[0, 0], [0, 0]]
block_size = 2
bts_const2 = tf.batch_to_space(const2, crops, block_size)
print(bts_const2)
print(tf.shape(bts_const2))
def position_sensitive_crop_regions(image, boxes, crop_size,
                                    num_spatial_bins, global_pool):
    """Position-sensitive crop and pool rectangular regions from a feature grid.

    The output crops are split into `spatial_bins_y` vertical bins
    and `spatial_bins_x` horizontal bins. For each intersection of a vertical
    and a horizontal bin the output values are gathered by performing
    `tf.image.crop_and_resize` (bilinear resampling) on a separate subset
    of channels of the image. This reduces `depth` by a factor of
    `(spatial_bins_y * spatial_bins_x)`.

    When global_pool is True, this function implements a differentiable
    version of position-sensitive RoI pooling used in
    [R-FCN detection system](https://arxiv.org/abs/1605.06409).

    When global_pool is False, this function implements a differentiable
    version of the position-sensitive assembling operation used in
    [instance FCN](https://arxiv.org/abs/1603.08678).

    Args:
      image: A `Tensor`. Must be one of the following types: `uint8`, `int8`,
        `int16`, `int32`, `int64`, `half`, `float32`, `float64`.
        A 3-D tensor of shape `[image_height, image_width, depth]`. Both
        `image_height` and `image_width` need to be positive.
      boxes: A `Tensor` of type `float32`.
        A 2-D tensor of shape `[num_boxes, 4]`. Each box is specified in
        normalized coordinates `[y1, x1, y2, x2]`. A normalized coordinate
        value of `y` is mapped to the image coordinate at
        `y * (image_height - 1)`, so the `[0, 1]` interval of normalized image
        height is mapped to `[0, image_height - 1]` in image height
        coordinates. We do allow y1 > y2, in which case the sampled crop is an
        up-down flipped version of the original image. The width dimension is
        treated similarly.
      crop_size: A list of two integers `[crop_height, crop_width]`. All
        cropped image patches are resized to this size. The aspect ratio of
        the image content is not preserved. Both `crop_height` and
        `crop_width` need to be positive.
      num_spatial_bins: A list of two integers
        `[spatial_bins_y, spatial_bins_x]`. Represents the number of
        position-sensitive bins in y and x directions. Both values should be
        >= 1. `crop_height` should be divisible by `spatial_bins_y`, and
        similarly for width. The number of image channels should be divisible
        by (spatial_bins_y * spatial_bins_x). Suggested value from R-FCN
        paper: [3, 3].
      global_pool: A boolean variable.
        If True, we perform average global pooling on the features assembled
        from the position-sensitive score maps.
        If False, we keep the position-pooled features without global pooling
        over the spatial coordinates. Note that using global_pool=True is
        equivalent to but more efficient than running the function with
        global_pool=False and then performing global average pooling.

    Returns:
      position_sensitive_features: A 4-D tensor of shape
        `[num_boxes, K, K, crop_channels]`,
        where `crop_channels = depth / (spatial_bins_y * spatial_bins_x)`,
        where K = 1 when global_pool is True (average-pooled cropped regions),
        and K = crop_size when global_pool is False.

    Raises:
      ValueError: Raised in four situations:
        `num_spatial_bins` is not >= 1;
        `num_spatial_bins` does not divide `crop_size`;
        `(spatial_bins_y * spatial_bins_x)` does not divide `depth`;
        `bin_crop_size` is not square when global_pool=False due to the
        constraint in function space_to_depth.
    """
    total_bins = 1
    bin_crop_size = []
    for (num_bins, crop_dim) in zip(num_spatial_bins, crop_size):
        if num_bins < 1:
            raise ValueError('num_spatial_bins should be >= 1')
        if crop_dim % num_bins != 0:
            raise ValueError('crop_size should be divisible by num_spatial_bins')
        total_bins *= num_bins
        bin_crop_size.append(crop_dim // num_bins)

    if not global_pool and bin_crop_size[0] != bin_crop_size[1]:
        raise ValueError('Only support square bin crop size for now.')

    ymin, xmin, ymax, xmax = tf.unstack(boxes, axis=1)
    spatial_bins_y, spatial_bins_x = num_spatial_bins

    # Split each box into spatial_bins_y * spatial_bins_x bins.
    position_sensitive_boxes = []
    for bin_y in range(spatial_bins_y):
        step_y = (ymax - ymin) / spatial_bins_y
        for bin_x in range(spatial_bins_x):
            step_x = (xmax - xmin) / spatial_bins_x
            box_coordinates = [
                ymin + bin_y * step_y,
                xmin + bin_x * step_x,
                ymin + (bin_y + 1) * step_y,
                xmin + (bin_x + 1) * step_x,
            ]
            position_sensitive_boxes.append(tf.stack(box_coordinates, axis=1))

    image_splits = tf.split(value=image, num_or_size_splits=total_bins, axis=2)

    image_crops = []
    for (split, box) in zip(image_splits, position_sensitive_boxes):
        if split.shape.is_fully_defined() and box.shape.is_fully_defined():
            crop = tf.squeeze(
                matmul_crop_and_resize(
                    tf.expand_dims(split, axis=0),
                    tf.expand_dims(box, axis=0),
                    bin_crop_size),
                axis=0)
        else:
            crop = tf.image.crop_and_resize(
                tf.expand_dims(split, 0), box,
                tf.zeros(tf.shape(input=boxes)[0], dtype=tf.int32),
                bin_crop_size)
        image_crops.append(crop)

    if global_pool:
        # Average over all bins.
        position_sensitive_features = tf.add_n(image_crops) / len(image_crops)
        # Then average over spatial positions within the bins.
        position_sensitive_features = tf.reduce_mean(
            input_tensor=position_sensitive_features, axis=[1, 2],
            keepdims=True)
    else:
        # Reorder height/width to depth channel.
        block_size = bin_crop_size[0]
        if block_size >= 2:
            image_crops = [
                tf.compat.v1.space_to_depth(input=crop, block_size=block_size)
                for crop in image_crops
            ]

        # Pack image_crops so that the first dimension is for
        # position-sensitive boxes.
        position_sensitive_features = tf.stack(image_crops, axis=0)

        # Unroll the position-sensitive boxes to spatial positions.
        position_sensitive_features = tf.squeeze(
            tf.batch_to_space(
                input=position_sensitive_features,
                block_shape=[1] + num_spatial_bins,
                crops=tf.zeros((3, 2), dtype=tf.int32)),
            axis=[0])

        # Reorder back the depth channel.
        if block_size >= 2:
            position_sensitive_features = tf.compat.v1.depth_to_space(
                input=position_sensitive_features, block_size=block_size)

    return position_sensitive_features
def testAtrousSequence(self):
    """Tests optimization of sequence of atrous convolutions.

    Verifies that a sequence of `atrous_conv2d` operations with identical
    `rate` parameters, 'SAME' `padding`, and `filters` with odd heights/widths:

        net = atrous_conv2d(net, filters1, rate, padding="SAME")
        net = atrous_conv2d(net, filters2, rate, padding="SAME")
        ...
        net = atrous_conv2d(net, filtersK, rate, padding="SAME")

    is equivalent to:

        pad = ...  # padding so that the input dims are multiples of rate
        net = space_to_batch(net, paddings=pad, block_size=rate)
        net = conv2d(net, filters1, strides=[1, 1, 1, 1], padding="SAME")
        net = conv2d(net, filters2, strides=[1, 1, 1, 1], padding="SAME")
        ...
        net = conv2d(net, filtersK, strides=[1, 1, 1, 1], padding="SAME")
        net = batch_to_space(net, crops=pad, block_size=rate)
    """
    padding = "SAME"  # The padding needs to be "SAME".
    np.random.seed(1)  # Make it reproducible.
    with self.test_session():
        # Input: [batch, height, width, input_depth]
        for height in range(15, 17):
            for width in range(15, 17):
                x_shape = [3, height, width, 2]
                x = np.random.random_sample(x_shape).astype(np.float32)
                for kernel in [1, 3, 5]:  # The kernel size needs to be odd.
                    # Filter: [kernel_height, kernel_width, input_depth, output_depth]
                    f_shape = [kernel, kernel, 2, 2]
                    f = 1e-2 * np.random.random_sample(f_shape).astype(np.float32)
                    for rate in range(2, 4):
                        # y1: three atrous_conv2d in a row.
                        y1 = tf.nn.atrous_conv2d(x, f, rate, padding=padding)
                        y1 = tf.nn.atrous_conv2d(y1, f, rate, padding=padding)
                        y1 = tf.nn.atrous_conv2d(y1, f, rate, padding=padding)
                        # y2: space_to_batch, three conv2d in a row, batch_to_space.
                        pad_bottom = 0 if height % rate == 0 else rate - height % rate
                        pad_right = 0 if width % rate == 0 else rate - width % rate
                        pad = [[0, pad_bottom], [0, pad_right]]
                        y2 = tf.space_to_batch(x, paddings=pad, block_size=rate)
                        y2 = tf.nn.conv2d(y2, f, strides=[1, 1, 1, 1], padding=padding)
                        y2 = tf.nn.conv2d(y2, f, strides=[1, 1, 1, 1], padding=padding)
                        y2 = tf.nn.conv2d(y2, f, strides=[1, 1, 1, 1], padding=padding)
                        y2 = tf.batch_to_space(y2, crops=pad, block_size=rate)
                        self.assertAllClose(y1.eval(), y2.eval(), rtol=1e-2, atol=1e-2)
def build(inputs, labels, weights, is_training=True, needs_vgg=False):
    if needs_vgg:
        vgg_layers, vgg_layer_names = read_vgg_init(FLAGS.vgg_init_dir)

    weight_decay = 5e-4
    bn_params = {
        # Decay for the moving averages.
        'decay': 0.999,
        'center': True,
        'scale': True,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # None to force the updates
        'updates_collections': None,
        'is_training': is_training,
    }
    with tf.contrib.framework.arg_scope(
            [layers.convolution2d],
            kernel_size=3, stride=1, padding='SAME', rate=1,
            activation_fn=tf.nn.relu,
            # normalizer_fn=layers.batch_norm, normalizer_params=bn_params,
            # weights_initializer=layers.variance_scaling_initializer(),
            normalizer_fn=None,
            weights_initializer=None,
            weights_regularizer=layers.l2_regularizer(weight_decay)):
        net = layers.convolution2d(inputs, 64, scope='conv1_1')
        net = layers.convolution2d(net, 64, scope='conv1_2')
        net = layers.max_pool2d(net, 2, 2, scope='pool1')
        net = layers.convolution2d(net, 128, scope='conv2_1')
        net = layers.convolution2d(net, 128, scope='conv2_2')
        net = layers.max_pool2d(net, 2, 2, scope='pool2')
        net = layers.convolution2d(net, 256, scope='conv3_1')
        net = layers.convolution2d(net, 256, scope='conv3_2')
        net = layers.convolution2d(net, 256, scope='conv3_3')

        paddings = [[0, 0], [0, 0]]
        crops = [[0, 0], [0, 0]]
        block_size = 2
        net = tf.space_to_batch(net, paddings=paddings, block_size=block_size)
        net = layers.convolution2d(net, 512, scope='conv4_1')
        net = layers.convolution2d(net, 512, scope='conv4_2')
        net = layers.convolution2d(net, 512, scope='conv4_3')
        net = tf.batch_to_space(net, crops=crops, block_size=block_size)

        block_size = 4
        net = tf.space_to_batch(net, paddings=paddings, block_size=block_size)
        net = layers.convolution2d(net, 512, scope='conv5_1')
        net = layers.convolution2d(net, 512, scope='conv5_2')
        net = layers.convolution2d(net, 512, scope='conv5_3')
        net = tf.batch_to_space(net, crops=crops, block_size=block_size)

    with tf.contrib.framework.arg_scope(
            [layers.convolution2d],
            stride=1, padding='SAME',
            weights_initializer=layers.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=bn_params,
            weights_regularizer=layers.l2_regularizer(FLAGS.weight_decay)):
        net = layers.convolution2d(net, 512, kernel_size=3,
                                   scope='conv6_1', rate=4)

    logits = layers.convolution2d(net, FLAGS.num_classes, 1, padding='SAME',
                                  activation_fn=None, scope='unary_2', rate=2)
    print('logits', logits.get_shape())
    logits = tf.image.resize_bilinear(logits,
                                      [FLAGS.img_height, FLAGS.img_width],
                                      name='resize_score')

    loss = get_loss(logits, labels, weights, is_training=is_training)

    if is_training and needs_vgg:
        init_op, init_feed = create_init_op(vgg_layers)
        return logits, loss, init_op, init_feed

    return logits, loss
def batch_to_space(*args, **kwargs):
    return tf.batch_to_space(*args, **kwargs)
def _build_dilated(image, is_training=False):
    # image = tf.Print(image, [tf.shape(image)], message='img_shape = ', summarize=10)
    bn_params['is_training'] = is_training
    with arg_scope([layers.conv2d],
                   data_format=data_format, stride=1, padding='SAME',
                   activation_fn=None, normalizer_fn=None,
                   normalizer_params=None, weights_initializer=init_func,
                   biases_initializer=None,
                   weights_regularizer=layers.l2_regularizer(weight_decay)):
        with tf.variable_scope('conv0'):
            net = layers.conv2d(image, 2*growth, 7, stride=2)
            # net = layers.conv2d(image, 2*growth, 7, stride=1)  # TODO
            net = tf.contrib.layers.batch_norm(net, **bn_params)
            net = tf.nn.relu(net)
        net = layers.max_pool2d(net, 2, stride=2, padding='SAME',
                                data_format=data_format, scope='pool0')

        skip_layers = []
        # no diff with double BN from orig densenet, first=True
        net = dense_block(net, block_sizes[0], growth, 'block0',
                          is_training, first=True)
        # net, skip = dense_block(net, block_sizes[0], growth, 'block0',
        #                         is_training, first=True, split=True)
        # skip_layers.append([skip, 256, growth_up, 'block0_mid_refine', depth])
        # skip_layers.append([skip, up_sizes[0], growth_up, 'block0_mid_refine'])
        skip_layers.append([net, up_sizes[0], growth_up, 'block0_refine'])
        net, _ = transition(net, compression, 'block0/transition')
        # skip_layers.append([skip, up_sizes[0], growth_up, 'block0_refine'])

        # net, skip = dense_block(net, block_sizes[1], growth, 'block1',
        #                         is_training, split=True)
        # skip_layers.append([skip, up_sizes[1], growth_up, 'block1_mid_refine'])
        net = dense_block(net, block_sizes[1], growth, 'block1', is_training)
        skip_layers.append([net, up_sizes[1], growth_up, 'block1_refine'])
        net, _ = transition(net, compression, 'block1/transition')
        # skip_layers.append([skip, up_sizes[1], growth_up, 'block1_refine'])

        # works the same with split, not 100%
        # context_pool_num = 3
        # context_pool_num = 4
        context_pool_num = 5
        # net, skip = dense_block(net, block_sizes[2], growth, 'block2',
        #                         is_training, split=True)
        # skip_layers.append([skip, up_sizes[2], growth_up, 'block2_mid_refine'])
        net = dense_block(net, block_sizes[2], growth, 'block2', is_training)
        # skip_layers.append([net, up_sizes[3], growth_up, 'block2_refine'])
        # skip_layers.append([net, up_sizes[2], growth_up, 'block2_refine'])
        net, _ = transition(net, compression, 'block2/transition', stride=1)

        bsz = 2
        paddings, crops = tf.required_space_to_batch_paddings(
            image_size(net), [bsz, bsz])
        net = tf.space_to_batch(net, paddings=paddings, block_size=bsz)
        net = dense_block(net, block_sizes[3], growth, 'block3', is_training)
        net = tf.batch_to_space(net, crops=crops, block_size=bsz)
        print('before context = ', net)

        with tf.variable_scope('head'):
            net = BNReluConv(net, 512, 'bottleneck', k=1)
            net = _pyramid_pooling(net, size=context_pool_num)
            # net = BNReluConv(net, context_size, 'context_conv', k=3)

            print('Before upsampling: ', net)
            all_logits = [net]
            for skip_layer in reversed(skip_layers):
                net = refine(net, skip_layer)
                all_logits.append(net)
            print('after upsampling = ', net)

            all_logits = [all_logits[0], all_logits[-1]]
            # all_logits = [all_logits[1], all_logits[-1]]
            # all_logits = [all_logits[2], all_logits[-1]]

        with tf.variable_scope('head'):
            for i, logits in enumerate(all_logits):
                with tf.variable_scope('logits_' + str(i)):
                    # FIX
                    # net = tf.nn.relu(layers.batch_norm(net, **bn_params))
                    # logits = layers.conv2d(net, FLAGS.num_classes, 1,
                    #                        activation_fn=None,
                    #                        data_format=data_format)
                    logits = layers.conv2d(tf.nn.relu(logits),
                                           FLAGS.num_classes, 1,
                                           activation_fn=None,
                                           data_format=data_format)
                    if data_format == 'NCHW':
                        logits = tf.transpose(logits, perm=[0, 2, 3, 1])
                    input_shape = tf.shape(image)[height_dim:height_dim+2]
                    logits = tf.image.resize_bilinear(logits, input_shape,
                                                      name='resize_logits')
                    all_logits[i] = logits

    logits = all_logits.pop()
    return logits, all_logits
def build(inputs):
    weight_decay = 5e-4
    bn_params = {
        # Decay for the moving averages.
        'decay': 0.999,
        'center': True,
        'scale': True,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # None to force the updates
        'updates_collections': None,
        'is_training': False,
    }
    with tf.contrib.framework.arg_scope(
            [layers.convolution2d],
            kernel_size=3, stride=1, padding='SAME', rate=1,
            activation_fn=tf.nn.relu,
            # normalizer_fn=layers.batch_norm, normalizer_params=bn_params,
            # weights_initializer=layers.variance_scaling_initializer(),
            normalizer_fn=None,
            weights_initializer=None,
            weights_regularizer=layers.l2_regularizer(weight_decay)):
        import pdb
        # pdb.set_trace()
        net = layers.convolution2d(inputs, 64, scope='conv1_1')
        net = layers.convolution2d(net, 64, scope='conv1_2')
        net = layers.max_pool2d(net, 2, 2, scope='pool1')
        net = layers.convolution2d(net, 128, scope='conv2_1')
        net = layers.convolution2d(net, 128, scope='conv2_2')
        net = layers.max_pool2d(net, 2, 2, scope='pool2')
        net = layers.convolution2d(net, 256, scope='conv3_1')
        net = layers.convolution2d(net, 256, scope='conv3_2')
        net = layers.convolution2d(net, 256, scope='conv3_3')

        paddings = [[0, 0], [0, 0]]
        crops = [[0, 0], [0, 0]]
        block_size = 2
        net = tf.space_to_batch(net, paddings=paddings, block_size=block_size)
        net = layers.convolution2d(net, 512, scope='conv4_1')
        net = layers.convolution2d(net, 512, scope='conv4_2')
        net = layers.convolution2d(net, 512, scope='conv4_3')
        net = tf.batch_to_space(net, crops=crops, block_size=block_size)

        block_size = 4
        net = tf.space_to_batch(net, paddings=paddings, block_size=block_size)
        net = layers.convolution2d(net, 512, scope='conv5_1')
        net = layers.convolution2d(net, 512, scope='conv5_2')
        net = layers.convolution2d(net, 512, scope='conv5_3')
        net = tf.batch_to_space(net, crops=crops, block_size=block_size)

    with tf.contrib.framework.arg_scope(
            [layers.convolution2d],
            stride=1, padding='SAME',
            weights_initializer=layers.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=layers.batch_norm,
            normalizer_params=bn_params,
            weights_regularizer=layers.l2_regularizer(1e-3)):
        net = layers.convolution2d(net, 512, kernel_size=3,
                                   scope='conv6_1', rate=4)

    logits = layers.convolution2d(net, 19, 1, padding='SAME',
                                  activation_fn=None, scope='unary_2', rate=2)
    print('logits', logits.get_shape())
    # logits = tf.image.resize_bilinear(logits, [256, 512], name='resize_score')
    return logits
def vol3d_encoder(self, x, name='Vol3D_Encoder'):
    with argscope([Conv3D], kernel_shape=4, padding='SAME', nl=tf.nn.elu):
        # x = x - VGG19_MEAN_TENSOR
        x = tf_2tanh(x)
        # x = x/255.0
        x = tf.expand_dims(x, axis=0)  # to 1 256 256 256 3
        x = tf.transpose(x, [4, 1, 2, 3, 0])
        """
        x = (LinearWrap(x)
             .Conv3D('conv1a', 16, strides=2, padding='SAME')
             .Conv3D('conv2a', 32, strides=2, padding='SAME')
             .Conv3D('conv3a', 64, strides=2, padding='SAME')
             .Conv3D('conv4a', 128, strides=2, padding='SAME')
             .Conv3D('conv5a', 256, strides=2, padding='SAME')
             .Conv3D('conv6a', 1024, strides=2, padding='SAME',
                     use_bias=True, nl=tf.tanh)  # 4x4x4x1024
             ())
        """
        with tf.contrib.framework.arg_scope([conv_3d], filter_size=4,
                                            strides=[1, 2, 2, 2, 1],
                                            activation='relu', reuse=False):
            with tf.contrib.framework.arg_scope([conv_3d_transpose],
                                                filter_size=4,
                                                strides=[1, 2, 2, 2, 1],
                                                activation='relu',
                                                reuse=False):
                # Encoder
                e1a = conv_3d(incoming=x, name="e1a", nb_filter=16, bias=False)
                r1a = tf_bottleneck(e1a, name="r1a", nb_filter=16)
                # r1a = tf.nn.dropout(r1a, keep_prob=0.5)
                e2a = conv_3d(incoming=r1a, name="e2a", nb_filter=32, bias=False)
                r2a = tf_bottleneck(e2a, name="r2a", nb_filter=32)
                # r2a = tf.nn.dropout(r2a, keep_prob=0.5)
                e3a = conv_3d(incoming=r2a, name="e3a", nb_filter=64, bias=False)
                r3a = tf_bottleneck(e3a, name="r3a", nb_filter=64)
                # r3a = tf.nn.dropout(r3a, keep_prob=0.5)
                e4a = conv_3d(incoming=r3a, name="e4a", nb_filter=128, bias=False)
                r4a = tf_bottleneck(e4a, name="r4a", nb_filter=128)
                # r4a = tf.nn.dropout(r4a, keep_prob=0.5)
                e5a = conv_3d(incoming=r4a, name="e5a", nb_filter=256, bias=False)
                r5a = tf_bottleneck(e5a, name="r5a", nb_filter=256)
                # r5a = tf.nn.dropout(r5a, keep_prob=0.5)
                e6a = conv_3d(incoming=r5a, name="e6a", nb_filter=1024, bias=False)
                r6a = tf_bottleneck(e6a, name="r6a", nb_filter=1024)
                # e7a = conv_3d(incoming=r6a, name="e7a", nb_filter=NB_FILTERS*8, bias=False)
                # r7a = tf_bottleneck(e7a, name="r7a", nb_filter=NB_FILTERS*8)
                # r7a = dropout(incoming=r7a, keep_prob=0.5)

                print("In1 :", x.get_shape().as_list())
                print("E1a :", e1a.get_shape().as_list())
                print("R1a :", r1a.get_shape().as_list())
                print("E2a :", e2a.get_shape().as_list())
                print("R2a :", r2a.get_shape().as_list())
                print("E3a :", e3a.get_shape().as_list())
                print("R3a :", r3a.get_shape().as_list())
                print("E4a :", e4a.get_shape().as_list())
                print("R4a :", r4a.get_shape().as_list())
                print("E5a :", e5a.get_shape().as_list())
                print("R5a :", r5a.get_shape().as_list())
                print("E6a :", e6a.get_shape().as_list())
                print("R6a :", r6a.get_shape().as_list())

                x = r6a
        x = tf.transpose(x, [4, 1, 2, 3, 0])
        x = tf.reshape(x, [-1, 4, 4, 3])
        x = tf.batch_to_space(x, crops=[[0, 0], [0, 0]], block_size=64, name='b2s')
        # x = x*255.0
        x = tf_2imag(x)
        x = INLReLU(x)
        # x = x + VGG19_MEAN_TENSOR
        return x
def create_resample_net(self, shape, factor, use_new_frontend):
    """
    The sub-graph in TF that could be expressed as a single Resample operation.
    """
    #
    # Create Tensorflow model
    #
    import tensorflow as tf

    tf.compat.v1.reset_default_graph()

    # Create the graph and model
    with tf.compat.v1.Session() as sess:
        tf_x_shape = shape.copy()
        tf_x_shape = np.array(tf_x_shape)[[0, 2, 3, 1]]

        input = tf.compat.v1.placeholder(tf.float32, tf_x_shape, 'Input')
        transpose_1 = tf.transpose(a=input, perm=[1, 2, 3, 0])
        expand_dims = tf.expand_dims(transpose_1, 0)
        tile = tf.tile(expand_dims, [factor * factor, 1, 1, 1, 1])
        bts = tf.batch_to_space(tile, [factor, factor], [[0, 0], [0, 0]])
        strided_slice = bts[0, ...]
        tf.transpose(a=strided_slice, perm=[3, 0, 1, 2])

        tf.compat.v1.global_variables_initializer()
        tf_net = sess.graph_def

    #
    # Create reference IR net
    # Please, specify 'type': 'Input' for input node
    # Moreover, do not forget to validate ALL layer attributes!!!
    #
    ref_net = None
    if not use_new_frontend:
        new_shape = shape.copy()
        new_shape[2] *= factor
        new_shape[3] *= factor
        nodes_attributes = {
            'input': {'kind': 'op', 'type': 'Input'},
            'input_data': {'shape': shape, 'kind': 'data'},
            'resample': {
                'kind': 'op',
                'type': 'caffe.ResampleParameter.NEAREST',
                "factor": factor,
                "height": 0,
                "width": 0,
                "antialias": 0
            },
            'resample_data': {'shape': new_shape, 'kind': 'data'},
        }
        ref_net = build_graph(nodes_attributes,
                              [('input', 'input_data'),
                               ('input_data', 'resample'),
                               ('resample', 'resample_data')])

    return tf_net, ref_net
def PhaseShift_BatchToSpace(X, r):
    X = tf.transpose(X, (3, 1, 2, 0))
    result = tf.batch_to_space(X, [[0, 0], [0, 0]], r)
    result = tf.transpose(result, (3, 1, 2, 0))
    return result
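Shape-wise, this variant treats channels as the batch axis, so the channel count must be divisible by r**2. A quick sketch assuming the TF 1.x signature used above:

import tensorflow as tf  # TF 1.x assumed

X = tf.zeros([3, 8, 8, 16])     # NHWC; 16 channels = 4 * r**2 for r = 2
Y = PhaseShift_BatchToSpace(X, 2)
print(Y.get_shape().as_list())  # [3, 16, 16, 4]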
def build(self, rgb, train=False, num_classes=3, random_init_fc8=False,
          debug=False, use_dilated=False):
    """
    Build the VGG model using loaded weights

    Parameters
    ----------
    rgb: image batch tensor
        Image in rgb shape. Scaled to interval [0, 255]
    train: bool
        Whether to build train or inference graph
    num_classes: int
        How many classes should be predicted (by fc8)
    random_init_fc8 : bool
        Whether to initialize fc8 layer randomly.
        Finetuning is required in this case.
    debug: bool
        Whether to print additional Debug Information.
    """
    # Convert RGB to BGR
    # with tf.name_scope('Processing'):
    #     red, green, blue = tf.split(rgb, 3, 3)
    #     # assert red.get_shape().as_list()[1:] == [224, 224, 1]
    #     # assert green.get_shape().as_list()[1:] == [224, 224, 1]
    #     # assert blue.get_shape().as_list()[1:] == [224, 224, 1]
    #     bgr = tf.concat([
    #         blue - VGG_MEAN[0],
    #         green - VGG_MEAN[1],
    #         red - VGG_MEAN[2],
    #     ], 3)
    #
    #     if debug:
    #         bgr = tf.Print(bgr, [tf.shape(bgr)],
    #                        message='Shape of input image: ',
    #                        summarize=4, first_n=1)
    bgr = rgb

    self.conv1_1 = self._conv_layer(bgr, "conv1_1")
    self.conv1_2 = self._conv_layer(self.conv1_1, "conv1_2")
    self.pool1 = self._max_pool(self.conv1_2, 'pool1', debug)

    self.conv2_1 = self._conv_layer(self.pool1, "conv2_1")
    self.conv2_2 = self._conv_layer(self.conv2_1, "conv2_2")
    self.pool2 = self._max_pool(self.conv2_2, 'pool2', debug)

    self.conv3_1 = self._conv_layer(self.pool2, "conv3_1")
    self.conv3_2 = self._conv_layer(self.conv3_1, "conv3_2")
    self.conv3_3 = self._conv_layer(self.conv3_2, "conv3_3")
    self.pool3 = self._max_pool(self.conv3_3, 'pool3', debug)

    self.conv4_1 = self._conv_layer(self.pool3, "conv4_1")
    self.conv4_2 = self._conv_layer(self.conv4_1, "conv4_2")
    self.conv4_3 = self._conv_layer(self.conv4_2, "conv4_3")

    if use_dilated:
        pad = [[0, 0], [0, 0]]
        self.pool4 = tf.nn.max_pool(self.conv4_3, ksize=[1, 2, 2, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='SAME', name='pool4')
        self.pool4 = tf.space_to_batch(self.pool4, paddings=pad, block_size=2)
    else:
        self.pool4 = self._max_pool(self.conv4_3, 'pool4', debug)

    self.conv5_1 = self._conv_layer(self.pool4, "conv5_1")
    self.conv5_2 = self._conv_layer(self.conv5_1, "conv5_2")
    self.conv5_3 = self._conv_layer(self.conv5_2, "conv5_3")
    if use_dilated:
        pad = [[0, 0], [0, 0]]
        self.pool5 = tf.nn.max_pool(self.conv5_3, ksize=[1, 2, 2, 1],
                                    strides=[1, 1, 1, 1],
                                    padding='SAME', name='pool5')
        self.pool5 = tf.space_to_batch(self.pool5, paddings=pad, block_size=2)
    else:
        self.pool5 = self._max_pool(self.conv5_3, 'pool5', debug)

    self.fc6 = self._fc_layer(self.pool5, "fc6")
    if train:
        self.fc6 = tf.nn.dropout(self.fc6, 0.5)

    self.fc7 = self._fc_layer(self.fc6, "fc7")
    if train:
        self.fc7 = tf.nn.dropout(self.fc7, 0.5)

    if use_dilated:
        self.pool5 = tf.batch_to_space(self.pool5, crops=pad, block_size=2)
        self.pool5 = tf.batch_to_space(self.pool5, crops=pad, block_size=2)
        self.fc7 = tf.batch_to_space(self.fc7, crops=pad, block_size=2)
        self.fc7 = tf.batch_to_space(self.fc7, crops=pad, block_size=2)
        return

    if random_init_fc8:
        self.score_fr = self._score_layer(self.fc7, "score_fr", num_classes)
    else:
        self.score_fr = self._fc_layer(self.fc7, "score_fr",
                                       num_classes=num_classes, relu=False)

    self.pred = tf.argmax(self.score_fr, dimension=3)

    self.upscore2 = self._upscore_layer(self.score_fr,
                                        shape=tf.shape(self.pool4),
                                        num_classes=num_classes,
                                        debug=debug, name='upscore2',
                                        ksize=4, stride=2)
    self.score_pool4 = self._score_layer(self.pool4, "score_pool4",
                                         num_classes=num_classes)
    self.fuse_pool4 = tf.add(self.upscore2, self.score_pool4)

    self.upscore4 = self._upscore_layer(self.fuse_pool4,
                                        shape=tf.shape(self.pool3),
                                        num_classes=num_classes,
                                        debug=debug, name='upscore4',
                                        ksize=4, stride=2)
    self.score_pool3 = self._score_layer(self.pool3, "score_pool3",
                                         num_classes=num_classes)
    self.fuse_pool3 = tf.add(self.upscore4, self.score_pool3)

    self.upscore32 = self._upscore_layer(self.fuse_pool3,
                                         shape=tf.shape(bgr),
                                         num_classes=num_classes,
                                         debug=debug, name='upscore32',
                                         ksize=16, stride=8)

    self.pred_up = tf.argmax(self.upscore32, dimension=3)
    self.softmax = tf.nn.softmax(self.upscore32, dim=3)
def upsample(x, n):
    """Upscales the width and height of the input vector by a factor of n."""
    # Is this approach the same as in subpixel convolutions?
    if n < 2:
        return x
    return tf.batch_to_space(tf.tile(x, [n**2, 1, 1, 1]), [[0, 0], [0, 0]], n)
def _PS(self, I, r):
    X = tf.transpose(a=I, perm=[2, 1, 0])  # (r, w, b)
    X = tf.batch_to_space(input=X, block_shape=[r], crops=[[0, 0]])  # (1, r*w, b)
    X = tf.transpose(a=X, perm=[2, 1, 0])
    return X
def Batch2Space(bottom, crops, block_size=2, name=''):
    with tf.variable_scope(name):
        top = tf.batch_to_space(bottom, crops=crops, block_size=block_size)
        print(bottom.name, '->', top.name)
        return top