def testGradient(self):
    # Set graph seed for determinism.
    random_seed = 42
    tf.set_random_seed(random_seed)

    with self.test_session():
        for test_case in self._TEST_CASES:
            np.random.seed(random_seed)
            in_shape = test_case['in_shape']
            in_val = tf.constant(np.random.random(in_shape), dtype=tf.float32)

            for padding in ['VALID', 'SAME']:
                out_val = tf.extract_image_patches(in_val, test_case['ksizes'],
                                                   test_case['strides'],
                                                   test_case['rates'], padding)
                out_shape = out_val.get_shape().as_list()

                err = tf.test.compute_gradient_error(in_val, in_shape,
                                                     out_val, out_shape)

                print('extract_image_patches gradient err: %.4e' % err)
                self.assertLess(err, 1e-4)
def _VerifyValues(self, image, ksizes, strides, rates, padding, patches):
    """Tests input-output pairs for the ExtractImagePatches op.

    Args:
      image: Input tensor with shape: [batch, in_rows, in_cols, depth].
      ksizes: Patch size specified as: [ksize_rows, ksize_cols].
      strides: Output strides, specified as [stride_rows, stride_cols].
      rates: Atrous rates, specified as [rate_rows, rate_cols].
      padding: Padding type.
      patches: Expected output.
    """
    ksizes = [1] + ksizes + [1]
    strides = [1] + strides + [1]
    rates = [1] + rates + [1]
    for use_gpu in [False, True]:
        with self.test_session(use_gpu=use_gpu):
            out_tensor = tf.extract_image_patches(
                tf.constant(image),
                ksizes=ksizes,
                strides=strides,
                rates=rates,
                padding=padding,
                name="im2col")
            self.assertAllClose(patches, out_tensor.eval())
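# A minimal, self-contained sketch (assuming TF 1.x) of the behaviour the test
# above verifies: a 4x4 single-channel image cut into non-overlapping 2x2
# patches, each patch flattened row-major into the last dimension.
import numpy as np
import tensorflow as tf

image = np.arange(1, 17, dtype=np.float32).reshape(1, 4, 4, 1)
patches = tf.extract_image_patches(tf.constant(image),
                                   ksizes=[1, 2, 2, 1],
                                   strides=[1, 2, 2, 1],
                                   rates=[1, 1, 1, 1],
                                   padding='VALID')
with tf.Session() as sess:
    out = sess.run(patches)  # shape (1, 2, 2, 4)
    # out[0, 0, 0] == [1., 2., 5., 6.], the top-left 2x2 patch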
def read_tensor_from_image_file(file_name):
    input_name = "file_reader"
    output_name = "normalized"
    width = input_size
    height = input_size
    num_channels = 3
    file_reader = tf.read_file(file_name, input_name)
    if file_name.endswith(".png"):
        image_reader = tf.image.decode_png(file_reader, channels=3,
                                           name='png_reader')
    elif file_name.endswith(".gif"):
        image_reader = tf.squeeze(tf.image.decode_gif(file_reader,
                                                      name='gif_reader'))
    elif file_name.endswith(".bmp"):
        image_reader = tf.image.decode_bmp(file_reader, name='bmp_reader')
    else:
        image_reader = tf.image.decode_jpeg(file_reader, channels=3,
                                            name='jpeg_reader')
    float_caster = tf.cast(image_reader, tf.float32)
    dims_expander = tf.expand_dims(float_caster, 0)
    # resized = tf.image.resize_bilinear(dims_expander, [input_size, input_size])
    normalized = tf.divide(tf.subtract(dims_expander, [input_mean]), [input_std])
    # Integer division keeps the stride arguments ints under Python 3.
    patches = tf.extract_image_patches(normalized,
                                       ksizes=[1, patch_height, patch_width, 1],
                                       strides=[1, patch_height // 4, patch_width // 4, 1],
                                       rates=[1, 1, 1, 1],
                                       padding="VALID")
    patches_shape = tf.shape(patches)
    patches = tf.reshape(patches, [-1, patch_height, patch_width, num_channels])
    patches = tf.image.resize_images(patches, [height, width])
    patches = tf.reshape(patches, [-1, height, width, num_channels])
    sess = tf.Session()
    return sess.run([patches, patches_shape])
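# The function above relies on module-level globals it never defines
# (input_size, input_mean, input_std, patch_height, patch_width). The values
# below are purely illustrative placeholders, not from the original source.
input_size = 224
input_mean = 128
input_std = 128
patch_height = patch_width = 64
# patches, patches_shape = read_tensor_from_image_file('photo.jpg')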
def style_swap(content, style, patch_size, stride):
    '''Efficiently swap content feature patches with nearest-neighbor style patches
       Original paper: https://arxiv.org/abs/1612.04337
       Adapted from: https://github.com/rtqichen/style-swap/blob/master/lib/NonparametricPatchAutoencoderFactory.lua
    '''
    nC = tf.shape(style)[-1]  # Num channels of input content feature and style-swapped output

    ### Extract patches from style image that will be used for conv/deconv layers
    style_patches = tf.extract_image_patches(style, [1, patch_size, patch_size, 1],
                                             [1, stride, stride, 1], [1, 1, 1, 1], 'VALID')
    before_reshape = tf.shape(style_patches)  # NxRowsxColsxPatch_size*Patch_size*nC
    style_patches = tf.reshape(style_patches,
                               [before_reshape[1] * before_reshape[2], patch_size, patch_size, nC])
    style_patches = tf.transpose(style_patches, [1, 2, 3, 0])  # Patch_sizexPatch_sizexIn_CxOut_c

    # Normalize each style patch
    style_patches_norm = tf.nn.l2_normalize(style_patches, dim=3)

    # Compute cross-correlation/nearest neighbors of patches by using style patches as conv filters
    ss_enc = tf.nn.conv2d(content, style_patches_norm, [1, stride, stride, 1], 'VALID')

    # For each spatial position find index of max along channel/patch dim
    ss_argmax = tf.argmax(ss_enc, axis=3)
    encC = tf.shape(ss_enc)[-1]  # Num channels in intermediate conv output, same as # of patches
    # One-hot encode argmax with same size as ss_enc, with 1's in max channel idx for each spatial pos
    ss_oh = tf.one_hot(ss_argmax, encC, 1., 0., 3)

    # Calc size of transposed conv out
    deconv_out_H = utils.deconv_output_length(tf.shape(ss_oh)[1], patch_size, 'valid', stride)
    deconv_out_W = utils.deconv_output_length(tf.shape(ss_oh)[2], patch_size, 'valid', stride)
    deconv_out_shape = tf.stack([1, deconv_out_H, deconv_out_W, nC])

    # Deconv back to original content size with highest matching (unnormalized) style patch
    # swapped in for each content patch
    ss_dec = tf.nn.conv2d_transpose(ss_oh, style_patches, deconv_out_shape,
                                    [1, stride, stride, 1], 'VALID')

    ### Interpolate to average overlapping patch locations
    ss_oh_sum = tf.reduce_sum(ss_oh, axis=3, keep_dims=True)
    filter_ones = tf.ones([patch_size, patch_size, 1, 1], dtype=tf.float32)
    deconv_out_shape = tf.stack([1, deconv_out_H, deconv_out_W, 1])  # Same spatial size as ss_dec with 1 channel
    counting = tf.nn.conv2d_transpose(ss_oh_sum, filter_ones, deconv_out_shape,
                                      [1, stride, stride, 1], 'VALID')
    counting = tf.tile(counting, [1, 1, 1, nC])  # Repeat along channel dim to make same size as ss_dec
    interpolated_dec = tf.divide(ss_dec, counting)

    return interpolated_dec
def extract_patches(sess, data, width, stride):
    '''
    Extract patches from images
    :data   input image
    :width  dimension of the patch
    :stride stride of patch selection on the image
    '''
    print('Patch extraction with stride=%d and width=%d begins' % (stride, width))
    data_pl = tf.placeholder(tf.float64,
                             [data.shape[0], data.shape[1], data.shape[2], data.shape[3]],
                             name='data_placeholder')
    data_o = tf.extract_image_patches(images=data_pl,
                                      ksizes=[1, width, width, 1],
                                      strides=[1, stride, stride, 1],
                                      rates=[1, 1, 1, 1],
                                      padding='VALID')
    print('Patch extraction done')
    size_tot = data_o.get_shape().as_list()
    # Note: this reshape assumes a batch of a single image.
    data_o = tf.reshape(data_o, [size_tot[1] * size_tot[2], width, width, data.shape[3]])
    Data_o = sess.run(data_o, feed_dict={data_pl: data})
    print('%d patches of size %d x %d created as list' % (Data_o.shape[0], Data_o.shape[1], Data_o.shape[2]))
    return Data_o
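# Hedged usage sketch for the helper above (TF 1.x; shapes illustrative).
# A single 32x32 RGB image with width=8, stride=4 yields
# ((32 - 8) // 4 + 1) ** 2 = 49 patches.
import numpy as np
import tensorflow as tf

data = np.random.rand(1, 32, 32, 3)
with tf.Session() as sess:
    patches = extract_patches(sess, data, width=8, stride=4)  # (49, 8, 8, 3)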
def testGradient(self):
    with self.test_session():
        for test in self.test_cases:
            in_shape = test['in_shape']
            in_val = tf.constant(np.random.random(in_shape), dtype=tf.float32)

            for padding in ['VALID', 'SAME']:
                out_val = tf.extract_image_patches(in_val, test['ksizes'],
                                                   test['strides'], test['rates'],
                                                   padding)
                out_shape = out_val.get_shape().as_list()

                err = tf.test.compute_gradient_error(in_val, in_shape,
                                                     out_val, out_shape)

                print('extract_image_patches gradient err: %.4e' % err)
                self.assertLess(err, 1e-4)
def _build_net(self):
    with tf.variable_scope(self.name, values=[self.in_size, self.ksize, self.lr]):
        # input place holders
        self.X = tf.placeholder(
            tf.float32, shape=[None, self.in_size, self.in_size, 1])  # img 64x64x1 (gray scale)
        self.Y = tf.placeholder(tf.float32, shape=[None, self.in_size, self.in_size])
        self.keep = tf.placeholder(tf.float32)

        in_x = tf.image.resize_image_with_crop_or_pad(
            self.X, self.in_size + self.ksize - 1, self.in_size + self.ksize - 1)
        in_x = tf.extract_image_patches(
            in_x,
            ksizes=[1, self.in_size, self.in_size, 1],
            strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1],
            padding="VALID")
        w1 = init_w('w1', [1, self.ksize, self.ksize, in_x.shape[3]])
        L1 = tf.reduce_sum(tf.multiply(in_x, w1), axis=(1, 2))
        L1 = tf.reshape(L1, [-1, self.in_size, self.in_size, 1])

        in_L2 = tf.image.resize_image_with_crop_or_pad(
            L1, self.in_size + self.ksize - 1, self.in_size + self.ksize - 1)
        in_L2 = tf.extract_image_patches(
            in_L2,
            ksizes=[1, self.in_size, self.in_size, 1],
            strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1],
            padding="VALID")
        w2 = init_w('w2', [1, self.ksize, self.ksize, in_L2.shape[3]])
        L2 = tf.reduce_sum(tf.multiply(in_L2, w2), axis=(1, 2))
        L2 = tf.reshape(L2, [-1, self.in_size, self.in_size, 1])

        in_L3 = tf.image.resize_image_with_crop_or_pad(
            L2, self.in_size + self.ksize - 1, self.in_size + self.ksize - 1)
        in_L3 = tf.extract_image_patches(
            in_L3,
            ksizes=[1, self.in_size, self.in_size, 1],
            strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1],
            padding="VALID")
        # The original read `in_L2.shape[3]` and reshaped L2 here; both look
        # like copy-paste slips from the layer above, fixed to use layer 3.
        w3 = init_w('w3', [1, self.ksize, self.ksize, in_L3.shape[3]])
        L3 = tf.reduce_sum(tf.multiply(in_L3, w3), axis=(1, 2))
        L3 = tf.reshape(L3, [-1, self.in_size, self.in_size, 1])

        in_L4 = tf.image.resize_image_with_crop_or_pad(
            L3, self.in_size + self.ksize - 1, self.in_size + self.ksize - 1)
        in_L4 = tf.extract_image_patches(
            in_L4,
            ksizes=[1, self.in_size, self.in_size, 1],
            strides=[1, 1, 1, 1],
            rates=[1, 1, 1, 1],
            padding="VALID")
        w4 = init_w('w4', [1, self.ksize, self.ksize, in_L4.shape[3]])
        L4 = tf.reduce_sum(tf.multiply(in_L4, w4), axis=(1, 2))
        self.logits = tf.reshape(L4, [-1, self.in_size, self.in_size])

    # define cost/loss & optimizer
    # beta = 0.01
    # self.regularizers = tf.nn.l2_loss(w1)  # + tf.nn.l2_loss(w2)
    self.cost = tf.reduce_mean(
        tf.square(self.logits - self.Y))  # + beta*self.regularizers)
    self.optimizer = tf.train.AdamOptimizer(
        learning_rate=self.lr).minimize(self.cost)
def compute_stats(self, loss_sampled, var_list=None):
    varlist = var_list
    if varlist is None:
        varlist = tf.trainable_variables()

    gs = tf.gradients(loss_sampled, varlist, name='gradientsSampled')
    self.gs = gs
    factors = self.getFactors(gs, varlist)
    stats = self.getStats(factors, varlist)

    updateOps = []
    statsUpdates = {}
    statsUpdates_cache = {}
    for var in varlist:
        opType = factors[var]['opName']
        fops = factors[var]['op']
        fpropFactor = factors[var]['fpropFactors_concat']
        fpropStats_vars = stats[var]['fprop_concat_stats']
        bpropFactor = factors[var]['bpropFactors_concat']
        bpropStats_vars = stats[var]['bprop_concat_stats']
        SVD_factors = {}
        for stats_var in fpropStats_vars:
            stats_var_dim = int(stats_var.get_shape()[0])
            if stats_var not in statsUpdates_cache:
                old_fpropFactor = fpropFactor
                B = (tf.shape(fpropFactor)[0])  # batch size
                if opType == 'Conv2D':
                    strides = fops.get_attr("strides")
                    padding = fops.get_attr("padding")
                    convkernel_size = var.get_shape()[0:3]

                    KH = int(convkernel_size[0])
                    KW = int(convkernel_size[1])
                    C = int(convkernel_size[2])
                    flatten_size = int(KH * KW * C)

                    Oh = int(bpropFactor.get_shape()[1])
                    Ow = int(bpropFactor.get_shape()[2])

                    if Oh == 1 and Ow == 1 and self._channel_fac:
                        # factorization along the channels
                        # assume independence among input channels
                        # factor = B x 1 x 1 x (KH xKW x C)
                        # patches = B x Oh x Ow x (KH xKW x C)
                        if len(SVD_factors) == 0:
                            if KFAC_DEBUG:
                                print(('approx %s act factor with rank-1 SVD factors' % (var.name)))
                            # find closest rank-1 approx to the feature map
                            S, U, V = tf.batch_svd(tf.reshape(fpropFactor, [-1, KH * KW, C]))
                            # get rank-1 approx slides
                            sqrtS1 = tf.expand_dims(tf.sqrt(S[:, 0, 0]), 1)
                            patches_k = U[:, :, 0] * sqrtS1  # B x KH*KW
                            full_factor_shape = fpropFactor.get_shape()
                            patches_k.set_shape([full_factor_shape[0], KH * KW])
                            patches_c = V[:, :, 0] * sqrtS1  # B x C
                            patches_c.set_shape([full_factor_shape[0], C])
                            SVD_factors[C] = patches_c
                            SVD_factors[KH * KW] = patches_k
                        fpropFactor = SVD_factors[stats_var_dim]
                    else:
                        # poor mem usage implementation
                        patches = tf.extract_image_patches(
                            fpropFactor,
                            ksizes=[1, convkernel_size[0], convkernel_size[1], 1],
                            strides=strides,
                            rates=[1, 1, 1, 1],
                            padding=padding)

                        if self._approxT2:
                            if KFAC_DEBUG:
                                print(('approxT2 act fisher for %s' % (var.name)))
                            # T^2 terms * 1/T^2, size: B x C
                            fpropFactor = tf.reduce_mean(patches, [1, 2])
                        else:
                            # size: (B x Oh x Ow) x C
                            fpropFactor = tf.reshape(patches, [-1, flatten_size]) / Oh / Ow
                fpropFactor_size = int(fpropFactor.get_shape()[-1])
                if stats_var_dim == (fpropFactor_size + 1) and not self._blockdiag_bias:
                    if opType == 'Conv2D' and not self._approxT2:
                        # correct padding for numerical stability (we
                        # divided out OhxOw from activations for T1 approx)
                        fpropFactor = tf.concat(
                            [fpropFactor, tf.ones([tf.shape(fpropFactor)[0], 1]) / Oh / Ow], 1)
                    else:
                        # use homogeneous coordinates
                        fpropFactor = tf.concat(
                            [fpropFactor, tf.ones([tf.shape(fpropFactor)[0], 1])], 1)

                # average over the number of data points in a batch
                # divided by B
                cov = tf.matmul(fpropFactor, fpropFactor, transpose_a=True) / tf.cast(B, tf.float32)
                updateOps.append(cov)
                statsUpdates[stats_var] = cov
                if opType != 'Conv2D':
                    # HACK: for convolution we recompute fprop stats for
                    # every layer including forking layers
                    statsUpdates_cache[stats_var] = cov

        for stats_var in bpropStats_vars:
            stats_var_dim = int(stats_var.get_shape()[0])
            if stats_var not in statsUpdates_cache:
                old_bpropFactor = bpropFactor
                bpropFactor_shape = bpropFactor.get_shape()
                B = tf.shape(bpropFactor)[0]  # batch size
                C = int(bpropFactor_shape[-1])  # num channels
                if opType == 'Conv2D' or len(bpropFactor_shape) == 4:
                    if fpropFactor is not None:
                        if self._approxT2:
                            if KFAC_DEBUG:
                                print(('approxT2 grad fisher for %s' % (var.name)))
                            bpropFactor = tf.reduce_sum(bpropFactor, [1, 2])  # T^2 terms * 1/T^2
                        else:
                            bpropFactor = tf.reshape(bpropFactor, [-1, C]) * Oh * Ow  # T * 1/T terms
                    else:
                        # just doing block diag approx. spatial independent
                        # structure does not apply here. summing over
                        # spatial locations
                        if KFAC_DEBUG:
                            print(('block diag approx fisher for %s' % (var.name)))
                        bpropFactor = tf.reduce_sum(bpropFactor, [1, 2])

                # assume sampled loss is averaged. TO-DO: figure out a better
                # way to handle this
                bpropFactor *= tf.to_float(B)

                cov_b = tf.matmul(bpropFactor, bpropFactor,
                                  transpose_a=True) / tf.to_float(tf.shape(bpropFactor)[0])

                updateOps.append(cov_b)
                statsUpdates[stats_var] = cov_b
                statsUpdates_cache[stats_var] = cov_b

    if KFAC_DEBUG:
        aKey = list(statsUpdates.keys())[0]
        statsUpdates[aKey] = tf.Print(statsUpdates[aKey],
                                      [tf.convert_to_tensor('step:'),
                                       self.global_step,
                                       tf.convert_to_tensor('computing stats'),
                                       ])
    self.statsUpdates = statsUpdates
    return statsUpdates
def contextual_attention(f, b, mask=None, ksize=3, stride=1, rate=1,
                         fuse_k=3, softmax_scale=10., training=True, fuse=True):
    """ Contextual attention layer implementation.

    Contextual attention is first introduced in publication:
        Generative Image Inpainting with Contextual Attention, Yu et al.

    Args:
        x: Input feature to match (foreground).
        t: Input feature for match (background).
        mask: Input mask for t, indicating patches not available.
        ksize: Kernel size for contextual attention.
        stride: Stride for extracting patches from t.
        rate: Dilation for matching.
        softmax_scale: Scaled softmax for attention.
        training: Indicating if current graph is training or inference.

    Returns:
        tf.Tensor: output
    """
    # get shapes
    raw_fs = tf.shape(f)
    raw_int_fs = f.get_shape().as_list()  # B, H, W, C
    raw_int_bs = b.get_shape().as_list()  # B, H, W, C
    # extract patches from background with stride and rate
    kernel = 2 * rate  # 2d
    raw_w = tf.extract_image_patches(
        b, ksizes=[1, kernel, kernel, 1],
        strides=[1, rate * stride, rate * stride, 1],
        rates=[1, 1, 1, 1],
        padding='SAME')  # B, H//(d*s), W//(d*s), 2d*2d*C
    raw_w = tf.reshape(raw_w, [raw_int_bs[0], -1, kernel, kernel,
                               raw_int_bs[3]])  # B, H//(d*s) * W//(d*s), k, k, C
    raw_w = tf.transpose(raw_w, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw --> B, k, k, C, H//(d*s) * W//(d*s)
    # downscaling foreground option: downscaling both foreground and
    # background for matching and use original background for reconstruction.
    f = resize(f, scale=1. / rate, func=tf.image.resize_nearest_neighbor)  # B, H//d, W//d, C
    b = resize(b, to_shape=[int(raw_int_bs[1] / rate), int(raw_int_bs[2] / rate)],
               func=tf.image.resize_nearest_neighbor)  # B, H//d, W//d, C
    # https://github.com/tensorflow/tensorflow/issues/11651
    if mask is not None:
        mask = resize(mask, scale=1. / rate, func=tf.image.resize_nearest_neighbor)  # B, H//d, W//d, C
    fs = tf.shape(f)
    int_fs = f.get_shape().as_list()  # B, H//d, W//d, C
    f_groups = tf.split(f, int_fs[0], axis=0)  # list[Tensor(H//d, W//d, C)] len: B
    # from t(H*W*C) to w(b*k*k*c*h*w)
    bs = tf.shape(b)
    int_bs = b.get_shape().as_list()  # B, H//d, W//d, C
    w = tf.extract_image_patches(b, [1, ksize, ksize, 1],
                                 [1, stride, stride, 1], [1, 1, 1, 1],
                                 padding='SAME')  # B, H//d, W//d, k * k * C
    w = tf.reshape(w, [int_fs[0], -1, ksize, ksize, int_fs[3]])  # B, H//d * W//d, k, k, C
    w = tf.transpose(w, [0, 2, 3, 4, 1])  # B, k, k, C, H//d * W//d
    # process mask
    if mask is None:
        mask = tf.zeros([1, bs[1], bs[2], 1])
    m = tf.extract_image_patches(mask, [1, ksize, ksize, 1],
                                 [1, stride, stride, 1], [1, 1, 1, 1],
                                 padding='SAME')
    m = tf.reshape(m, [1, -1, ksize, ksize, 1])
    m = tf.transpose(m, [0, 2, 3, 4, 1])  # 1, k, k, 1, H//d * W//d
    m = m[0]  # mask is shared in a batch --> k, k, 1, H//d * W//d
    mm = tf.cast(tf.equal(tf.reduce_mean(m, axis=[0, 1, 2], keep_dims=True), 0.),
                 tf.float32)  # (1, 1, 1, H//d * W//d) --> if zero, this patch is all-blank
    w_groups = tf.split(w, int_bs[0], axis=0)  # list[Tensor(1, k, k, C, H//d * W//d)] len: B
    raw_w_groups = tf.split(raw_w, int_bs[0], axis=0)  # list[Tensor(2d, 2d, C, H//d * W//d)] len: B
    y = []
    offsets = []
    k = fuse_k
    scale = softmax_scale
    fuse_weight = tf.reshape(tf.eye(k), [k, k, 1, 1])
    for xi, wi, raw_wi in zip(f_groups, w_groups, raw_w_groups):
        # conv for compare
        wi = wi[0]
        wi_normed = wi / tf.maximum(
            tf.sqrt(tf.reduce_sum(tf.square(wi), axis=[0, 1, 2])), 1e-4)
        yi = tf.nn.conv2d(xi, wi_normed, strides=[1, 1, 1, 1],
                          padding="SAME")  # 1, H//d, W//d, H//d * W//d
        # ???: xi is not normalized ???
        # conv implementation for fuse scores to encourage large patches
        if fuse:
            yi = tf.reshape(yi, [1, fs[1] * fs[2], bs[1] * bs[2], 1])  # 1, H//d * W//d, H//d * W//d, 1
            yi = tf.nn.conv2d(yi, fuse_weight, strides=[1, 1, 1, 1],
                              padding='SAME')  # 1, H//d * W//d, H//d * W//d, 1
            yi = tf.reshape(yi, [1, fs[1], fs[2], bs[1], bs[2]])  # 1, H//d, W//d, H//d, W//d
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])  # 1, W//d, H//d, W//d, H//d
            yi = tf.reshape(yi, [1, fs[1] * fs[2], bs[1] * bs[2], 1])  # 1, W//d * H//d, W//d * H//d, 1
            yi = tf.nn.conv2d(yi, fuse_weight, strides=[1, 1, 1, 1],
                              padding='SAME')  # 1, W//d * H//d, W//d * H//d, 1
            yi = tf.reshape(yi, [1, fs[2], fs[1], bs[2], bs[1]])
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])  # 1, H//d, W//d, H//d, W//d
        yi = tf.reshape(yi, [1, fs[1], fs[2], bs[1] * bs[2]])  # 1, H//d, W//d, H//d * W//d
        # softmax to match
        yi *= mm  # mask
        yi = tf.nn.softmax(yi * scale, 3)
        yi *= mm  # mask
        offset = tf.argmax(yi, axis=3, output_type=tf.int32)
        offset = tf.stack([offset // fs[2], offset % fs[2]], axis=-1)  # this is the most correlated idx
        # deconv for patch pasting
        # 3.1 paste center
        wi_center = raw_wi[0]  # 2*d, 2*d, C, H//d * W//d
        yi = tf.nn.conv2d_transpose(yi, wi_center,
                                    tf.concat([[1], raw_fs[1:]], axis=0),
                                    strides=[1, rate, rate, 1]) / 4.
        y.append(yi)
        offsets.append(offset)
    y = tf.concat(y, axis=0)
    y.set_shape(raw_int_fs)
    offsets = tf.concat(offsets, axis=0)
    offsets.set_shape(int_bs[:3] + [2])
    # case1: visualize optical flow: minus current position
    h_add = tf.tile(tf.reshape(tf.range(bs[1]), [1, bs[1], 1, 1]), [bs[0], 1, bs[2], 1])
    w_add = tf.tile(tf.reshape(tf.range(bs[2]), [1, 1, bs[2], 1]), [bs[0], bs[1], 1, 1])
    offsets = offsets - tf.concat([h_add, w_add], axis=3)
    # to flow image
    flow = flow_to_image_tf(offsets)
    # # case2: visualize which pixels are attended
    # flow = highlight_flow_tf(offsets * tf.cast(mask, tf.int32))
    if rate != 1:
        flow = resize(flow, scale=rate, func=tf.image.resize_bilinear)
    return y, flow
# -*- coding=utf-8 -*-
import tensorflow as tf


def showParametersInCkpt(ckpt_dir):
    '''
    Print the names of the parameters stored in a ckpt file.
    :param ckpt_dir:
    :return:
    '''
    from tensorflow.python.tools.inspect_checkpoint import print_tensors_in_checkpoint_file
    latest_ckp = tf.train.latest_checkpoint(ckpt_dir)
    # print_tensors_in_checkpoint_file(latest_ckp, all_tensors=True, tensor_name='', all_tensor_names=True)
    from tensorflow.contrib.framework.python.framework import checkpoint_utils
    var_list = checkpoint_utils.list_variables(latest_ckp)
    for v in var_list:
        print(v)


if __name__ == '__main__':
    # ckpt_dir = '/home/give/PycharmProjects/weakly_label_segmentation/logs/1s_weakly_label-transpose'
    # showParametersInCkpt(ckpt_dir)
    tensor = tf.random_normal([4, 256, 256, 128])
    res = tf.extract_image_patches(tensor, [1, 5, 5, 1], [1, 1, 1, 1],
                                   [1, 1, 1, 1], 'VALID')
    print(res)
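# For the call above, TF 1.x shape inference gives 256 - 5 + 1 = 252 output
# rows/cols and 5 * 5 * 128 = 3200 output channels, so printing `res` shows a
# Tensor of static shape (4, 252, 252, 3200).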
def extract_patches(x, ksizes, strides, rates):
    return tf.extract_image_patches(
        x,
        ksizes, strides, rates,
        padding="VALID"
    )
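# Usage sketch for the thin wrapper above (TF 1.x; shapes illustrative). With
# 'VALID' padding the output grid is floor((in - k) / stride) + 1 per axis and
# the flattened patch occupies the channel dimension (k * k * C).
import tensorflow as tf

x = tf.zeros([1, 28, 28, 1])
p = extract_patches(x, ksizes=[1, 7, 7, 1], strides=[1, 7, 7, 1],
                    rates=[1, 1, 1, 1])
print(p.get_shape().as_list())  # [1, 4, 4, 49]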
def attention_transfer(f, b1, b2, ksize=3, stride=1,
                       fuse_k=3, softmax_scale=50., fuse=False):
    # extract patches from background feature maps with rate (1st scale)
    bs1 = tf.shape(b1)
    int_bs1 = b1.get_shape().as_list()
    w_b1 = tf.extract_image_patches(b1, [1, 4, 4, 1], [1, 4, 4, 1], [1, 1, 1, 1], padding='SAME')
    w_b1 = tf.reshape(w_b1, [int_bs1[0], -1, 4, 4, int_bs1[3]])
    w_b1 = tf.transpose(w_b1, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    # extract patches from background feature maps with rate (2nd scale)
    bs2 = tf.shape(b2)
    int_bs2 = b2.get_shape().as_list()
    w_b2 = tf.extract_image_patches(b2, [1, 2, 2, 1], [1, 2, 2, 1], [1, 1, 1, 1], padding='SAME')
    w_b2 = tf.reshape(w_b2, [int_bs2[0], -1, 2, 2, int_bs2[3]])
    w_b2 = tf.transpose(w_b2, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    # use structure feature maps as foreground for matching and use background feature maps for reconstruction.
    fs = tf.shape(f)
    int_fs = f.get_shape().as_list()
    f_groups = tf.split(f, int_fs[0], axis=0)
    w_f = tf.extract_image_patches(f, [1, ksize, ksize, 1], [1, stride, stride, 1],
                                   [1, 1, 1, 1], padding='SAME')
    w_f = tf.reshape(w_f, [int_fs[0], -1, ksize, ksize, int_fs[3]])
    w_f = tf.transpose(w_f, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    w_f_groups = tf.split(w_f, int_fs[0], axis=0)
    w_b1_groups = tf.split(w_b1, int_bs1[0], axis=0)
    w_b2_groups = tf.split(w_b2, int_bs2[0], axis=0)
    y1 = []
    y2 = []
    k = fuse_k
    scale = softmax_scale
    fuse_weight = tf.reshape(tf.eye(k), [k, k, 1, 1])
    for xi, wi, raw1_wi, raw2_wi in zip(f_groups, w_f_groups, w_b1_groups, w_b2_groups):
        # conv for compare
        wi = wi[0]  # (k,k,c,hw)
        onesi = tf.ones_like(wi)
        xxi = tf.nn.conv2d(tf.square(xi), onesi, strides=[1, 1, 1, 1], padding="SAME")  # (1,h,w,hw)
        wwi = tf.reduce_sum(tf.square(wi), axis=[0, 1, 2], keep_dims=True)  # (1,1,1,hw)
        xwi = tf.nn.conv2d(xi, wi, strides=[1, 1, 1, 1], padding="SAME")  # (1,h,w,hw)
        di = xxi + wwi - 2 * xwi
        di_mean, di_var = tf.nn.moments(di, 3, keep_dims=True)
        di_std = di_var ** 0.5
        yi = -1 * tf.nn.tanh((di - di_mean) / di_std)
        # conv implementation for fuse scores to encourage large patches
        if fuse:
            yi = tf.reshape(yi, [1, fs[1] * fs[2], fs[1] * fs[2], 1])
            yi = tf.nn.conv2d(yi, fuse_weight, strides=[1, 1, 1, 1], padding='SAME')
            yi = tf.reshape(yi, [1, fs[1], fs[2], fs[1], fs[2]])
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])
            yi = tf.reshape(yi, [1, fs[1] * fs[2], fs[1] * fs[2], 1])
            yi = tf.nn.conv2d(yi, fuse_weight, strides=[1, 1, 1, 1], padding='SAME')
            yi = tf.reshape(yi, [1, fs[2], fs[1], fs[2], fs[1]])
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])
        yi = tf.reshape(yi, [1, fs[1], fs[2], fs[1] * fs[2]])
        # softmax to match
        yi = tf.nn.softmax(yi * scale, 3)
        wi_center1 = raw1_wi[0]
        wi_center2 = raw2_wi[0]
        y1.append(tf.nn.conv2d_transpose(yi, wi_center1,
                                         tf.concat([[1], bs1[1:]], axis=0),
                                         strides=[1, 4, 4, 1]))
        y2.append(tf.nn.conv2d_transpose(yi, wi_center2,
                                         tf.concat([[1], bs2[1:]], axis=0),
                                         strides=[1, 2, 2, 1]))
    y1 = tf.concat(y1, axis=0)
    y2 = tf.concat(y2, axis=0)
    return y1, y2
def _convert_dataset(split_name, filenames, class_names_to_ids, dataset_dir,
                     ksize_rows=299, ksize_cols=299,
                     strides_rows=128, strides_cols=128):
    """Converts the given filenames to a TFRecord dataset.

    Args:
      split_name: The name of the dataset, either 'train' or 'validation'.
      filenames: A list of absolute paths to png or jpg images.
      class_names_to_ids: A dictionary from class names (strings) to ids
        (integers).
      dataset_dir: The directory where the converted datasets are stored.
      ksize_rows, ksize_cols: the height and width of extracted image patches
      strides_rows, strides_cols: the distance between the centers of two
        consecutive patches
    """
    assert split_name in ['train', 'validation']

    num_per_shard = int(math.ceil(len(filenames) / float(_NUM_SHARDS)))

    number_of_images = 0

    # The size of the sliding window
    ksizes = [1, ksize_rows, ksize_cols, 1]
    # How far the centers of 2 consecutive patches are in the image
    strides = [1, strides_rows, strides_cols, 1]
    rates = [1, 1, 1, 1]  # sample pixels consecutively
    padding = 'VALID'  # or 'SAME'

    total_image_number = len(filenames)
    # Start every per-class counter at 0 (dict.fromkeys without a default
    # leaves None, which would break the `+=` below).
    d = dict.fromkeys(class_names_to_ids.values(), 0)

    with tf.Graph().as_default():
        with tf.Session('') as sess:
            for shard_id in range(_NUM_SHARDS):
                output_filename = _get_dataset_filename(dataset_dir, split_name, shard_id)

                with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
                    start_ndx = shard_id * num_per_shard
                    end_ndx = min((shard_id + 1) * num_per_shard, len(filenames))
                    for i in range(start_ndx, end_ndx):
                        try:
                            # Read the file ('rb' so the image bytes survive Python 3):
                            image_data = tf.gfile.FastGFile(filenames[i], 'rb').read()
                        except Exception as e:
                            sys.stderr.write(
                                "Error in decoding image {} into tensor - {}.".format(
                                    filenames[i], str(e)))
                            continue

                        image = tf.image.decode_image(image_data, channels=3)
                        image = tf.expand_dims(image, 0)
                        image_patches = tf.extract_image_patches(image, ksizes, strides,
                                                                 rates, padding)
                        image_patch_shape = sess.run(tf.shape(image_patches))
                        nrows, ncols = image_patch_shape[1], image_patch_shape[2]
                        # print('{},{}'.format(nrows, ncols), file=sys.stderr)

                        class_name = os.path.basename(os.path.dirname(filenames[i]))
                        class_id = class_names_to_ids[class_name]

                        for nr in range(nrows):
                            for nc in range(ncols):
                                patch_image = tf.reshape(image_patches[0, nr, nc, ],
                                                         [ksize_rows, ksize_cols, 3])
                                height, width = ksize_rows, ksize_cols
                                k = 0

                                # original image patch
                                patch_image_data = tf.image.encode_jpeg(patch_image)
                                example = dataset_utils.image_to_tfexample(
                                    sess.run(patch_image_data), 'jpg', height, width, class_id)
                                tfrecord_writer.write(example.SerializeToString())
                                k += 1

                                flipped_image = tf.image.flip_left_right(patch_image)
                                flipped_image_data = tf.image.encode_jpeg(flipped_image)
                                example = dataset_utils.image_to_tfexample(
                                    sess.run(flipped_image_data), 'jpg', height, width, class_id)
                                tfrecord_writer.write(example.SerializeToString())
                                k += 1

                                k += _rotate_flip(sess, patch_image, tfrecord_writer,
                                                  height, width, class_id)
                                k += _image_random_X(sess, patch_image, tfrecord_writer,
                                                     height, width, class_id)

                                number_of_images += k
                                d[class_id] += k

                                sys.stdout.write(
                                    '\r>> Converting image {}/{} shard {}, patch {}/{},{}/{}, total patch: {}.'
                                    .format(i + 1, total_image_number, shard_id,
                                            nr, nrows, nc, ncols, number_of_images))
                                sys.stdout.flush()

                        # sys.stdout.write('\r>> Converting image %d/%d shard %d' % (i+1, len(filenames), shard_id))
                        # sys.stdout.flush()

    sys.stdout.write('\n')
    sys.stdout.flush()

    with open('{}/class_id_number.txt'.format(dataset_dir), 'w') as f:
        for k, v in d.items():
            print('{}:{}'.format(k, v), file=f)

    with open('{}/{}_number_of_images.txt'.format(dataset_dir, split_name), 'w') as f:
        f.write(str(number_of_images))
def contextual_attention(f, b, mask=None, ksize=3, stride=1, rate=1,
                         fuse_k=3, softmax_scale=10., training=True, fuse=True):
    """ Contextual attention layer implementation.

    Contextual attention is first introduced in publication:
        Generative Image Inpainting with Contextual Attention, Yu et al.

    Args:
        x: Input feature to match (foreground).
        t: Input feature for match (background).
        mask: Input mask for t, indicating patches not available.
        ksize: Kernel size for contextual attention.
        stride: Stride for extracting patches from t.
        rate: Dilation for matching.
        softmax_scale: Scaled softmax for attention.
        training: Indicating if current graph is training or inference.

    Returns:
        tf.Tensor: output
    """
    # get shapes
    raw_fs = tf.shape(f)
    print("raw_fs", raw_fs.shape)
    raw_int_fs = f.get_shape().as_list()
    print("raw_int_fs", raw_int_fs)  # foreground shape
    raw_int_bs = b.get_shape().as_list()
    print("raw_int_bs", raw_int_bs)  # background shape
    '''
    raw_fs (4,)
    raw_int_fs [2, 64, 64, 128]
    raw_int_bs [2, 64, 64, 128]
    '''
    # extract patches from background with stride and rate
    kernel = 2 * rate
    raw_w = tf.extract_image_patches(
        b, [1, kernel, kernel, 1], [1, rate * stride, rate * stride, 1],
        [1, 1, 1, 1], padding='SAME')
    print("raw_w or extracted patches", raw_w)
    raw_w = tf.reshape(raw_w, [raw_int_bs[0], -1, kernel, kernel, raw_int_bs[3]])
    raw_w = tf.transpose(raw_w, [0, 2, 3, 4, 1])  # background patches; these patches have to be the kernels
    print("transposed raw_w or extracted patches", raw_w.shape)  # transpose to b*k*k*c*hw
    # downscaling foreground option: downscaling both foreground and
    # background for matching and use original background for reconstruction.
    # Why do foreground and background need to be downscaled?
    # Both are downscaled to the same shape.
    f = resize(f, scale=1. / rate, func=tf.image.resize_nearest_neighbor)
    print("downscaled f", f.shape)
    b = resize(b, to_shape=[int(raw_int_bs[1] / rate), int(raw_int_bs[2] / rate)],
               func=tf.image.resize_nearest_neighbor)  # https://github.com/tensorflow/tensorflow/issues/11651
    print("downscaled b", b.shape)
    '''
    downscaled f (2, 32, 32, 128)
    downscaled b (2, 32, 32, 128)
    '''
    if mask is not None:
        mask = resize(mask, scale=1. / rate, func=tf.image.resize_nearest_neighbor)
        print("again resized mask", mask.shape)  # again resized mask (1, 32, 32, 1)

    # after downscaling
    fs = tf.shape(f)
    print("fs:", fs)
    int_fs = f.get_shape().as_list()
    print("int_fs:", int_fs)  # int_fs: [2, 32, 32, 128]
    f_groups = tf.split(f, int_fs[0], axis=0)
    print("splitted f_groups:", f_groups)
    # tf.split(X, n, axis) splits X into n pieces along the given axis.
    # For example, for a data set x of size (10, 10), tf.split(x, 2, 0) yields
    # 2 sets of size (5, 10); splitting the result again along the other axis
    # yields 4 sets of size (5, 5).

    # from t(H*W*C) to w(b*k*k*c*h*w)
    bs = tf.shape(b)
    print("bs:", bs)
    int_bs = b.get_shape().as_list()
    print("int_bs:", int_bs)
    w = tf.extract_image_patches(
        b, [1, ksize, ksize, 1], [1, stride, stride, 1], [1, 1, 1, 1], padding='SAME')
    '''
    w or extracted patches (2, 32, 32, 1152)
    transposed w or extracted patches (2, 3, 3, 128, 1024)
    extracted mask patch shape (1, 32, 32, 9)
    transposed extracted mask patch shape (3, 3, 1, 1024)
    temporary (1, 1, 1, 1024)
    mm shape (1, 1, 1, 1024)
    splitted w_groups: [<tf.Tensor 'inpaint_net/split_1:0' shape=(1, 3, 3, 128, 1024) dtype=float32>,
                        <tf.Tensor 'inpaint_net/split_1:1' shape=(1, 3, 3, 128, 1024) dtype=float32>]
    splitted raw_w_groups: [<tf.Tensor 'inpaint_net/split_2:0' shape=(1, 4, 4, 128, 1024) dtype=float32>,
                            <tf.Tensor 'inpaint_net/split_2:1' shape=(1, 4, 4, 128, 1024) dtype=float32>]
    yi shape (1, 32, 32, 1024)
    yi shape after multiplying mm Tensor("inpaint_net/mul_8:0", shape=(1, 32, 32, 1024), dtype=float32)
    yi after softmax shape (1, 32, 32, 1024)
    yi shape (1, 32, 32, 1024)
    yi shape after multiplying mm Tensor("inpaint_net/mul_16:0", shape=(1, 32, 32, 1024), dtype=float32)
    yi after softmax shape (1, 32, 32, 1024)
    x_hallu [Dimension(2), Dimension(256), Dimension(256), Dimension(3)]
    '''
    print("w or extracted patches", w.shape)
    w = tf.reshape(w, [int_fs[0], -1, ksize, ksize, int_fs[3]])
    w = tf.transpose(w, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    print("transposed w or extracted patches", w.shape)  # again

    # process mask
    if mask is None:
        mask = tf.zeros([1, bs[1], bs[2], 1])
    m = tf.extract_image_patches(
        mask, [1, ksize, ksize, 1], [1, stride, stride, 1], [1, 1, 1, 1], padding='SAME')
    print("extracted mask patch shape", m.shape)
    m = tf.reshape(m, [1, -1, ksize, ksize, 1])
    m = tf.transpose(m, [0, 2, 3, 4, 1])  # transpose to b*k*k*c*hw
    m = m[0]
    print("transposed extracted mask patch shape", m.shape)
    temporary = tf.equal(tf.reduce_mean(m, axis=[0, 1, 2], keep_dims=True), 0.)
    print('temporary', temporary.shape)
    mm = tf.cast(temporary, tf.float32)
    print("mm shape", mm.shape)
    w_groups = tf.split(w, int_bs[0], axis=0)
    raw_w_groups = tf.split(raw_w, int_bs[0], axis=0)
    print("splitted w_groups:", w_groups)
    print("splitted raw_w_groups:", raw_w_groups)
    y = []
    offsets = []
    k = fuse_k
    scale = softmax_scale
    fuse_weight = tf.reshape(tf.eye(k), [k, k, 1, 1])
    for xi, wi, raw_wi in zip(f_groups, w_groups, raw_w_groups):
        # conv for compare
        wi = wi[0]
        wi_normed = wi / tf.maximum(
            tf.sqrt(tf.reduce_sum(tf.square(wi), axis=[0, 1, 2])), 1e-4)  # normalize each background patch
        yi = tf.nn.conv2d(xi, wi_normed, strides=[1, 1, 1, 1], padding="SAME")
        # conv implementation for fuse scores to encourage large patches
        if fuse:
            yi = tf.reshape(yi, [1, fs[1] * fs[2], bs[1] * bs[2], 1])
            yi = tf.nn.conv2d(yi, fuse_weight, strides=[1, 1, 1, 1], padding='SAME')
            yi = tf.reshape(yi, [1, fs[1], fs[2], bs[1], bs[2]])
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])
            yi = tf.reshape(yi, [1, fs[1] * fs[2], bs[1] * bs[2], 1])
            yi = tf.nn.conv2d(yi, fuse_weight, strides=[1, 1, 1, 1], padding='SAME')
            yi = tf.reshape(yi, [1, fs[2], fs[1], bs[2], bs[1]])
            yi = tf.transpose(yi, [0, 2, 1, 4, 3])
        yi = tf.reshape(yi, [1, fs[1], fs[2], bs[1] * bs[2]])
        # pr = tf.Print(yi, [yi], "Jisa says:")
        print("yi shape", yi.shape)
        # softmax to match
        yi *= mm  # mask
        print("yi shape after multiplying mm", yi)
        yi = tf.nn.softmax(yi * scale, 3)
        # pr = tf.Print(yi, [yi], "Jisa says:")
        yi *= mm  # mask
        print("yi after softmax shape", yi.shape)
        offset = tf.argmax(yi, axis=3, output_type=tf.int32)
        offset = tf.stack([offset // fs[2], offset % fs[2]], axis=-1)
        # deconv for patch pasting
        # 3.1 paste center
        wi_center = raw_wi[0]
        yi = tf.nn.conv2d_transpose(yi, wi_center,
                                    tf.concat([[1], raw_fs[1:]], axis=0),
                                    strides=[1, rate, rate, 1]) / 4.
        y.append(yi)
        offsets.append(offset)
    y = tf.concat(y, axis=0)
    y.set_shape(raw_int_fs)
    offsets = tf.concat(offsets, axis=0)
    offsets.set_shape(int_bs[:3] + [2])
    # case1: visualize optical flow: minus current position
    h_add = tf.tile(tf.reshape(tf.range(bs[1]), [1, bs[1], 1, 1]), [bs[0], 1, bs[2], 1])
    w_add = tf.tile(tf.reshape(tf.range(bs[2]), [1, 1, bs[2], 1]), [bs[0], bs[1], 1, 1])
    offsets = offsets - tf.concat([h_add, w_add], axis=3)
    # to flow image
    flow = flow_to_image_tf(offsets)
    # # case2: visualize which pixels are attended
    # flow = highlight_flow_tf(offsets * tf.cast(mask, tf.int32))
    if rate != 1:
        flow = resize(flow, scale=rate, func=tf.image.resize_nearest_neighbor)
    return y, flow
def image_to_patches(image, scale=1):
    # Integer division keeps the patch geometry integral under Python 3.
    patch_height = 108 // scale
    patch_width = 108 // scale
    patch_overlap = 12 // scale
    patches = tf.extract_image_patches(
        image,
        [1, patch_height, patch_width, 1],
        [1, patch_height - 2 * patch_overlap, patch_width - 2 * patch_overlap, 1],
        [1, 1, 1, 1],
        padding='VALID')
    return tf.reshape(patches,
                      [tf.shape(patches)[0] * tf.shape(patches)[1] * tf.shape(patches)[2],
                       patch_height, patch_width, 3])
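# Worked example for the geometry above (my arithmetic, not from the source):
# at scale=1 the stride is 108 - 2 * 12 = 84, so a [1, 444, 444, 3] input
# yields (444 - 108) // 84 + 1 = 5 patches per axis, i.e. 25 patches of
# shape [108, 108, 3].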
net, ind = conv2d(net, 256, 512, '_conv9', ind, tiny)
net, ind = conv2d(net, 512, 256, '_conv10', ind, tiny, size=1)
net, ind = conv2d(net, 256, 512, '_conv11', ind, tiny)
net, ind = conv2d(net, 512, 256, '_conv12', ind, tiny, size=1)
net, ind = conv2d(net, 256, 512, '_conv13', ind, tiny)
shortcut = net
net = max_pool(net)
net, ind = conv2d(net, 512, 1024, '_conv14', ind, tiny)
net, ind = conv2d(net, 1024, 512, '_conv15', ind, tiny, size=1)
net, ind = conv2d(net, 512, 1024, '_conv16', ind, tiny)
net, ind = conv2d(net, 1024, 512, '_conv17', ind, tiny, size=1)
net, ind = conv2d(net, 512, 1024, '_conv18', ind, tiny)
net, ind = conv2d(net, 1024, 1024, '_conv19', ind, tiny)
net, ind = conv2d(net, 1024, 1024, '_conv20', ind, tiny)

shortcut, ind = conv2d(shortcut, 512, 64, '_shortcut', ind, tiny, size=1)
shortcut = tf.extract_image_patches(shortcut, [1, 2, 2, 1], [1, 2, 2, 1],
                                    [1, 1, 1, 1], 'VALID')
net = tf.concat([shortcut, net], axis=-1)
net, ind = conv2d(net, 1280, 1024, '_conv21', ind, tiny)
out, ind = conv2d(net, 1024, 425, '_conv22', ind, tiny, size=1, batchnorm=False)

sess = tf.Session()
sess.run(tf.global_variables_initializer())
def extract_patches(self, image):
    patches = tf.extract_image_patches(image,
                                       [1, self.psz, self.psz, 1],
                                       [1, self.stride, self.stride, 1],
                                       [1, 1, 1, 1], 'VALID')
    return patches
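# Hedged companion sketch (TF 1.x): restoring the flattened patches the method
# above returns to [n_patches, psz, psz, channels]; psz, stride and the input
# shape are illustrative.
import tensorflow as tf

psz, stride, channels = 16, 8, 3
image = tf.zeros([1, 64, 64, channels])
flat = tf.extract_image_patches(image, [1, psz, psz, 1],
                                [1, stride, stride, 1], [1, 1, 1, 1], 'VALID')
patches = tf.reshape(flat, [-1, psz, psz, channels])  # (49, 16, 16, 3)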
def __init__(self, init_image_batch, hyperparams, info):
    self._image_batch = init_image_batch
    # tf.summary.image('input', self._image_batch, max_outputs=5)
    k1 = hyperparams['k1']
    k2 = hyperparams['k2']
    l1 = hyperparams['l1']

    with tf.name_scope("extract_patches1"):
        self.patches1 = tf.extract_image_patches(self._image_batch,
                                                 [1, k1, k2, 1], [1, k1, k2, 1],
                                                 [1, 1, 1, 1],
                                                 padding='SAME', name='patches')
        self.patches1 = tf.reshape(self.patches1, [-1, k1 * k2 * info.N_CHANNELS],
                                   name='patches_shaped')
        self.numofpatches = self.patches1.get_shape()[0].value
        # TODO: figure out how to unvectorize for multi-channel images
        # self.patches1 = tf.reshape(self.patches1, [-1, info.N_CHANNELS, k1 * k2], name='patches_shaped')
        # self.patches1 = tf.transpose(self.patches1, [0, 2, 1])
        # self.zero_mean_patches1 = self.patches1 - tf.reduce_mean(self.patches1, axis=1, keep_dims=True, name='patch_means')
        self.zero_mean_patches1 = self.patches1
        x1 = tf.transpose(self.zero_mean_patches1, [1, 0])
        x1_trans = self.zero_mean_patches1
        self.patches_covariance1 = tf.matmul(x1, x1_trans, name='patch_covariance')

    with tf.name_scope("eignvalue_decomposition1"):
        self.x_eig_vals1, self.x_eig1 = tf.self_adjoint_eig(self.patches_covariance1,
                                                            name='x_eig')
        self.top_x_eig1_ori = tf.reverse(self.x_eig1, axis=[-1])[:, 0:l1]
        # self.top_x_eig1 = tf.transpose(tf.reshape(self.top_x_eig1_ori, [info.N_CHANNELS, k1, k2, l1]), [2, 1, 0, 3])
        # self.top_x_eig2 = tf.transpose(image.rotate(tf.reshape(self.top_x_eig1_ori, [info.N_CHANNELS, k1, k2, l1]), np.pi), [2, 1, 3, 0])
        # Kept active (it was commented out in the flattened source) because the
        # MSE scaling op below reads self.top_x_eig_vals1:
        self.top_x_eig_vals1 = tf.expand_dims(tf.reverse(self.x_eig_vals1, axis=[-1])[0:l1], axis=1)
        # self.top_x_eig_vals1 = self.x_eig_vals1[0:l1]
        # self.filt1_viz = tf.transpose(self.top_x_eig1, [3, 0, 1, 2])
        # tf.summary.image('filt1', self.filt1_viz, max_outputs=l1)
        #
        # with tf.name_scope("convolution1"):
        #     self.conv1 = tf.nn.conv2d(self._image_batch, self.top_x_eig1, [1, 1, 1, 1], padding='SAME')
        #     # self.conv1 = tf.transpose(self.conv1, [3, 0, 1, 2])
        #     # conv1 is now (l1, batch_size, img_w, img_h)
        #     self.conv1_batch = tf.expand_dims(tf.reshape(self.conv1, [-1, info.IMAGE_W, info.IMAGE_H]), axis=3)
        #     # conv1 batch is (l1 * batch_size, img_w, img_h)
        #     # tf.summary.image('conv1', self.conv1_batch, max_outputs=l1)
        #
        # with tf.name_scope("normalization_of_convolution"):
        #     self.conv1_flatten = tf.reshape(self.conv1, [l1, info.batch_size * info.IMAGE_W * info.IMAGE_H])
        #     self.eigen_vals = tf.tile(self.top_x_eig_vals1, [1, info.batch_size * info.IMAGE_W * info.IMAGE_H])
        #     self.conv1_div_vals = tf.divide(self.conv1_flatten, tf.sqrt(tf.sqrt(self.eigen_vals)))
        #     self.conv1_output = tf.transpose(tf.reshape(self.conv1_div_vals, [l1, info.batch_size, info.IMAGE_W, info.IMAGE_H]), [1, 2, 3, 0])
        #     self.outputs = tf.nn.conv2d(self.conv1_output, self.top_x_eig2, [1, 1, 1, 1], padding='SAME')
        #     # self.outputs = self.conv1_flatten

    # We proved that mse_loss = Sum(eigen_vals), thus we do not need any convolution ops. Modified at 5.24 14:38
    with tf.name_scope('MSE_Scaling_Op'):
        self.eigen_val_sum = tf.reduce_sum(self.top_x_eig_vals1)
        self.outputs = tf.sqrt(1.0 / self.numofpatches * self.top_x_eig_vals1)

    with tf.name_scope('Decomposition_maps'):
        self.decompatches = []
        for i in range(k1 * k2):
            self.decompatches.append(
                tf.matmul(
                    tf.matmul(self.zero_mean_patches1,
                              tf.expand_dims(self.top_x_eig1_ori[:, i], axis=1)),
                    tf.expand_dims(self.top_x_eig1_ori[:, i], axis=0)))
def run_style_transfer(self, content_path, style_path,
                       content_map_path="", style_map_path="",
                       num_iterations=100, content_weight=1e4,
                       style_weight=1e-2, trans_weight=0):
    # We don't need to (or want to) train any layers of our model, so we set their
    # trainable to false.
    model = self.get_model()
    for layer in model.layers:
        layer.trainable = False

    # Get the style and content feature representations (from our specified intermediate layers)
    style_features, content_features = self.get_feature_representations(
        model, content_path, style_path)
    style_map_features, _ = self.get_feature_representations(
        model, content_map_path, style_map_path)
    content_map_features, _ = self.get_feature_representations(
        model, style_map_path, content_map_path)

    size = 5
    stride = 4
    base_style_patches = []
    i = 0
    style_img = load_img(style_path)
    for style_feat_img, style_map_img in zip(style_features, style_map_features):
        print(style_feat_img.shape)
        print(style_map_img.shape)
        style_feat_img = tf.concat([style_feat_img, style_map_img], -1)
        print(style_feat_img.shape)
        li = tf.squeeze(
            tf.extract_image_patches(tf.expand_dims(style_feat_img, axis=0),
                                     ksizes=[1, size, size, 1],
                                     strides=[1, stride, stride, 1],
                                     rates=[1, 1, 1, 1],
                                     padding='VALID'), 0)
        li = tf.reshape(li, [((style_feat_img.shape[0] - size) // stride + 1) *
                             ((style_feat_img.shape[1] - size) // stride + 1), -1])
        # li = tf.reshape(li, [(style_feat_img.shape[0] - 2) * (style_feat_img.shape[1] - 2), -1])
        base_style_patches.append(li)
        # print(i, len(base_style_patches[i]), base_style_patches[i][0])
        i += 1
    # print(len(base_style_patches))

    # Set initial image
    init_image = load_noise_img(load_and_process_img(content_path))
    init_image = load_and_process_img(content_path)
    init_image = tfe.Variable(init_image, dtype=tf.float32)
    # Create our optimizer
    opt = tf.train.AdamOptimizer(learning_rate=50, beta1=0.99, epsilon=1e-1)

    # For displaying intermediate images
    iter_count = 1

    # Store our best result
    best_loss, best_img = float('inf'), None

    # Create a nice config
    loss_weights = (style_weight, content_weight, trans_weight)
    cfg = {
        'model': model,
        'loss_weights': loss_weights,
        'init_image': init_image,
        'base_style_patches': base_style_patches,
        'content_features': content_features,
        'content_map_features': content_map_features
    }

    # For displaying
    num_rows = 2
    num_cols = 10
    display_interval = num_iterations / (num_rows * num_cols)
    start_time = time.time()
    global_start = time.time()

    norm_means = np.array([103.939, 116.779, 123.68])
    min_vals = -norm_means
    max_vals = 255 - norm_means

    imgs = []
    for i in range(num_iterations):
        print("keep courage")
        grads, all_loss = self.compute_grads(cfg)
        print("gradient will come")
        loss, style_score, content_score, trans_score = all_loss
        opt.apply_gradients([(grads, init_image)])
        print("gradient arrived")
        clipped = tf.clip_by_value(init_image, min_vals, max_vals)
        # print("II 1", init_image)
        init_image.assign(clipped)
        # print("II 2", cfg['init_image'])
        end_time = time.time()

        if loss < best_loss:
            # Update best loss and best image from total loss.
            best_loss = loss
            best_img = deprocess_img(init_image.numpy())

        if i % 1 == 0:
            start_time = time.time()
            # Use the .numpy() method to get the concrete numpy array
            plot_img = init_image.numpy()
            plot_img = deprocess_img(plot_img)
            if i % display_interval == 0:
                imgs.append(plot_img)
            print('Iteration: {}'.format(i))
            print('Total loss: {:.4e}, '
                  'style loss: {:.4e}, '
                  'content loss: {:.4e}, '
                  'trans loss: {:.4e}, '
                  'time: {:.4f}s'.format(loss, style_score, content_score,
                                         trans_score, time.time() - start_time))
    print('Total time: {:.4f}s'.format(time.time() - global_start))

    plt.figure(figsize=(14, 4))
    for i, img in enumerate(imgs):
        plt.subplot(num_rows, num_cols, i + 1)
        plt.imshow(img)
        plt.xticks([])
        plt.yticks([])
    plt.savefig(self.results + content_path + '_inter.jpg')

    return best_img, best_loss
def gmm_unsupervised_init(sim_op, templates_var, weights_var):
    """Initialize a similarity layer using gmm unsupervised learning

    Initializes the templates and weights using gmm.
    The function returns two ops. The first is used to initialize the learning
    and the second should be run iteratively with all the data.

    Parameters
    ----------
    sim_op : tf.Operation | tf.Tensor
        the similarity operation (or the tensor which is the output of the similarity)
    templates_var : tf.Variable
        the templates variable for this similarity layer
    weights_var : tf.Variable
        the weights variable for this similarity layer

    Returns
    -------
    A tuple (init_op, update_op) where init_op must be executed by a session
    before using the update op, and the update_op is the operation that
    performs the learning.
    """
    if isinstance(sim_op, tf.Tensor):
        sim_op = sim_op.op
    if not sim_op.type == 'Similarity':
        raise ValueError('gmm_unsupervised_init needs a similarity op, got %s instead'
                         % sim_op.type)
    assert (isinstance(sim_op, tf.Operation))
    name = sim_op.name + '_gmm_init'
    with tf.name_scope(name):
        input_tensor = sim_op.inputs[0]
        templates_tensor = sim_op.inputs[1]
        num_instances = templates_tensor.get_shape().as_list()[0]

        strides = sim_op.get_attr('strides')
        blocks = sim_op.get_attr('blocks')
        strides = [1, strides[0], strides[1], 1]
        blocks = [1, blocks[0], blocks[1], 1]
        # tf.extract_image_patches takes the window size as `ksizes`
        # (the original passed an unsupported `blocks=` keyword).
        patches = tf.extract_image_patches(tf.transpose(input_tensor, (0, 2, 3, 1)),
                                           ksizes=blocks,
                                           strides=strides,
                                           rates=[1, 1, 1, 1],
                                           padding='VALID')
        _, _, _, patch_size = patches.get_shape().as_list()
        patches = tf.reshape(patches, [-1, patch_size])

        _, _, _, training_op = _gmm(inp=patches,
                                    initial_clusters='random',
                                    random_seed=33,
                                    num_clusters=num_instances,
                                    covariance_type='diag',
                                    params='mc')
        clusters_var = [v for v in tf.global_variables()
                        if v.name == name + '/' + 'clusters:0'][0]
        clusters = clusters_var.op.outputs[0]
        clusters_covs_var = [v for v in tf.global_variables()
                             if v.name == name + '/' + 'clusters_covs:0'][0]
        clusters_covs = clusters_covs_var.op.outputs[0]

        # this hacky code makes sure that the gmm code does not add a variable
        # initializer that depends on the input, which is usually a placeholder.
        # without it, the global initializer must be run with a feed dict, which
        # does not work for keras, and is weird for other code
        non_gmm_vars = [v for v in tf.global_variables() if not v.name.startswith(name)]
        gmm_vars = [v for v in tf.global_variables() if v.name.startswith(name)]
        graph = tf.get_default_graph()
        graph.clear_collection(tf.GraphKeys.GLOBAL_VARIABLES)
        for v in non_gmm_vars:
            graph.add_to_collection(tf.GraphKeys.GLOBAL_VARIABLES, v)
        initializer = tf.group(*[v.initializer for v in gmm_vars])

        channels, block_rows, block_cols = templates_tensor.get_shape().as_list()[1:]
        reshaped_clusters = tf.reshape(clusters, (num_instances, block_rows, block_cols, channels))
        reshaped_covs = tf.reshape(clusters_covs, (num_instances, block_rows, block_cols, channels))
        transposed_clusters = tf.transpose(reshaped_clusters, [0, 3, 1, 2])
        transposed_covs = tf.sqrt(tf.transpose(reshaped_covs, [0, 3, 1, 2]))
        with tf.control_dependencies([training_op]):
            assign1 = tf.assign(templates_var, transposed_clusters)
            assign2 = tf.assign(weights_var, transposed_covs)
    return initializer, tf.group(assign1, assign2, name='gmm_init_assign')
def tf_histogram_of_oriented_gradients(img_raw,
                                       x_kernel=[-1.0, 0.0, 1.0],
                                       y_kernel=[-1.0, 0.0, 1.0],
                                       pixels_in_cell=8, cells_in_block=1,
                                       n_angle_bins=9):
    ## COMPUTE GRADIENT MAGNITUDES/ORIENTATIONS
    img_raw = img_raw if len(img_raw.shape) == 4 else tf.expand_dims(img_raw, 0)  # convert single image to batch of 1
    img = tf.to_float(img_raw)  # convert int pixel values to float
    x_kernel = tf.reshape(x_kernel, [3, 1, 1, 1])  # x kernel is a row matrix
    y_kernel = tf.reshape(y_kernel, [1, 3, 1, 1])  # y kernel is a column matrix
    x_grad_by_c = tf.nn.depthwise_conv2d(img, tf.tile(x_kernel, [1, 1, 3, 1]),
                                         [1, 1, 1, 1], "SAME")  # computing channel x/y gradients by convolution
    y_grad_by_c = tf.nn.depthwise_conv2d(img, tf.tile(y_kernel, [1, 1, 3, 1]),
                                         [1, 1, 1, 1], "SAME")  # (kernel tiled across 3 RGB channels)
    grad_mag_by_c = tf.sqrt(tf.square(x_grad_by_c) + tf.square(y_grad_by_c))  # gradient magnitude
    grad_ang_by_c = tf.atan2(y_grad_by_c, x_grad_by_c)  # gradient orientation
    grad_mag = tf.reduce_max(grad_mag_by_c, axis=-1)  # select largest channel gradient
    grad_ang = tf.reduce_sum(grad_ang_by_c * tf.one_hot(tf.argmax(grad_mag_by_c, axis=-1), 3),
                             axis=-1)  # select corresponding orientation

    ## GROUP VALUES INTO CELLS (8x8)
    p = pixels_in_cell
    grad_mag = tf.extract_image_patches(tf.expand_dims(grad_mag, -1),
                                        [1, p, p, 1], [1, p, p, 1], [1, 1, 1, 1], "VALID")
    grad_ang = tf.extract_image_patches(tf.expand_dims(grad_ang, -1),
                                        [1, p, p, 1], [1, p, p, 1], [1, 1, 1, 1], "VALID")

    ## COMPUTE CELL HISTOGRAMS
    bin_width = np.pi / n_angle_bins
    grad_ang = tf.mod(grad_ang, np.pi)  # unsigned gradients
    grad_ang_idx = grad_ang / bin_width
    lo_bin = tf.floor(grad_ang_idx)
    hi_bin = lo_bin + 1
    lo_weight = (hi_bin - grad_ang_idx) * grad_mag
    hi_weight = (grad_ang_idx - lo_bin) * grad_mag
    hi_bin = tf.mod(hi_bin, n_angle_bins)
    lo_bin = tf.to_int32(lo_bin)
    hi_bin = tf.to_int32(hi_bin)
    cell_hogs = tf.reduce_sum(
        tf.one_hot(lo_bin, n_angle_bins) * tf.expand_dims(lo_weight, -1) +
        tf.one_hot(hi_bin, n_angle_bins) * tf.expand_dims(hi_weight, -1), -2)

    ## ASSEMBLE AND NORMALIZE BLOCK HISTOGRAM VECTORS
    unnormalized_hog = tf.extract_image_patches(cell_hogs,
                                                [1, cells_in_block, cells_in_block, 1],
                                                [1, 1, 1, 1], [1, 1, 1, 1], "VALID")
    hog_descriptor = tf.reshape(tf.nn.l2_normalize(unnormalized_hog, -1),
                                [unnormalized_hog.shape[0], -1])
    return cell_hogs, hog_descriptor
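# Hedged usage sketch for the HOG graph above (TF 1.x); the image size is
# illustrative. A 64x64 input with 8-pixel cells and 9 bins produces a
# (1, 8, 8, 9) cell histogram tensor.
import numpy as np
import tensorflow as tf

img = np.random.randint(0, 256, size=(1, 64, 64, 3)).astype(np.uint8)
cell_hogs, hog = tf_histogram_of_oriented_gradients(tf.constant(img))
with tf.Session() as sess:
    cells, desc = sess.run([cell_hogs, hog])  # cells.shape == (1, 8, 8, 9)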
def crnn(tensor, kernel_size, stride, out_channels,
         rnn_n_layers, rnn_type, bidirectional,
         w_std, padding, scope_name):
    with tf.variable_scope(scope_name,
                           initializer=tf.truncated_normal_initializer(stddev=w_std)):
        # Expand to have 4 dimensions if needed
        if len(tensor.shape) == 3:
            tensor = tf.expand_dims(tensor, 3)

        # Extract the patches (returns [batch, time-steps, 1, patch content flattened])
        batch_size = tensor.shape[0].value
        n_in_features = tensor.shape[2].value
        patches = tf.extract_image_patches(
            images=tensor,
            ksizes=[1, kernel_size, n_in_features, 1],
            strides=[1, stride, n_in_features, 1],
            rates=[1, 1, 1, 1],
            padding=padding)
        patches = patches[:, :, 0, :]

        # Reshape to do:
        # 1) reshape the flattened patches back to [kernel_size, n_in_features]
        # 2) combine the batch and time-steps dimensions (which will be the new 'batch' size, for the RNN)
        # now shape will be [batch * time-steps, kernel_size, n_features]
        time_steps_after_stride = patches.shape[1].value
        patches = tf.reshape(patches,
                             [batch_size * time_steps_after_stride, kernel_size, n_in_features])

        # Transpose and convert to a list, to fit the tf.contrib.rnn.static_rnn requirements.
        # Now will be a list of length kernel_size, each element of shape [batch * time-steps, n_features]
        patches = tf.unstack(tf.transpose(patches, [1, 0, 2]))

        # Create the RNN Cell
        if rnn_type == 'simple':
            rnn_cell_func = tf.contrib.rnn.BasicRNNCell
        elif rnn_type == 'lstm':
            rnn_cell_func = tf.contrib.rnn.LSTMBlockCell
        elif rnn_type == 'gru':
            rnn_cell_func = tf.contrib.rnn.GRUBlockCell
        if not bidirectional:
            rnn_cell = rnn_cell_func(out_channels)
        else:
            rnn_cell_f = rnn_cell_func(out_channels)
            rnn_cell_b = rnn_cell_func(out_channels)

        # Multilayer RNN? (does not appear in the original paper)
        if rnn_n_layers > 1:
            if not bidirectional:
                rnn_cell = tf.contrib.rnn.MultiRNNCell([rnn_cell] * rnn_n_layers)
            else:
                rnn_cell_f = tf.contrib.rnn.MultiRNNCell([rnn_cell_f] * rnn_n_layers)
                rnn_cell_b = tf.contrib.rnn.MultiRNNCell([rnn_cell_b] * rnn_n_layers)

        # The RNN itself
        if not bidirectional:
            outputs, state = tf.contrib.rnn.static_rnn(rnn_cell, patches, dtype=tf.float32)
        else:
            outputs, output_state_fw, output_state_bw = tf.contrib.rnn.static_bidirectional_rnn(
                rnn_cell_f, rnn_cell_b, patches, dtype=tf.float32)

        # Use only the output of the last time-step (shape will be [batch * time-steps, out_channels]).
        # In the case of a bidirectional RNN, we want to take the last time-step of the forward RNN,
        # and the first time-step of the backward RNN.
        if not bidirectional:
            outputs = outputs[-1]
        else:
            half = int(outputs[0].shape.as_list()[-1] / 2)
            outputs = tf.concat([outputs[-1][:, :half], outputs[0][:, half:]], axis=1)

        # Expand the batch * time-steps back (shape will be [batch_size, time_steps, out_channels])
        if bidirectional:
            out_channels = 2 * out_channels
        outputs = tf.reshape(outputs,
                             [batch_size, time_steps_after_stride, out_channels])

    return outputs
def compute_stats(self, loss_sampled, var_list=None):
    varlist = var_list
    if varlist is None:
        varlist = tf.trainable_variables()

    gs = tf.gradients(loss_sampled, varlist, name='gradientsSampled')
    self.gs = gs
    factors = self.getFactors(gs, varlist)
    stats = self.getStats(factors, varlist)

    updateOps = []
    statsUpdates = {}
    statsUpdates_cache = {}
    for var in varlist:
        opType = factors[var]['opName']
        fops = factors[var]['op']
        fpropFactor = factors[var]['fpropFactors_concat']
        fpropStats_vars = stats[var]['fprop_concat_stats']
        bpropFactor = factors[var]['bpropFactors_concat']
        bpropStats_vars = stats[var]['bprop_concat_stats']
        SVD_factors = {}
        for stats_var in fpropStats_vars:
            stats_var_dim = int(stats_var.get_shape()[0])
            if stats_var not in statsUpdates_cache:
                old_fpropFactor = fpropFactor
                B = tf.shape(fpropFactor)[0]  # batch size
                if opType == 'Conv2D':
                    strides = fops.get_attr("strides")
                    padding = fops.get_attr("padding")
                    convkernel_size = var.get_shape()[0:3]

                    KH = int(convkernel_size[0])
                    KW = int(convkernel_size[1])
                    C = int(convkernel_size[2])
                    flatten_size = int(KH * KW * C)

                    Oh = int(bpropFactor.get_shape()[1])
                    Ow = int(bpropFactor.get_shape()[2])

                    if Oh == 1 and Ow == 1 and self._channel_fac:
                        # factorization along the channels:
                        # assume independence among input channels
                        # factor = B x 1 x 1 x (KH x KW x C)
                        # patches = B x Oh x Ow x (KH x KW x C)
                        if len(SVD_factors) == 0:
                            # find the closest rank-1 approximation to the feature map
                            S, U, V = tf.batch_svd(tf.reshape(
                                fpropFactor, [-1, KH * KW, C]))
                            # get rank-1 approximation slices
                            sqrtS1 = tf.expand_dims(tf.sqrt(S[:, 0, 0]), 1)
                            patches_k = U[:, :, 0] * sqrtS1  # B x KH*KW
                            full_factor_shape = fpropFactor.get_shape()
                            patches_k.set_shape(
                                [full_factor_shape[0], KH * KW])
                            patches_c = V[:, :, 0] * sqrtS1  # B x C
                            patches_c.set_shape([full_factor_shape[0], C])
                            SVD_factors[C] = patches_c
                            SVD_factors[KH * KW] = patches_k
                        fpropFactor = SVD_factors[stats_var_dim]
                    else:
                        # implementation with poor memory usage
                        patches = tf.extract_image_patches(
                            fpropFactor,
                            ksizes=[1, convkernel_size[0], convkernel_size[1], 1],
                            strides=strides,
                            rates=[1, 1, 1, 1],
                            padding=padding)

                        if self._approxT2:
                            # T^2 terms * 1/T^2, size: B x C
                            fpropFactor = tf.reduce_mean(patches, [1, 2])
                        else:
                            # size: (B x Oh x Ow) x C
                            fpropFactor = tf.reshape(
                                patches, [-1, flatten_size]) / Oh / Ow
                fpropFactor_size = int(fpropFactor.get_shape()[-1])
                if stats_var_dim == (fpropFactor_size + 1) and not self._blockdiag_bias:
                    if opType == 'Conv2D' and not self._approxT2:
                        # correct padding for numerical stability (we
                        # divided out Oh x Ow from activations for the T1 approx)
                        fpropFactor = tf.concat([fpropFactor, tf.ones(
                            [tf.shape(fpropFactor)[0], 1]) / Oh / Ow], 1)
                    else:
                        # use homogeneous coordinates
                        fpropFactor = tf.concat(
                            [fpropFactor, tf.ones([tf.shape(fpropFactor)[0], 1])], 1)

                # average over the number of data points in a batch
                # (divide by B)
                cov = tf.matmul(fpropFactor, fpropFactor,
                                transpose_a=True) / tf.cast(B, tf.float32)
                updateOps.append(cov)
                statsUpdates[stats_var] = cov
                if opType != 'Conv2D':
                    # HACK: for convolution we recompute fprop stats for
                    # every layer, including forking layers
                    statsUpdates_cache[stats_var] = cov

        for stats_var in bpropStats_vars:
            stats_var_dim = int(stats_var.get_shape()[0])
            if stats_var not in statsUpdates_cache:
                old_bpropFactor = bpropFactor
                bpropFactor_shape = bpropFactor.get_shape()
                B = tf.shape(bpropFactor)[0]  # batch size
                C = int(bpropFactor_shape[-1])  # num channels
                if opType == 'Conv2D' or len(bpropFactor_shape) == 4:
                    if fpropFactor is not None:
                        if self._approxT2:
                            # T^2 terms * 1/T^2
                            bpropFactor = tf.reduce_sum(bpropFactor, [1, 2])
                        else:
                            # T * 1/T terms
                            bpropFactor = tf.reshape(
                                bpropFactor, [-1, C]) * Oh * Ow
                    else:
                        # just doing the block-diagonal approximation; the
                        # spatially independent structure does not apply here,
                        # so sum over spatial locations
                        bpropFactor = tf.reduce_sum(bpropFactor, [1, 2])

                # assume the sampled loss is averaged.
                # TODO: figure out a better way to handle this
                bpropFactor *= tf.to_float(B)

                cov_b = tf.matmul(
                    bpropFactor, bpropFactor,
                    transpose_a=True) / tf.to_float(tf.shape(bpropFactor)[0])

                updateOps.append(cov_b)
                statsUpdates[stats_var] = cov_b
                statsUpdates_cache[stats_var] = cov_b

    self.statsUpdates = statsUpdates
    return statsUpdates
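A minimal sketch (hypothetical shapes, TF 1.x) of the im2col view the Conv2D branch above relies on: tf.extract_image_patches flattens each receptive field, so a convolution becomes a single matmul against the reshaped kernel, and the Gram matrix of the flattened patches is the kind of activation statistic compute_stats accumulates.

import numpy as np
import tensorflow as tf

B, H, W, C, KH, KW, O = 2, 5, 5, 3, 3, 3, 4  # made-up sizes for the demo
x = tf.constant(np.random.rand(B, H, W, C), dtype=tf.float32)
k = tf.constant(np.random.rand(KH, KW, C, O), dtype=tf.float32)

patches = tf.extract_image_patches(x, [1, KH, KW, 1], [1, 1, 1, 1],
                                   [1, 1, 1, 1], 'SAME')  # B x H x W x KH*KW*C
im2col = tf.reshape(patches, [-1, KH * KW * C])           # (B*H*W) x KH*KW*C
conv_via_matmul = tf.reshape(tf.matmul(im2col, tf.reshape(k, [-1, O])),
                             [B, H, W, O])
conv_direct = tf.nn.conv2d(x, k, [1, 1, 1, 1], 'SAME')

# Gram matrix of the flattened patches, analogous to the fprop covariance.
cov = tf.matmul(im2col, im2col, transpose_a=True) / tf.cast(
    tf.shape(im2col)[0], tf.float32)
with tf.Session() as sess:
    a, b, c_ = sess.run([conv_via_matmul, conv_direct, cov])
    print(np.allclose(a, b, atol=1e-4), c_.shape)  # True (27, 27)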
def tf_hog_descriptor(images, cell_size=8, block_size=2, block_stride=1,
                      n_bins=9, grayscale=False):
    batch_size, height, width, depth = images.shape
    batch_size = tf.shape(images)[0]
    scale_factor = tf.constant(180 / n_bins, name="scale_factor",
                               dtype=tf.float32)

    img = images
    if grayscale and depth == 3:
        img = tf.image.rgb_to_grayscale(img, name="ImgGray")

    # automatically pad height and width to a valid size
    # (multiples of the cell size)
    if height % cell_size != 0 or width % cell_size != 0:
        height = height + (cell_size - (height % cell_size)) % cell_size
        width = width + (cell_size - (width % cell_size)) % cell_size
        img = tf.image.resize_image_with_crop_or_pad(img, height, width)

    # gradients
    grad = tf_deriv(img)
    g_x = grad[:, :, :, 0::2]
    g_y = grad[:, :, :, 1::2]

    # masking unwanted gradients of edge pixels
    mask_depth = 1 if grayscale else depth
    g_x_mask = np.ones((1, height, width, mask_depth))
    g_y_mask = np.ones((1, height, width, mask_depth))
    g_x_mask[:, :, (0, -1)] = 0
    g_y_mask[:, (0, -1)] = 0
    g_x_mask = tf.constant(g_x_mask, dtype=tf.float32)
    g_y_mask = tf.constant(g_y_mask, dtype=tf.float32)
    g_x_mask = tf.tile(g_x_mask, [batch_size, 1, 1, 1])
    g_y_mask = tf.tile(g_y_mask, [batch_size, 1, 1, 1])

    g_x = g_x * g_x_mask
    g_y = g_y * g_y_mask

    # maximum norm gradient selection
    g_norm = tf.add(tf.abs(g_x), tf.abs(g_y), "GradNorm")

    if not grayscale and depth != 1:
        # maximum norm gradient selection
        idx = tf.argmax(g_norm, 3)
        g_norm = tf.expand_dims(tf_select_by_idx(g_norm, idx, grayscale), -1)
        g_x = tf.expand_dims(tf_select_by_idx(g_x, idx, grayscale), -1)
        g_y = tf.expand_dims(tf_select_by_idx(g_y, idx, grayscale), -1)

    g_dir = tf_rad2deg(tf.atan2(g_y, g_x)) % 180
    g_bin = tf.to_int32(g_dir / scale_factor, name="Bins")

    # cells partitioning
    cell_norm = tf.space_to_depth(g_norm, cell_size, name="GradCells")
    cell_bins = tf.space_to_depth(g_bin, cell_size, name="BinsCells")

    # cells histograms
    hist = list()
    zero = tf.zeros_like(cell_bins, dtype=tf.float32)
    for i in range(n_bins):
        mask = tf.equal(cell_bins, tf.constant(i, name="%i" % i))
        hist.append(tf.reduce_mean(tf.where(mask, cell_norm, zero), 3))
    hist = tf.transpose(tf.stack(hist), [1, 2, 3, 0], name="Hist")

    # blocks partitioning
    block_hist = tf.extract_image_patches(
        hist,
        ksizes=[1, block_size, block_size, 1],
        strides=[1, block_stride, block_stride, 1],
        rates=[1, 1, 1, 1],
        padding='VALID',
        name="BlockHist")

    # block normalization
    block_hist = tf.nn.l2_normalize(block_hist, 3, epsilon=1.0)

    # HOG descriptor
    hog_descriptor = tf.reshape(
        block_hist,
        [batch_size,
         int(block_hist.get_shape()[1]) *
         int(block_hist.get_shape()[2]) *
         int(block_hist.get_shape()[3])],
        name='HOGDescriptor')

    return hog_descriptor
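A hedged shape check for the block-partitioning step above: with a 64x64 input, cell_size=8, block_size=2, block_stride=1 and n_bins=9, the cell grid is 8x8 and extract_image_patches yields a 7x7 grid of 2*2*9 = 36-value blocks, i.e. a 1764-dimensional descriptor.

import tensorflow as tf

hist = tf.zeros([1, 8, 8, 9])  # stand-in for the per-cell histograms
block_hist = tf.extract_image_patches(hist, ksizes=[1, 2, 2, 1],
                                      strides=[1, 1, 1, 1],
                                      rates=[1, 1, 1, 1], padding='VALID')
print(block_hist.shape)  # (1, 7, 7, 36) -> 7 * 7 * 36 = 1764 features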
          verbose=1, validation_data=(x_val, y_val))
score = model.evaluate(x_test, y_test, verbose=0)
print('Test loss:', score[0])
print('Test accuracy:', score[1])

#%% predict
for i in range(9, 11):
    qa, b4, b3, b2 = (df['BQA'].iloc[i], df['B4'].iloc[i],
                      df['B3'].iloc[i], df['B2'].iloc[i])
    img_t = img_preprocess(qa, b4, b3, b2, tdimn)[0]
    print(img_t.shape)
    with tf.Session() as sess:
        patches3 = tf.extract_image_patches(images=img_t,
                                            ksizes=[1, pn, pn, 1],
                                            strides=[1, 1, 1, 1],
                                            rates=[1, 1, 1, 1],
                                            padding='SAME')
        patches3 = tf.reshape(patches3, [-1, pn, pn, 3])
        val3 = sess.run(patches3)
    d = {0: 0, 1: 1}
    mask_n = np.zeros(tdimn)
    z = 0
    y = 0
    batch = tdimn[0]
    while y < tdimn[0]:
        pred = model.predict(val3[z:z + batch, :, :, :])
        for i in range(tdimn[0]):
            mask_n[i, y] = d[np.argmax(pred[i])]
        z += batch
            if i == 0 and j == 0:
                parts = part
            else:
                parts = tf.concat([parts, part], 0)
        return parts
"""

print(x_batch)
print(test_x_batch)

# add
patch_size_t = 180
stride = 30
patch_size_v = 80

patch_t = tf.extract_image_patches(x_batch,
                                   ksizes=[1, patch_size_t, patch_size_t, 1],
                                   strides=[1, stride, stride, 1],
                                   rates=[1, 1, 1, 1],
                                   padding='VALID')
patch_v = tf.extract_image_patches(test_x_batch,
                                   ksizes=[1, patch_size_v, patch_size_v, 1],
                                   strides=[1, stride, stride, 1],
                                   rates=[1, 1, 1, 1],
                                   padding='VALID')

print("patch_t shape", patch_t.shape)
print("patch_v shape", patch_v.shape)

def parts(patch, size):
    for i in range(patch.shape[0]):
        one_patch = patch[i, :, :, :]
#%%
plt.figure(figsize=(40, 20))
plt.subplot(131)
plt.imshow(misc.imrotate(np.squeeze(orig1, axis=2), 18, interp='nearest'),
           cmap=plt.get_cmap('gist_gray'))
plt.imshow(mask, cmap=plt.get_cmap('Reds'), alpha=0.5)
plt.title('Cloud-2')
plt.show()

#%% patch extraction
img = np.reshape(img, (1, mask.shape[0], mask.shape[1], 3))
img2 = np.reshape(mask, (1, mask.shape[0], mask.shape[1], 1))
with tf.Session() as sess:
    patches = tf.extract_image_patches(images=img,
                                       ksizes=[1, 16, 16, 1],
                                       strides=[1, 1, 1, 1],
                                       rates=[1, 1, 1, 1],
                                       padding='SAME')
    patches = tf.reshape(patches, [-1, 16, 16, 3])
    patches2 = tf.extract_image_patches(images=img2,
                                        ksizes=[1, 16, 16, 1],
                                        strides=[1, 1, 1, 1],
                                        rates=[1, 1, 1, 1],
                                        padding='SAME')
    patches2 = tf.reshape(patches2, [-1, 16, 16, 1])
    val = sess.run(patches)
    val2 = sess.run(patches2)

#%% data preparation
def conv_capsule_mat(input_tensor, input_activation, input_dim, output_dim,
                     layer_name, num_routing=3, num_in_atoms=3,
                     num_out_atoms=3, stride=2, kernel_size=5,
                     min_var=0.0005, final_beta=1.0):
    """Convolutional Capsule layer with Pose Matrices."""
    print('caps conv stride: {}'.format(stride))
    in_atom_sq = num_in_atoms * num_in_atoms
    with tf.variable_scope(layer_name):
        input_shape = tf.shape(input_tensor)
        _, _, _, in_height, in_width = input_tensor.get_shape()
        # This Variable will hold the state of the weights for the layer
        kernel = utils.weight_variable(
            shape=[input_dim, kernel_size, kernel_size, num_in_atoms,
                   output_dim * num_out_atoms],
            stddev=0.3)
        # kernel = tf.clip_by_norm(kernel, 3.0, axes=[1, 2, 3])
        activation_biases = utils.bias_variable(
            [1, 1, output_dim, 1, 1, 1, 1, 1],
            init_value=0.5,
            name='activation_biases')
        sigma_biases = utils.bias_variable(
            [1, 1, output_dim, 1, 1, 1, 1, 1],
            init_value=.5,
            name='sigma_biases')

        with tf.name_scope('conv'):
            print('convi;')
            # input_tensor: [x, 128, 8, c1, c2] -> [x*128, 8, c1, c2]
            print(input_tensor.get_shape())
            input_tensor_reshaped = tf.reshape(input_tensor, [
                input_shape[0] * input_dim * in_atom_sq, input_shape[3],
                input_shape[4], 1
            ])
            input_tensor_reshaped.set_shape(
                (None, input_tensor.get_shape()[3].value,
                 input_tensor.get_shape()[4].value, 1))
            input_act_reshaped = tf.reshape(input_activation, [
                input_shape[0] * input_dim, input_shape[3], input_shape[4], 1
            ])
            input_act_reshaped.set_shape(
                (None, input_tensor.get_shape()[3].value,
                 input_tensor.get_shape()[4].value, 1))
            print(input_tensor_reshaped.get_shape())

            # conv: [x*128, out*out_at, c3, c4]
            conv_patches = tf.extract_image_patches(
                images=input_tensor_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            act_patches = tf.extract_image_patches(
                images=input_act_reshaped,
                ksizes=[1, kernel_size, kernel_size, 1],
                strides=[1, stride, stride, 1],
                rates=[1, 1, 1, 1],
                padding='VALID',
            )
            o_height = (in_height.value - kernel_size) // stride + 1
            o_width = (in_width.value - kernel_size) // stride + 1
            patches = tf.reshape(conv_patches,
                                 (input_shape[0], input_dim, in_atom_sq,
                                  o_height, o_width, kernel_size, kernel_size))
            patches.set_shape((None, input_dim, in_atom_sq, o_height, o_width,
                               kernel_size, kernel_size))
            patch_trans = tf.transpose(patches, [1, 5, 6, 0, 3, 4, 2])
            patch_split = tf.reshape(
                patch_trans,
                (input_dim, kernel_size, kernel_size,
                 input_shape[0] * o_height * o_width * num_in_atoms,
                 num_in_atoms))
            patch_split.set_shape(
                (input_dim, kernel_size, kernel_size, None, num_in_atoms))
            a_patches = tf.reshape(act_patches,
                                   (input_shape[0], input_dim, 1, 1, o_height,
                                    o_width, kernel_size, kernel_size))
            a_patches.set_shape((None, input_dim, 1, 1, o_height, o_width,
                                 kernel_size, kernel_size))
        with tf.name_scope('input_act'):
            utils.activation_summary(
                tf.reduce_sum(
                    tf.reduce_sum(tf.reduce_sum(a_patches, axis=1), axis=-1),
                    axis=-1))
        with tf.name_scope('Wx'):
            wx = tf.matmul(patch_split, kernel)
            wx = tf.reshape(wx, (input_dim, kernel_size, kernel_size,
                                 input_shape[0], o_height, o_width,
                                 num_in_atoms * num_out_atoms, output_dim))
            wx.set_shape((input_dim, kernel_size, kernel_size, None, o_height,
                          o_width, num_in_atoms * num_out_atoms, output_dim))
            wx = tf.transpose(wx, [3, 0, 7, 6, 4, 5, 1, 2])
            utils.activation_summary(wx)
        with tf.name_scope('routing'):
            # Routing
            # logits: [x, 128, 10, c3, c4]
            logit_shape = [
                input_dim, output_dim, 1, o_height, o_width, kernel_size,
                kernel_size
            ]
            activation, center = update_conv_routing(
                wx=wx,
                input_activation=a_patches,
                activation_biases=activation_biases,
                sigma_biases=sigma_biases,
                logit_shape=logit_shape,
                num_out_atoms=num_out_atoms * num_out_atoms,
                input_dim=input_dim,
                num_routing=num_routing,
                output_dim=output_dim,
                min_var=min_var,
                final_beta=final_beta,
            )
        # activations: [x, 10, 8, c3, c4]
        out_activation = tf.squeeze(activation, axis=[1, 3, 6, 7])
        out_center = tf.squeeze(center, axis=[1, 6, 7])
        with tf.name_scope('center'):
            utils.activation_summary(out_center)
        return tf.sigmoid(out_activation), out_center
def forward(self, inputs, grid, is_training=True, reuse=False):
    def preprocessing(inputs):
        dims = inputs.get_shape()
        if len(dims) == 3:
            inputs = tf.expand_dims(inputs, dim=0)
        mean_BGR = tf.reshape(self.mean_BGR, [1, 1, 1, 3])
        inputs = inputs[:, :, :, ::-1] + mean_BGR
        return inputs

    ## ----------------------- depth and normal FCN --------------------------
    inputs = preprocessing(inputs)
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        activation_fn=tf.nn.relu, stride=1, padding='SAME',
                        weights_initializer=weight_from_caffe(self.pretrain_weight),
                        biases_initializer=bias_from_caffe(self.pretrain_weight)):
        with tf.variable_scope('fcn', reuse=reuse):
            ## --------------------- vgg depth -------------------------------
            conv1 = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
            pool1 = slim.max_pool2d(conv1, [3, 3], stride=2, padding='SAME', scope='pool1')
            conv2 = slim.repeat(pool1, 2, slim.conv2d, 128, [3, 3], scope='conv2')
            pool2 = slim.max_pool2d(conv2, [3, 3], stride=2, padding='SAME', scope='pool2')
            conv3 = slim.repeat(pool2, 3, slim.conv2d, 256, [3, 3], scope='conv3')
            pool3 = slim.max_pool2d(conv3, [3, 3], stride=2, padding='SAME', scope='pool3')
            conv4 = slim.repeat(pool3, 3, slim.conv2d, 512, [3, 3], scope='conv4')
            pool4 = slim.max_pool2d(conv4, [3, 3], stride=1, padding='SAME', scope='pool4')
            conv5 = slim.repeat(pool4, 3, slim.conv2d, 512, [3, 3], rate=2, scope='conv5')
            pool5 = slim.max_pool2d(conv5, [3, 3], stride=1, padding='SAME', scope='pool5')
            pool5a = slim.avg_pool2d(pool5, [3, 3], stride=1, padding='SAME', scope='pool5a')

            fc6 = slim.conv2d(pool5a, 1024, [3, 3], stride=1, rate=12, scope='fc6')
            fc6 = slim.dropout(fc6, 0.5, is_training=is_training, scope='drop6')
            fc7 = slim.conv2d(fc6, 1024, [1, 1], scope='fc7')
            fc7 = slim.dropout(fc7, 0.5, is_training=is_training, scope='drop7')

            pool6_1x1 = slim.avg_pool2d(fc7, [61, 81], stride=[61, 81],
                                        padding='SAME', scope='pool6_1x1')
            pool6_1x1_norm = slim.unit_norm(pool6_1x1, dim=3, scope='pool6_1x1_norm_new')
            pool6_1x1_norm_scale = pool6_1x1_norm * 10
            pool6_1x1_norm_upsample = tf.tile(pool6_1x1_norm_scale, [1, 61, 81, 1],
                                              name='pool6_1x1_norm_upsample')

            out = tf.concat([fc7, pool6_1x1_norm_upsample], axis=-1, name='out')
            out_reduce = slim.conv2d(out, 256, [1, 1], activation_fn=tf.nn.relu,
                                     stride=1, scope='out_reduce', padding='SAME',
                                     weights_initializer=weight_from_caffe(self.pretrain_weight),
                                     biases_initializer=bias_from_caffe(self.pretrain_weight))
            out_conv = slim.conv2d(out_reduce, 256, [3, 3], activation_fn=tf.nn.relu,
                                   stride=1, scope='out_conv', padding='SAME',
                                   weights_initializer=weight_from_caffe(self.pretrain_weight),
                                   biases_initializer=bias_from_caffe(self.pretrain_weight))
            out_conv_increase = slim.conv2d(out_conv, 1024, [1, 1],
                                            activation_fn=tf.nn.relu, stride=1,
                                            scope='out_conv_increase', padding='SAME',
                                            weights_initializer=weight_from_caffe(self.pretrain_weight),
                                            biases_initializer=bias_from_caffe(self.pretrain_weight))

            fc8_nyu_depth = slim.conv2d(out_conv_increase, 1, [1, 1],
                                        activation_fn=None, scope='fc8_nyu_depth')
            fc8_upsample = tf.image.resize_images(fc8_nyu_depth,
                                                  [self.crop_size_h, self.crop_size_w],
                                                  method=0, align_corners=True)
            # --------------------------- vgg depth end ----------------------

            ## ----------------- vgg norm -------------------------------------
            conv1_norm = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1_norm')
            pool1_norm = slim.max_pool2d(conv1_norm, [3, 3], stride=2, padding='SAME', scope='pool1_norm')
            conv2_norm = slim.repeat(pool1_norm, 2, slim.conv2d, 128, [3, 3], scope='conv2_norm')
            pool2_norm = slim.max_pool2d(conv2_norm, [3, 3], stride=2, padding='SAME', scope='pool2_norm')
            conv3_norm = slim.repeat(pool2_norm, 3, slim.conv2d, 256, [3, 3], scope='conv3_norm')
            pool3_norm = slim.max_pool2d(conv3_norm, [3, 3], stride=2, padding='SAME', scope='pool3_norm')
            conv4_norm = slim.repeat(pool3_norm, 3, slim.conv2d, 512, [3, 3], scope='conv4_norm')
            pool4_norm = slim.max_pool2d(conv4_norm, [3, 3], stride=1, padding='SAME', scope='pool4_norm')
            conv5_norm = slim.repeat(pool4_norm, 3, slim.conv2d, 512, [3, 3], rate=2, scope='conv5_norm')
            pool5_norm = slim.max_pool2d(conv5_norm, [3, 3], stride=1, padding='SAME', scope='pool5_norm')
            pool5a_norm = slim.avg_pool2d(pool5_norm, [3, 3], stride=1, padding='SAME', scope='pool5a_norm')

            fc6_norm = slim.conv2d(pool5a_norm, 1024, [3, 3], stride=1, rate=12, scope='fc6_norm')
            fc6_norm = slim.dropout(fc6_norm, 0.5, is_training=is_training, scope='drop6_norm')
            fc7_norm = slim.conv2d(fc6_norm, 1024, [1, 1], scope='fc7_norm')
            fc7_norm = slim.dropout(fc7_norm, 0.5, is_training=is_training, scope='drop7_norm')

            pool6_1x1_norm_new = slim.avg_pool2d(fc7_norm, [61, 81], stride=[61, 81],
                                                 padding='SAME', scope='pool6_1x1_norm_new')
            pool6_1x1_norm_norm = slim.unit_norm(pool6_1x1_norm_new, dim=3,
                                                 scope='pool6_1x1_norm_new')
            pool6_1x1_norm_scale_norm = pool6_1x1_norm_norm * 10
            pool6_1x1_norm_upsample_norm = tf.tile(pool6_1x1_norm_scale_norm,
                                                   [1, 61, 81, 1],
                                                   name='pool6_1x1_norm_upsample')

            out_norm = tf.concat([fc7_norm, pool6_1x1_norm_upsample_norm],
                                 axis=-1, name='out_norm')
            fc8_nyu_norm_norm = slim.conv2d(out_norm, 3, [1, 1], activation_fn=None,
                                            scope='fc8_nyu_norm_norm')
            fc8_upsample_norm = tf.image.resize_images(fc8_nyu_norm_norm,
                                                       [self.crop_size_h, self.crop_size_w],
                                                       method=0, align_corners=True)
            fc8_upsample_norm = slim.unit_norm(fc8_upsample_norm, dim=3)
            # --------------------------- vgg norm end -----------------------

        # ------------- depth to normal + norm refinement ---------------------
        with tf.variable_scope('noise', reuse=reuse):
            fc8_upsample_norm = tf.squeeze(fc8_upsample_norm)
            fc8_upsample_norm = tf.reshape(fc8_upsample_norm,
                                           [self.batch_size, self.crop_size_h,
                                            self.crop_size_w, 3])

            norm_matrix = tf.extract_image_patches(images=fc8_upsample_norm,
                                                   ksizes=[1, self.k, self.k, 1],
                                                   strides=[1, 1, 1, 1],
                                                   rates=[1, self.rate, self.rate, 1],
                                                   padding='SAME')
            matrix_c = tf.reshape(norm_matrix,
                                  [self.batch_size, self.crop_size_h,
                                   self.crop_size_w, self.k * self.k, 3])
            fc8_upsample_norm = tf.expand_dims(fc8_upsample_norm, axis=4)
            angle = tf.matmul(matrix_c, fc8_upsample_norm)

            valid_condition = tf.greater(angle, self.thresh)
            valid_condition_all = tf.tile(valid_condition, [1, 1, 1, 1, 3])

            exp_depth = tf.exp(fc8_upsample * 0.69314718056)
            depth_repeat = tf.tile(exp_depth, [1, 1, 1, 3])
            points = tf.multiply(grid, depth_repeat)
            point_matrix = tf.extract_image_patches(images=points,
                                                    ksizes=[1, self.k, self.k, 1],
                                                    strides=[1, 1, 1, 1],
                                                    rates=[1, self.rate, self.rate, 1],
                                                    padding='SAME')
            matrix_a = tf.reshape(point_matrix,
                                  [self.batch_size, self.crop_size_h,
                                   self.crop_size_w, self.k * self.k, 3])

            matrix_a_zero = tf.zeros_like(matrix_a, dtype=tf.float32)
            matrix_a_valid = tf.where(valid_condition_all, matrix_a, matrix_a_zero)
            matrix_a_trans = tf.matrix_transpose(matrix_a_valid, name='matrix_transpose')
            matrix_b = tf.ones(shape=[self.batch_size, self.crop_size_h,
                                      self.crop_size_w, self.k * self.k, 1])
            point_multi = tf.matmul(matrix_a_trans, matrix_a_valid,
                                    name='matrix_multiplication')

            with tf.device('cpu:0'):
                matrix_deter = tf.matrix_determinant(point_multi)
            inverse_condition = tf.greater(matrix_deter, 1e-5)
            inverse_condition = tf.expand_dims(inverse_condition, axis=3)
            inverse_condition = tf.expand_dims(inverse_condition, axis=4)
            inverse_condition_all = tf.tile(inverse_condition, [1, 1, 1, 3, 3])

            diag_constant = tf.ones([3], dtype=tf.float32)
            diag_element = tf.diag(diag_constant)
            diag_element = tf.expand_dims(diag_element, axis=0)
            diag_element = tf.expand_dims(diag_element, axis=0)
            diag_element = tf.expand_dims(diag_element, axis=0)
            diag_matrix = tf.tile(diag_element, [self.batch_size, self.crop_size_h,
                                                 self.crop_size_w, 1, 1])

            inversible_matrix = tf.where(inverse_condition_all, point_multi, diag_matrix)
            with tf.device('cpu:0'):
                inv_matrix = tf.matrix_inverse(inversible_matrix)

            generated_norm = tf.matmul(tf.matmul(inv_matrix, matrix_a_trans), matrix_b)
            norm_normalize = slim.unit_norm(generated_norm, dim=3)
            norm_normalize = tf.reshape(norm_normalize,
                                        [self.batch_size, self.crop_size_h,
                                         self.crop_size_w, 3])
            norm_scale = norm_normalize * 10.0

            conv1_noise = slim.repeat(norm_scale, 2, slim.conv2d, 64, [3, 3], scope='conv1_noise')
            pool1_noise = slim.max_pool2d(conv1_noise, [3, 3], stride=2,
                                          padding='SAME', scope='pool1_noise')
            conv2_noise = slim.repeat(pool1_noise, 2, slim.conv2d, 128, [3, 3], scope='conv2_noise')
            conv3_noise = slim.repeat(conv2_noise, 3, slim.conv2d, 256, [3, 3], scope='conv3_noise')

            fc1_noise = slim.conv2d(conv3_noise, 512, [1, 1], activation_fn=tf.nn.relu,
                                    stride=1, scope='fc1_noise', padding='SAME')
            encode_norm_noise = slim.conv2d(fc1_noise, 3, [3, 3], activation_fn=None,
                                            stride=1, scope='encode_norm_noise',
                                            padding='SAME')
            encode_norm_upsample_noise = tf.image.resize_images(
                encode_norm_noise, [self.crop_size_h, self.crop_size_w],
                method=0, align_corners=True)

            sum_norm_noise = tf.add(norm_normalize, encode_norm_upsample_noise)
            norm_pred_noise = slim.unit_norm(sum_norm_noise, dim=3)

            norm_pred_all = tf.concat(
                [tf.expand_dims(tf.squeeze(fc8_upsample_norm), axis=0),
                 norm_pred_noise,
                 inputs * 0.00392156862], axis=3)
            norm_pred_all = slim.repeat(norm_pred_all, 3, slim.conv2d, 128, [3, 3], rate=2,
                                        weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                        biases_initializer=tf.constant_initializer(0.0),
                                        scope='conv1_norm_noise_new')
            norm_pred_all = slim.repeat(norm_pred_all, 3, slim.conv2d, 128, [3, 3],
                                        weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                        biases_initializer=tf.constant_initializer(0.0),
                                        scope='conv2_norm_noise_new')
            norm_pred_final = slim.conv2d(norm_pred_all, 3, [3, 3], activation_fn=None,
                                          weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                          biases_initializer=tf.constant_initializer(0.0),
                                          scope='norm_conv3_noise_new')
            norm_pred_final = slim.unit_norm(norm_pred_final, dim=3)

        # ------------- normal to depth + depth refinement ---------------------
        with tf.variable_scope('norm_depth', reuse=reuse):
            grid_patch = tf.extract_image_patches(images=grid,
                                                  ksizes=[1, self.k, self.k, 1],
                                                  strides=[1, 1, 1, 1],
                                                  rates=[1, self.rate, self.rate, 1],
                                                  padding='SAME')
            grid_patch = tf.reshape(grid_patch, [self.batch_size, self.crop_size_h,
                                                 self.crop_size_w, self.k * self.k, 3])
            _, _, depth_data = tf.split(value=matrix_a, num_or_size_splits=3, axis=4)

            tmp_matrix_zero = tf.zeros_like(angle, dtype=tf.float32)
            valid_angle = tf.where(valid_condition, angle, tmp_matrix_zero)

            lower_matrix = tf.matmul(matrix_c, tf.expand_dims(grid, axis=4))
            condition = tf.greater(lower_matrix, 1e-5)
            tmp_matrix = tf.ones_like(lower_matrix)
            lower_matrix = tf.where(condition, lower_matrix, tmp_matrix)
            lower = tf.reciprocal(lower_matrix)
            valid_angle = tf.where(condition, valid_angle, tmp_matrix_zero)

            upper = tf.reduce_sum(tf.multiply(matrix_c, grid_patch), [4])
            ratio = tf.multiply(lower, tf.expand_dims(upper, axis=4))
            estimate_depth = tf.multiply(ratio, depth_data)

            valid_angle = tf.multiply(
                valid_angle,
                tf.reciprocal(tf.tile(
                    tf.reduce_sum(valid_angle, [3, 4], keep_dims=True) + 1e-5,
                    [1, 1, 1, 81, 1])))
            depth_stage1 = tf.reduce_sum(tf.multiply(estimate_depth, valid_angle), [3, 4])
            depth_stage1 = tf.expand_dims(tf.squeeze(depth_stage1), axis=2)
            depth_stage1 = tf.clip_by_value(depth_stage1, 0, 10.0)
            exp_depth = tf.expand_dims(tf.squeeze(exp_depth), axis=2)

            depth_all = tf.expand_dims(
                tf.concat([depth_stage1, exp_depth,
                           tf.squeeze(inputs) * 0.00392156862], axis=2),
                axis=0)
            depth_pred_all = slim.repeat(depth_all, 3, slim.conv2d, 128, [3, 3], rate=2,
                                         weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                         biases_initializer=tf.constant_initializer(0.0),
                                         scope='conv1_depth_noise_new')
            depth_pred_all = slim.repeat(depth_pred_all, 3, slim.conv2d, 128, [3, 3],
                                         weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                         biases_initializer=tf.constant_initializer(0.0),
                                         scope='conv2_depth_noise_new')
            final_depth = slim.conv2d(depth_pred_all, 1, [3, 3], activation_fn=None,
                                      weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                      biases_initializer=tf.constant_initializer(0.0),
                                      scope='depth_conv3_noise_new')

        with tf.variable_scope('edge_refinemet', reuse=reuse):
            print(inputs.shape)
            edges = tf.py_func(myfunc_canny, [inputs], tf.float32)
            edges = tf.reshape(edges, [1, self.crop_size_h, self.crop_size_w, 1])
            edge_input_depth = final_depth
            edge_input_norm = norm_pred_final

            # edge prediction for depth
            edge_inputs = tf.concat([edges, inputs * 0.00784], axis=3)
            edges_encoder = slim.repeat(edge_inputs, 3, slim.conv2d, 32, [3, 3], rate=2,
                                        weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                        biases_initializer=tf.constant_initializer(0.0),
                                        scope='conv1_edge_refinement')
            edges_encoder = slim.repeat(edges_encoder, 3, slim.conv2d, 32, [3, 3],
                                        weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                        biases_initializer=tf.constant_initializer(0.0),
                                        scope='conv2_edge_refinement')
            edges_predictor = slim.conv2d(edges_encoder, 8, [3, 3], activation_fn=None,
                                          weights_initializer=tf.contrib.layers.xavier_initializer(uniform=False),
                                          biases_initializer=tf.constant_initializer(0.0),
                                          scope='edge_weight')
            edges_all = edges_predictor + tf.tile(edges, [1, 1, 1, 8])
            edges_all = tf.clip_by_value(edges_all, 0.0, 1.0)
            dlr, drl, dud, ddu, nlr, nrl, nud, ndu = tf.split(
                edges_all, num_or_size_splits=8, axis=3)

            # 4 propagation iterations for depth
            final_depth = propagate(edge_input_depth, dlr, drl, dud, ddu, 1)
            final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1)
            final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1)
            final_depth = propagate(final_depth, dlr, drl, dud, ddu, 1)

            # 4 propagation iterations for normals
            norm_pred_final = propagate(edge_input_norm, nlr, nrl, nud, ndu, 3)
            norm_pred_final = slim.unit_norm(norm_pred_final, dim=3)
            norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud, ndu, 3)
            norm_pred_final = slim.unit_norm(norm_pred_final, dim=3)
            norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud, ndu, 3)
            norm_pred_final = slim.unit_norm(norm_pred_final, dim=3)
            norm_pred_final = propagate(norm_pred_final, nlr, nrl, nud, ndu, 3)
            norm_pred_final = slim.unit_norm(norm_pred_final, dim=3)

    return final_depth, fc8_upsample_norm, norm_pred_final, fc8_upsample
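A minimal sketch (made-up shapes) of the neighborhood-gathering pattern used in the 'noise' and 'norm_depth' scopes above: stride 1, SAME padding and rates=[1, r, r, 1] collect each pixel's dilated k x k neighborhood, which reshapes into the per-pixel (k*k, 3) systems used for the least-squares normal fit.

import numpy as np
import tensorflow as tf

k, rate, H, W, C = 3, 2, 8, 8, 3  # hypothetical sizes
points = tf.constant(np.random.rand(1, H, W, C), dtype=tf.float32)
nbrs = tf.extract_image_patches(points, ksizes=[1, k, k, 1],
                                strides=[1, 1, 1, 1],
                                rates=[1, rate, rate, 1], padding='SAME')
matrix_a = tf.reshape(nbrs, [1, H, W, k * k, C])  # one (k*k, 3) system per pixel
with tf.Session() as sess:
    print(sess.run(tf.shape(matrix_a)))  # [1 8 8 9 3]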
def inference(images):
    """Build the CIFAR-10 model.

    Args:
      images: Images returned from distorted_inputs() or inputs().

    Returns:
      Logits.
    """
    # We instantiate all variables using tf.get_variable() instead of
    # tf.Variable() in order to share variables across multiple GPU training runs.
    # If we only ran this model on a single GPU, we could simplify this function
    # by replacing all instances of tf.get_variable() with tf.Variable().
    #
    # conv1
    with tf.variable_scope('conv1') as scope:
        kernel = _variable_with_weight_decay('weights',
                                             shape=[5, 5, 3, 64],
                                             stddev=5e-2,
                                             wd=0.0)
        conv = tf.nn.conv2d(images, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.0))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv1)

    # pool1
    pool1 = tf.nn.max_pool(conv1, ksize=[1, 3, 3, 1], strides=[1, 2, 2, 1],
                           padding='SAME', name='pool1')
    # norm1 (local response normalization)
    norm1 = tf.nn.lrn(pool1, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm1')

    # conv2
    with tf.variable_scope('conv2') as scope:
        kernel = _variable_with_weight_decay('weights',
                                             shape=[5, 5, 64, 64],
                                             stddev=5e-2,
                                             wd=0.0)
        conv = tf.nn.conv2d(norm1, kernel, [1, 1, 1, 1], padding='SAME')
        biases = _variable_on_cpu('biases', [64], tf.constant_initializer(0.1))
        pre_activation = tf.nn.bias_add(conv, biases)
        conv2 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(conv2)

    # norm2
    norm2 = tf.nn.lrn(conv2, 4, bias=1.0, alpha=0.001 / 9.0, beta=0.75,
                      name='norm2')
    # pool2
    pool2 = tf.nn.max_pool(norm2, ksize=[1, 3, 3, 1],
                           strides=[1, 2, 2, 1], padding='SAME', name='pool2')

    """
    # original code
    # local3
    with tf.variable_scope('local3') as scope:
        # Move everything into depth so we can perform a single matrix multiply.
        reshape = tf.reshape(pool2, [FLAGS.batch_size, -1])
        dim = reshape.get_shape()[1].value
        weights = _variable_with_weight_decay('weights', shape=[dim, 384],
                                              stddev=0.04, wd=0.004)
        biases = _variable_on_cpu('biases', [384], tf.constant_initializer(0.1))
        local3 = tf.nn.relu(tf.matmul(reshape, weights) + biases, name=scope.name)
        _activation_summary(local3)

    # local4
    with tf.variable_scope('local4') as scope:
        weights = _variable_with_weight_decay('weights', shape=[384, 192],
                                              stddev=0.04, wd=0.004)
        biases = _variable_on_cpu('biases', [192], tf.constant_initializer(0.1))
        local4 = tf.nn.relu(tf.matmul(local3, weights) + biases, name=scope.name)
        _activation_summary(local4)
    """

    # Changed to locally connected layers. For the construction, see
    # https://github.com/akrizhevsky/cuda-convnet2/blob/master/layers/layers-cifar10-11pct.cfg
    # For details on locally connected layers, see
    # https://prateekvjoshi.com/2016/04/12/understanding-locally-connected-layers-in-convolutional-neural-networks/
    with tf.variable_scope('local3') as scope:
        sz_local = 3  # kernel size
        n_channels = 64
        # Extract 3x3 tensor patches
        patches = tf.extract_image_patches(pool2, [1, sz_local, sz_local, 1],
                                           [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')
        shape = patches.get_shape()
        h = shape[1].value
        w = shape[2].value
        sz_patch = shape[3].value
        weights = _variable_with_weight_decay('weights',
                                              shape=[1, h, w, sz_patch, n_channels],
                                              stddev=5e-2, wd=0.04)
        bias = _variable_on_cpu('biases', [h, w, n_channels],
                                tf.constant_initializer(0.1))
        # "Filter" each patch with its own kernel
        mul = tf.multiply(tf.expand_dims(patches, axis=-1), weights)
        patches_sum = tf.reduce_sum(mul, axis=3)
        pre_activation = tf.add(patches_sum, bias)
        local3 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(local3)

    with tf.variable_scope('local4') as scope:
        sz_local = 3  # kernel size
        n_channels = 32
        # Extract 3x3 tensor patches
        patches = tf.extract_image_patches(local3, [1, sz_local, sz_local, 1],
                                           [1, 1, 1, 1], [1, 1, 1, 1], 'SAME')
        shape = patches.get_shape()
        h = shape[1].value
        w = shape[2].value
        sz_patch = shape[3].value
        weights = _variable_with_weight_decay('weights',
                                              shape=[1, h, w, sz_patch, n_channels],
                                              stddev=5e-2, wd=0.04)
        bias = _variable_on_cpu('biases', [h, w, n_channels],
                                tf.constant_initializer(0.1))
        # "Filter" each patch with its own kernel
        mul = tf.multiply(tf.expand_dims(patches, axis=-1), weights)
        patches_sum = tf.reduce_sum(mul, axis=3)
        pre_activation = tf.add(patches_sum, bias)
        local4 = tf.nn.relu(pre_activation, name=scope.name)
        _activation_summary(local4)

    # linear layer (WX + b)
    # We don't apply softmax here because
    # tf.nn.sparse_softmax_cross_entropy_with_logits accepts the unscaled logits
    # and performs the softmax internally for efficiency.
    with tf.variable_scope('softmax_linear') as scope:
        weights = _variable_with_weight_decay('weights', [192, NUM_CLASSES],
                                              stddev=1 / 192.0, wd=0.0)
        biases = _variable_on_cpu('biases', [NUM_CLASSES],
                                  tf.constant_initializer(0.0))
        softmax_linear = tf.add(tf.matmul(local4, weights), biases,
                                name=scope.name)
        # If we apply tf.nn.softmax() here, we must change the loss accordingly.
        softmax_linear = tf.nn.softmax(softmax_linear)
        _activation_summary(softmax_linear)

    return softmax_linear
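A small sanity check of the locally connected pattern in local3/local4, under hypothetical shapes: the untied weights carry an explicit h x w axis, so the "convolution" reduces to an elementwise multiply plus a sum over the patch axis.

import numpy as np
import tensorflow as tf

B, H, W, C, k, n_out = 2, 6, 6, 4, 3, 8  # made-up sizes
x = tf.constant(np.random.rand(B, H, W, C), dtype=tf.float32)
patches = tf.extract_image_patches(x, [1, k, k, 1], [1, 1, 1, 1],
                                   [1, 1, 1, 1], 'SAME')       # B x H x W x k*k*C
w = tf.constant(np.random.rand(1, H, W, k * k * C, n_out), dtype=tf.float32)
out = tf.reduce_sum(tf.expand_dims(patches, -1) * w, axis=3)   # B x H x W x n_out
with tf.Session() as sess:
    print(sess.run(tf.shape(out)))  # [2 6 6 8]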
import tensorflow as tf
import numpy as np

ksize = 6
stride = 9
img_size = 55

a = tf.placeholder(tf.float32, [img_size, img_size])
_a = tf.expand_dims(a, axis=0)
_a = tf.expand_dims(_a, axis=3)
b = tf.extract_image_patches(_a,
                             ksizes=[1, ksize, ksize, 1],
                             strides=[1, stride, stride, 1],
                             rates=[1, 1, 1, 1],
                             padding='VALID')

sess = tf.Session()
sess.run(tf.global_variables_initializer())

f = np.arange(img_size ** 2) * 0.01
f = np.reshape(f, [img_size, img_size])
fd = {a: f}

s = sess.run(a, fd)
for i in range(img_size):
    print(s[i])

ppp = sess.run(b, fd)
print(ppp)
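For reference, VALID patch grids follow floor((in - ksize) / stride) + 1 per dimension; appended to the script above, this checks the expected (1, 6, 6, 36) output.

# Sanity check: (55 - 6) // 9 + 1 = 6 patches per dimension,
# each flattened 6x6x1 patch contributing 36 channels.
out = (img_size - ksize) // stride + 1
print(out, ppp.shape)  # 6 (1, 6, 6, 36)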
def _extract_image_patches(self, NHWC_X):
    # returns: N x H x W x C * P
    return tf.extract_image_patches(
        NHWC_X,
        [1, self.filter_size, self.filter_size, 1],
        [1, self.stride, self.stride, 1],
        [1, self.dilation, self.dilation, 1],
        "VALID")
def forward(self):
    inp = self.inp.out
    s = self.lay.stride
    self.out = tf.extract_image_patches(
        inp, [1, s, s, 1], [1, s, s, 1], [1, 1, 1, 1], 'VALID')
def tf_median(x, kernel):
    with tf.name_scope('median_filter'):
        xp = tf.pad(x, [[0, 0], 2 * [kernel // 2], 2 * [kernel // 2], [0, 0]],
                    'REFLECT')
        patches = tf.extract_image_patches(xp, [1, kernel, kernel, 1],
                                           [1, 1, 1, 1], 4 * [1], 'VALID')
        patches = tf.reshape(patches,
                             [tf.shape(patches)[0], tf.shape(patches)[1],
                              tf.shape(patches)[2],
                              tf.shape(patches)[3] // 3, 3])
        return tf.contrib.distributions.percentile(patches, 50, axis=3)
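A hedged usage sketch for tf_median with a made-up RGB batch: the REFLECT pre-padding makes the VALID patch grid match the input, so the filtered image keeps its shape.

import numpy as np
import tensorflow as tf

img = tf.constant(np.random.rand(1, 32, 32, 3), dtype=tf.float32)
smoothed = tf_median(img, kernel=5)  # per-channel 5x5 median
with tf.Session() as sess:
    print(sess.run(tf.shape(smoothed)))  # [ 1 32 32  3]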
def _body(i, posterior, activation, center, masses):
    """Body of the EM while loop."""
    del activation
    beta = final_beta * (1 - tf.pow(0.95, tf.cast(i + 1, tf.float32)))
    # beta = final_beta
    # route: [outdim, height?, width?, batch, indim]
    vote_conf = posterior * input_activation
    # masses: [batch, 1, outdim, 1, height, width, 1, 1]
    masses = tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
        vote_conf, axis=1, keep_dims=True), axis=-1, keep_dims=True),
        axis=-2, keep_dims=True) + 0.0000001
    preactivate_unrolled = vote_conf * wx
    # center: [batch, 1, outdim, outatom, height, width]
    center = .9 * tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
        preactivate_unrolled, axis=1, keep_dims=True), axis=-1,
        keep_dims=True), axis=-2, keep_dims=True) / masses + .1 * center

    noise = (wx - center) * (wx - center)
    variance = min_var + tf.reduce_sum(tf.reduce_sum(tf.reduce_sum(
        vote_conf * noise, axis=1, keep_dims=True), axis=-1,
        keep_dims=True), axis=-2, keep_dims=True) / masses
    log_variance = tf.log(variance)
    p_i = -1 * tf.reduce_sum(log_variance, axis=3, keep_dims=True)
    log_2pi = tf.log(2 * math.pi)
    win = masses * (p_i - sigma_biases * num_out_atoms * (log_2pi + 1.0))
    logit = beta * (win - activation_biases * 5000)
    activation_update = tf.minimum(
        0.0, logit) - tf.log(1 + tf.exp(-tf.abs(logit)))
    # return activation, center
    log_det_sigma = -1 * p_i
    sigma_update = (num_out_atoms * log_2pi + log_det_sigma) / 2.0
    exp_update = tf.reduce_sum(noise / (2 * variance), axis=3, keep_dims=True)
    prior_update = activation_update - sigma_update - exp_update
    max_prior_update = tf.reduce_max(tf.reduce_max(tf.reduce_max(
        tf.reduce_max(prior_update, axis=-1, keep_dims=True), axis=-2,
        keep_dims=True), axis=-3, keep_dims=True), axis=-4, keep_dims=True)
    prior_normal = tf.add(prior_update, -1 * max_prior_update)
    prior_exp = tf.exp(prior_normal)
    t_prior = tf.transpose(prior_exp, [0, 1, 2, 3, 4, 6, 5, 7])
    c_prior = tf.reshape(t_prior, [-1, n * k, n * k, 1])
    pad_prior = tf.pad(c_prior,
                       [[0, 0],
                        [(k - 1) * (k - 1), (k - 1) * (k - 1)],
                        [(k - 1) * (k - 1), (k - 1) * (k - 1)],
                        [0, 0]], 'CONSTANT')
    patch_prior = tf.extract_image_patches(images=pad_prior,
                                           ksizes=[1, k, k, 1],
                                           strides=[1, k, k, 1],
                                           rates=[1, k - 1, k - 1, 1],
                                           padding='VALID')
    sum_prior = tf.reduce_sum(patch_prior, axis=-1, keep_dims=True)
    sum_prior_patch = tf.extract_image_patches(images=sum_prior,
                                               ksizes=[1, k, k, 1],
                                               strides=[1, 1, 1, 1],
                                               rates=[1, 1, 1, 1],
                                               padding='VALID')
    sum_prior_reshape = tf.reshape(
        sum_prior_patch,
        [-1, input_dim, output_dim, 1, n, n, k, k]) + 0.0000001
    posterior = prior_exp / sum_prior_reshape
    return (posterior, logit, center, masses)
def reorg(x, stride):
    return tf.extract_image_patches(x,
                                    [1, stride, stride, 1],
                                    [1, stride, stride, 1],
                                    [1, 1, 1, 1],
                                    padding="VALID")
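With ksizes equal to strides, unit rates and VALID padding, this reorg appears to coincide with tf.space_to_depth for NHWC inputs; a quick check under that assumption:

import numpy as np
import tensorflow as tf

x = tf.constant(np.random.rand(1, 4, 4, 2), dtype=tf.float32)
with tf.Session() as sess:
    a, b = sess.run([reorg(x, 2), tf.space_to_depth(x, 2)])
    print(np.allclose(a, b))  # True: both flatten non-overlapping 2x2 blocks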
def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, relu=True,
         padding=DEFAULT_PADDING, group=1, biased=True):
    # Verify that the padding is acceptable
    self.validate_padding(padding)
    # Get the number of channels in the input
    c_i = input.get_shape()[-1]
    # Verify that the grouping parameter is valid
    assert c_i % group == 0
    assert c_o % group == 0
    # Convolution for a given input and kernel
    convolve = lambda i, k: tf.nn.conv2d(
        i, k, [1, s_h, s_w, 1], padding=padding)
    with tf.variable_scope(name) as scope:
        # Get input patches and construct the hessian op
        get_patches_op = tf.extract_image_patches(input,
                                                  ksizes=[1, k_h, k_w, 1],
                                                  strides=[1, s_h, s_w, 1],
                                                  rates=[1, 1, 1, 1],
                                                  padding=padding)
        self.get_layer_inputs_op[name] = get_patches_op
        print 'Layer %s, input shape: %s' % (name, get_patches_op.get_shape())

        # First method to calculate the hessian
        '''
        a = tf.expand_dims(get_patches_op, axis=-1)
        a = tf.concat([a, tf.ones([tf.shape(a)[0], tf.shape(a)[1],
                                   tf.shape(a)[2], 1, 1])], axis=3)
        b = tf.expand_dims(get_patches_op, axis=3)
        b = tf.concat([b, tf.ones([tf.shape(b)[0], tf.shape(b)[1],
                                   tf.shape(b)[2], 1, 1])], axis=4)
        outprod = tf.multiply(a, b)
        self.get_batch_hessian_op[name] = tf.reduce_mean(outprod,
                                                         axis=[0, 1, 2])
        print 'Layer %s, hessian shape: %s' % (
            name, self.get_batch_hessian_op[name].get_shape())
        '''

        # Second method to calculate the hessian
        '''
        patches_shape = get_patches_op.get_shape().as_list()
        # n_patches = batch_size * patches_shape[1] * patches_shape[2]
        Dtensor = tf.reshape(get_patches_op,
                             [-1, patches_shape[1] * patches_shape[2],
                              patches_shape[3], 1])
        print 'Dtensor: %s' % Dtensor.get_shape()
        Dtensor = tf.concat([Dtensor, tf.ones([tf.shape(Dtensor)[0],
                                               tf.shape(Dtensor)[1], 1, 1])],
                            axis=2)
        print 'Dtensor after concatenating one: %s' % Dtensor.get_shape()
        self.get_batch_hessian_op[name] = tf.reduce_mean(
            tf.matmul(Dtensor, Dtensor, transpose_b=True), axis=[0, 1])
        print 'Hessian shape: %s' % self.get_batch_hessian_op[name].get_shape()
        '''

        kernel = self.make_var('weights', shape=[k_h, k_w, c_i / group, c_o])
        if group == 1:
            # This is the common case. Convolve the input without any
            # further complications.
            output = convolve(input, kernel)
        else:
            # The following commented lines are the old code that worked
            # with an older version of TensorFlow:
            # input_groups = tf.split(3, group, input)
            # kernel_groups = tf.split(3, group, kernel)
            # output_groups = [convolve(i, k) for i, k in zip(input_groups, kernel_groups)]
            # output = tf.concat(3, output_groups)

            # Split the input into groups and then convolve each of them
            # independently (current TensorFlow version)
            input_groups = tf.split(input, group, 3)
            kernel_groups = tf.split(kernel, group, 3)
            output_groups = [
                convolve(i, k) for i, k in zip(input_groups, kernel_groups)
            ]
            # Concatenate the groups
            output = tf.concat(output_groups, 3)
        # Add the biases
        if biased:
            biases = self.make_var('biases', [c_o])
            output = tf.nn.bias_add(output, biases)
        if relu:
            # ReLU non-linearity
            output = tf.nn.relu(output, name=scope.name)
        return output
def hlconv(x=None, uw=None, sw=None, ksizes=None, strides=None,
           padding=None, name=None, **kwargs):
    # replace default args with kwargs
    if 'x' in kwargs: x = kwargs['x']
    if 'uw' in kwargs: uw = kwargs['uw']
    if 'sw' in kwargs: sw = kwargs['sw']
    if 'ksizes' in kwargs: ksizes = kwargs['ksizes']
    if 'strides' in kwargs: strides = kwargs['strides']
    if 'padding' in kwargs: padding = kwargs['padding']
    if 'name' in kwargs: name = kwargs['name']

    # simple validation
    assert len(ksizes) == 4
    assert len(x.shape.as_list()) == 4
    assert ksizes[0] == 1
    kh = ksizes[1]
    kw = ksizes[2]
    depth = x.shape.as_list()[3]
    kn = uw.shape.as_list()[0]
    assert kh * kw * depth == uw.shape.as_list()[1]

    patches = tf.extract_image_patches(
        x,
        ksizes=ksizes,
        strides=strides,
        rates=[1, 1, 1, 1],
        padding=padding,
        name=name + '-p')

    # create output tensors
    y = list(range(kn))
    fields = list(range(kn))
    arrivals = list(range(kn))

    # construct sub-operator graph
    for _i in xrange(kn):
        fields[_i] = tf.norm(uw[_i] - patches, axis=3)
        arrivals[_i] = tf.reduce_sum(sw[_i] * patches, axis=3)
    fields_stacked = tf.stack(fields, axis=0)
    fields_polarized = sig_polar(fields_stacked)
    for _i in xrange(kn):
        y[_i] = fields_polarized[_i] * arrivals[_i]
    y_stacked = tf.stack(y, axis=3, name=name + '-hlconv')

    utrain_op, energy = make_unsupervised_train_op(
        uw, patches, fields_polarized)
    return (y_stacked, utrain_op, energy)
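The distance-field computation at the core of hlconv, isolated with hypothetical shapes (sig_polar and the unsupervised train op are omitted): each row of uw acts as a template compared against every extracted patch by L2 distance.

import numpy as np
import tensorflow as tf

kh = kw = 3; depth = 2; kn = 4  # made-up sizes
x = tf.constant(np.random.rand(1, 8, 8, depth), dtype=tf.float32)
uw = tf.constant(np.random.rand(kn, kh * kw * depth), dtype=tf.float32)
patches = tf.extract_image_patches(x, [1, kh, kw, 1], [1, 1, 1, 1],
                                   [1, 1, 1, 1], 'SAME')  # 1 x 8 x 8 x 18
# L2 distance between each template and every patch
fields = tf.stack([tf.norm(uw[i] - patches, axis=3) for i in range(kn)],
                  axis=0)
with tf.Session() as sess:
    print(sess.run(tf.shape(fields)))  # [4 1 8 8]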