def _squeezenet(images, num_classes=1000):
    images = tf.reshape(images, [-1, 224, 224, 3])
    net = conv2d(images, 64, [3, 3], stride=2, scope='conv1')
    net = max_pool2d(net, [3, 3], stride=2, scope='maxpool1')
    net = fire_module(net, 16, 64, scope='fire2')
    net_fire3 = fire_module(net, 16, 64, scope='fire3')
    # net = tf.add(net, net_fire3, name='connect1')  # optional bypass connection
    net = max_pool2d(net_fire3, [3, 3], stride=2, scope='maxpool3')
    net = fire_module(net, 32, 128, scope='fire4')
    net_fire5 = fire_module(net, 32, 128, scope='fire5')
    # net = tf.add(net, net_fire5, name='connect2')  # optional bypass connection
    net = max_pool2d(net_fire5, [3, 3], stride=2, scope='maxpool5')
    net = fire_module(net, 48, 192, scope='fire6')
    net_fire7 = fire_module(net, 48, 192, scope='fire7')
    # net = tf.add(net, net_fire7, name='connect3')  # optional bypass connection
    net = fire_module(net_fire7, 64, 256, scope='fire8')
    net_fire9 = fire_module(net, 64, 256, scope='fire9')
    # net = tf.add(net, net_fire9, name='connect4')  # optional bypass connection
    net = conv2d(net_fire9, num_classes, [1, 1], stride=1, scope='conv10')
    net = avg_pool2d(net, [13, 13], stride=1, scope='avgpool10')
    # Collapse the 1x1 spatial dimensions to [batch, num_classes].
    logits = tf.squeeze(net, [1, 2], name='logits')
    return logits
def dense_transition(inflow, name):
    with tf.variable_scope(name):
        in_filters = inflow.get_shape().as_list()[3]
        inflow = layers.batch_norm(inflow, activation_fn=tf.nn.relu)
        inflow = layers.conv2d(inflow, in_filters, 1)
        inflow = layers.avg_pool2d(inflow, 2)
        return inflow
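# Hedged usage sketch (added for illustration, not part of the original
# source): a DenseNet-style transition keeps the channel count but halves
# the spatial resolution through the 2x2 average pool with its default
# stride of 2. The placeholder shape is an assumption for the example.
def _demo_dense_transition():
    x = tf.placeholder(tf.float32, [None, 32, 32, 64])
    y = dense_transition(x, name='transition_demo')  # -> [None, 16, 16, 64]
    return y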
def pyramid_pooling_layer(net):
    sd = net.get_shape()[1:3]
    sd1 = [sd[0].value, sd[1].value]
    sd2 = [sd[0].value // 2, sd[1].value // 2]
    sd3 = [sd1[0] // 3, sd1[1] // 3]
    sd4 = [sd1[0] // 6, sd1[1] // 6]
    upsampled_size = [FLAGS.img_height // FLAGS.subsample_factor,
                      FLAGS.img_width // FLAGS.subsample_factor]
    # Level 1: pool over the whole feature map (global average pooling).
    first = layers.avg_pool2d(net, kernel_size=sd1)
    first_conv = layers.convolution2d(first, 128, kernel_size=1, scope='spp-1')
    first_up = tf.image.resize_bilinear(first_conv, upsampled_size, name='spp-1')
    second = layers.max_pool2d(net, kernel_size=sd2, stride=sd2)
    second_conv = layers.convolution2d(second, 128, kernel_size=1, scope='spp-2')
    second_up = tf.image.resize_bilinear(second_conv, upsampled_size, name='spp-2')
    third = layers.max_pool2d(net, kernel_size=sd3, stride=sd3)
    third_conv = layers.convolution2d(third, 128, kernel_size=1, scope='spp-3')
    third_up = tf.image.resize_bilinear(third_conv, upsampled_size, name='spp-3')
    forth = layers.max_pool2d(net, kernel_size=sd4, stride=sd4)
    forth_conv = layers.convolution2d(forth, 128, kernel_size=1, scope='spp-4')
    forth_up = tf.image.resize_bilinear(forth_conv, upsampled_size, name='spp-4')
    # tf.concat (TF >= 1.0) takes the tensor list first and the axis second.
    stacked = tf.concat([first_up, second_up, third_up, forth_up], 3, name='spp')
    print('result shape', stacked.get_shape())
    return stacked
def NN2(x):
    net = conv2d(x, 64, 7, stride=2, scope='conv1')
    net = max_pool2d(net, [3, 3], stride=2, padding="SAME", scope='max_pool1')
    net = conv2d(net, 64, 1, stride=1, scope='inception2_11')
    net = conv2d(net, 192, 3, stride=1, scope='inception2_33')
    net = max_pool2d(net, [3, 3], stride=2, padding="SAME", scope='max_pool2')
    net = inception(net, 1, 64, 96, 128, 16, 32, 32, scope='inception3a')
    net = inception(net, 1, 64, 96, 128, 32, 64, 64, scope='inception3b')
    net = inception(net, 2, 0, 128, 256, 32, 64, 0, scope='inception3c')
    net = inception(net, 1, 256, 96, 192, 32, 42, 128, scope='inception4a')
    net = inception(net, 1, 224, 112, 224, 32, 64, 128, scope='inception4b')
    net = inception(net, 1, 192, 128, 256, 32, 64, 128, scope='inception4c')
    net = inception(net, 1, 160, 144, 288, 32, 64, 128, scope='inception4d')
    net = inception(net, 2, 0, 160, 256, 64, 128, 0, scope='inception4e')
    net = inception(net, 1, 384, 192, 384, 48, 128, 128, scope='inception5a')
    net = inception(net, 1, 384, 192, 384, 48, 128, 128, scope='inception5b')
    # Global average pooling, then a 128-d embedding.
    net = avg_pool2d(net, [7, 7], stride=1, scope='avg_pool')
    net = tf.reshape(net, [-1, 1024])
    net = fully_connected(net, 128, scope='fc')
    # L2-normalize the embedding.
    net = l2norm(net)
    return net
def build_net(x, n, is_training):
    shape = x.get_shape().as_list()
    with tf.variable_scope('pre'):
        pre = layers.conv2d(inputs=x, num_outputs=16, kernel_size=[3, 3],
                            scope='conv',
                            weights_initializer=tf.truncated_normal_initializer(
                                stddev=math.sqrt(2.0 / 9.0 / shape[3])),
                            weights_regularizer=layers.l2_regularizer(0.0001),
                            normalizer_fn=layers.batch_norm,
                            normalizer_params={'is_training': is_training})
        # pre = layers.max_pool2d(pre, [2, 2], padding='SAME', scope='pool')
    h = pre
    for i in range(1, n + 1):
        h = block(h, 16, 0.0001, '16_block{}'.format(i), is_training)
    h = block(h, 32, 0.0001, '32_block1', is_training, True)
    for i in range(2, n + 1):
        h = block(h, 32, 0.0001, '32_block{}'.format(i), is_training)
    h = block(h, 64, 0.0001, '64_block1', is_training, True)
    for i in range(2, n + 1):
        h = block(h, 64, 0.0001, '64_block{}'.format(i), is_training)
    shape = h.get_shape().as_list()
    # Global average pooling over the remaining spatial dimensions.
    h = layers.avg_pool2d(h, [shape[1], shape[2]], scope='global_pool')
    h = layers.conv2d(inputs=h, num_outputs=10, kernel_size=[1, 1],
                      scope='fc1', padding='VALID',
                      weights_initializer=tf.truncated_normal_initializer(
                          stddev=math.sqrt(2.0 / 64 / 10)),
                      weights_regularizer=layers.l2_regularizer(0.0001),
                      normalizer_fn=layers.batch_norm,
                      activation_fn=None,  # no ReLU on the logits layer
                      normalizer_params={'is_training': is_training})
    return tf.reshape(h, [-1, 10])
def __call__(self, x, trainable=True, is_training=True, reuse=False):
    with tf.variable_scope(self._name, reuse=reuse):
        with arg_scope(_arg_scope(is_training, self._weight_decay,
                                  self._batch_norm_decay)):
            net = conv2d(x, 96, [2, 2], scope='conv1')
            net = max_pool2d(net, [2, 2], scope='maxpool1')
            net = fire_module(net, 16, 64, scope='fire2')
            net = fire_module(net, 16, 64, scope='fire3')
            net = fire_module(net, 32, 128, scope='fire4')
            net = max_pool2d(net, [2, 2], scope='maxpool4')
            net = fire_module(net, 32, 128, scope='fire5')
            net = fire_module(net, 48, 192, scope='fire6')
            net = fire_module(net, 48, 192, scope='fire7')
            net = fire_module(net, 64, 256, scope='fire8')
            net = max_pool2d(net, [2, 2], scope='maxpool8')
            net = fire_module(net, 64, 256, scope='fire9')
            net = avg_pool2d(net, [8, 8], scope='avgpool10')
            net = conv2d(net, self._num_classes, [1, 1],
                         activation_fn=None, normalizer_fn=None, scope='conv10')
            # Drop the 1x1 spatial dimensions to get [batch, num_classes].
            logits = tf.squeeze(net, [1, 2], name='logits')
            return logits
def discriminator(x, y):
    # mb_size, noise_dim, initializer and lrelu are defined at module level.
    h = tf.reshape(x, [-1, 64, 64, 3])
    # noise = tf.random_normal([mb_size, 64, 64, 1])
    noise = tf.random_uniform([mb_size, 64, 64, 1], -1, 1)
    h = tf.concat([h, noise], axis=3)
    h = layers.conv2d(h, 64, 5, stride=2, padding='SAME',
                      activation_fn=None, weights_initializer=initializer)
    h = layers.batch_norm(h, activation_fn=lrelu)
    h = layers.conv2d(h, 64 * 2, 5, stride=2, padding='SAME',
                      activation_fn=None, weights_initializer=initializer)
    h = layers.batch_norm(h, activation_fn=lrelu)
    h = layers.conv2d(h, 64 * 4, 5, stride=2, padding='SAME',
                      activation_fn=None, weights_initializer=initializer)
    h = layers.batch_norm(h, activation_fn=lrelu)
    h = layers.conv2d(h, 64 * 8, 5, stride=2, padding='SAME',
                      activation_fn=None, weights_initializer=initializer)
    h = layers.batch_norm(h, activation_fn=lrelu)
    # Average pooling before flattening.
    h = layers.avg_pool2d(h, 2, stride=2)
    h = layers.flatten(h)
    # Condition on the labels, mixed with fresh uniform noise.
    noise_z = tf.random_uniform([mb_size, noise_dim], -1, 1)
    y = tf.concat([y, noise_z], axis=1)
    zh = layers.fully_connected(y, 2 * 2 * 512, activation_fn=lrelu)
    h = tf.concat([h, zh], axis=1)
    h = layers.fully_connected(h, 1, activation_fn=None)
    return h, tf.sigmoid(h)
def avg_pool2d(x, kernel_size, stride=1, padding='SAME', name='AvgPool'):
    return layers.avg_pool2d(x, kernel_size, stride, padding=padding,
                             outputs_collections=tf.GraphKeys.ACTIVATIONS,
                             scope=name)
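# Hedged usage sketch (added for illustration, not part of the original
# source): because the wrapper above registers its output under
# tf.GraphKeys.ACTIVATIONS, the pooled tensor can later be recovered from
# that collection, e.g. for activation summaries. The placeholder shape is
# an assumption for the example.
def _demo_avg_pool2d_activations():
    x = tf.placeholder(tf.float32, [None, 28, 28, 16])
    pooled = avg_pool2d(x, [2, 2], stride=2, name='demo_pool')  # -> [None, 14, 14, 16]
    activations = tf.get_collection(tf.GraphKeys.ACTIVATIONS)  # includes `pooled`
    return pooled, activations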
def googlenet(inputs, rate=0.4, n_classes=10):
    with tf.name_scope('googlenet'):
        conv1 = tf.nn.relu(layers.conv2d(inputs, 64, [7, 7], stride=2, scope='conv1'))
        pool1 = layers.max_pool2d(conv1, [3, 3], scope='pool1')
        conv2 = tf.nn.relu(layers.conv2d(pool1, 192, [3, 3], stride=1, scope='conv2'))
        pool2 = layers.max_pool2d(conv2, [3, 3], scope='pool2')
        with tf.variable_scope('Inception_3a'):
            incpt3a = get_inception_layer(pool2, 64, 96, 128, 16, 32, 32)
        with tf.variable_scope("Inception_3b"):
            incpt3b = get_inception_layer(incpt3a, 128, 128, 192, 32, 96, 64)
        pool3 = layers.max_pool2d(incpt3b, [3, 3], scope='pool3')
        with tf.variable_scope("Inception_4a"):
            incpt4a = get_inception_layer(pool3, 192, 96, 208, 16, 48, 64)
        with tf.variable_scope("aux_logit_layer1"):
            aux1 = aux_logit_layer(incpt4a, n_classes, is_training=True)
        with tf.variable_scope("Inception_4b"):
            incpt4b = get_inception_layer(incpt4a, 160, 112, 224, 24, 64, 64)
        with tf.variable_scope("Inception_4c"):
            incpt4c = get_inception_layer(incpt4b, 128, 128, 256, 24, 64, 64)
        with tf.variable_scope("Inception_4d"):
            incpt4d = get_inception_layer(incpt4c, 112, 144, 288, 32, 64, 64)
        with tf.variable_scope("aux_logit_layer2"):
            aux2 = aux_logit_layer(incpt4d, n_classes, is_training=True)
        pool4 = layers.max_pool2d(incpt4d, [3, 3], scope='pool4')
        with tf.variable_scope("Inception_5a"):
            incept5a = get_inception_layer(pool4, 256, 160, 320, 32, 128, 128)
        with tf.variable_scope("Inception_5b"):
            incept5b = get_inception_layer(incept5a, 384, 192, 384, 48, 128, 128)
        pool5 = layers.avg_pool2d(incept5b, [7, 7], stride=1, scope='pool5')
        # Inception 5b concatenates 384 + 384 + 128 + 128 = 1024 channels.
        reshape = tf.reshape(pool5, [-1, 1024])
        drop = layers.dropout(reshape, rate, is_training=True)
        linear = layers.fully_connected(drop, n_classes, activation_fn=None,
                                        scope='linear')
        # soft = tf.nn.softmax(linear)
    return linear, aux1, aux2
def inference(self, scope='squeeze_net'):
    # Inference network built from depthwise-separable blocks.
    with tf.variable_scope(scope):
        net = self.__conv2d(self.resized_data, 32, [3, 3], stride=1, scope="conv_1")
        net = self._block(net, 64, stride=1, scope="dw_conv_2")
        net = self._block(net, 128, stride=1, scope="dw_conv_3")
        net = self._block(net, 128, stride=1, scope="dw_conv_4")
        net = self._block(net, 256, stride=1, scope="dw_conv_5")
        net = self._block(net, 256, stride=1, scope="dw_conv_6")
        net = self._block(net, 512, stride=2, scope="dw_conv_7")  # output: 16 x 16 x 512
        for i in range(5):
            net = self._block(net, 512, stride=1, scope="dw_conv_" + str(8 + i))
        net = self._block(net, 1024, stride=2, scope="dw_conv_13")
        net = self._block(net, 1024, stride=1, scope="dw_conv_14")  # output: 8 x 8 x 1024
        net = layers.avg_pool2d(net, [8, 8], stride=1, scope='avg_pool_15')
        # The pooled tensor is [batch, 1, 1, 1024]; squeeze both spatial axes.
        net = tf.squeeze(net, [1, 2], name='SpatialSqueeze')
        logits = layers.fully_connected(net, self.num_classes,
                                        activation_fn=None, scope='fc_16')
        return logits
def _pyramid_pooling(net, size, num_pools=3):
    # known_shape, height_dim, maps_dim and data_format are module-level settings.
    print('Pyramid context pooling')
    with tf.variable_scope('pyramid_context_pooling'):
        if known_shape:
            shape = net.get_shape().as_list()
        else:
            shape = tf.shape(net)
        print('shape = ', shape)
        up_size = shape[height_dim:height_dim + 2]
        num_maps = net.get_shape().as_list()[maps_dim]
        # grid_size = [6, 3, 2, 1]
        pool_dim = int(round(num_maps / num_pools))
        concat_lst = [net]
        for i in range(num_pools):
            # Halve the resolution at each level, bottleneck, then upsample back.
            net = layers.avg_pool2d(net, 2, 2, padding='SAME',
                                    data_format=data_format)
            pool = BNReluConv(net, pool_dim, k=1, name='bottleneck' + str(i))
            pool = resize_tensor(pool, up_size, name='upsample_level_' + str(i))
            concat_lst.append(pool)
        net = tf.concat(concat_lst, maps_dim)
        print('Pyramid pooling out: ', net)
        net = BNReluConv(net, size, k=3, name='bottleneck_out')
        return net
def avg_pool(_input, k=2, s=2):
    padding = 'VALID'
    output = avg_pool2d(_input, kernel_size=[k, k], stride=[s, s],
                        padding=padding, data_format='NHWC')
    return output
def cifar10_resnet(inp, n=3):
    net = conv2d(inp, num_outputs=16, kernel_size=3, stride=1,
                 normalizer_fn=batch_norm, activation_fn=tf.nn.relu)
    # 32x32, 16 units
    for i in range(n):
        net = resblock(net, 16, downpool=False, upchannel=False)
    # 16x16, 32 units
    net = resblock(net, 32, downpool=True, upchannel=True)
    for i in range(n - 1):
        net = resblock(net, 32, downpool=False, upchannel=False)
    # 8x8, 64 units
    net = resblock(net, 64, downpool=True, upchannel=True)
    for i in range(n - 1):
        net = resblock(net, 64, downpool=False, upchannel=False)
    # Global average pooling over the remaining 8x8 map, so the classifier
    # sees a single spatial position and the reshape yields [batch, 10].
    net = avg_pool2d(net, kernel_size=8, stride=1)
    net = fully_connected(net, num_outputs=10, activation_fn=None)
    out = tf.nn.softmax(tf.reshape(net, [FLAGS.batch_size, -1]))
    return out
def downscale_pool(in_tensor, out_dim, name='conv', kernel_sz=3, act=None,
                   stride=2, bn=True):
    with tf.variable_scope(name):
        if bn:
            norm = l.batch_norm
            l_act = None
        else:
            norm = None
            l_act = act
        out = l.conv2d(in_tensor, num_outputs=out_dim, kernel_size=kernel_sz,
                       stride=stride, normalizer_fn=norm, activation_fn=l_act,
                       padding='SAME', normalizer_params={'activation_fn': act})
        out = l.conv2d(out, num_outputs=out_dim, kernel_size=kernel_sz,
                       stride=1, normalizer_fn=norm, activation_fn=l_act,
                       padding='SAME', normalizer_params={'activation_fn': act})
        # Shortcut branch: average-pool the input and project it to out_dim channels.
        down = l.avg_pool2d(in_tensor, 3, 2, padding='SAME')
        proj = l.conv2d(down, num_outputs=out_dim, kernel_size=1, stride=1,
                        normalizer_fn=norm, activation_fn=None,
                        padding='SAME', normalizer_params={'activation_fn': None})
        return out + proj
def avg_pool(_input, k=2, s=2):
    padding = 'VALID'
    # Expect a global pool: kernel and stride must match the spatial size.
    assert int(_input.get_shape()[1]) == k == s
    output = avg_pool2d(_input, kernel_size=[k, k], stride=[s, s],
                        padding=padding, data_format='NHWC')
    return output
def subsample(inputs, factor, scope):
    if factor == 1:
        return inputs
    else:
        # avg for auto encoder
        return avg_pool2d(inputs, [1, 1], stride=factor, padding='SAME',
                          scope=scope)
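# Hedged sketch (added for illustration, not from the source): with a 1x1
# kernel and stride `factor`, average pooling degenerates to plain
# subsampling, i.e. it keeps every `factor`-th pixel. The placeholder shape
# is an assumption for the example.
def _demo_subsample():
    x = tf.placeholder(tf.float32, [None, 32, 32, 8])
    y = subsample(x, factor=2, scope='subsample_demo')  # -> [None, 16, 16, 8]
    return y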
def googlenet(self, inputs):
    '''
    Implementation of https://arxiv.org/pdf/1409.4842.pdf
    '''
    with tf.name_scope("google_net", "googlenet", [inputs]):
        with ops.arg_scope([layers.max_pool2d], padding='SAME'):
            conv0 = layers.conv2d(inputs, 64, [7, 7], stride=1, scope='conv0')
            pool0 = layers.max_pool2d(conv0, [3, 3], scope='pool0')
            conv1_a = layers.conv2d(pool0, 64, [1, 1], scope='conv1_a')
            conv1_b = layers.conv2d(conv1_a, 192, [3, 3], scope='conv1_b')
            pool1 = layers.max_pool2d(conv1_b, [3, 3], scope='pool1')
            with tf.variable_scope("inception_3a"):
                inception_3a = self.get_inception_layer(pool1, 64, 96, 128, 16, 32, 32)
            with tf.variable_scope("inception_3b"):
                inception_3b = self.get_inception_layer(inception_3a, 128, 128, 192, 32, 96, 64)
            pool2 = layers.max_pool2d(inception_3b, [3, 3], scope='pool2')
            with tf.variable_scope("inception_4a"):
                inception_4a = self.get_inception_layer(pool2, 192, 96, 208, 16, 48, 64)
            # with tf.variable_scope("aux_logits_1"):
            #     aux_logits_1 = self.aux_logit_layer(inception_4a, self.n_classes, self.is_training)
            with tf.variable_scope("inception_4b"):
                inception_4b = self.get_inception_layer(inception_4a, 160, 112, 224, 24, 64, 64)
            with tf.variable_scope("inception_4c"):
                inception_4c = self.get_inception_layer(inception_4b, 128, 128, 256, 24, 64, 64)
            with tf.variable_scope("inception_4d"):
                inception_4d = self.get_inception_layer(inception_4c, 112, 144, 288, 32, 64, 64)
            # with tf.variable_scope("aux_logits_2"):
            #     aux_logits_2 = self.aux_logit_layer(inception_4d, self.n_classes, self.is_training)
            with tf.variable_scope("inception_4e"):
                inception_4e = self.get_inception_layer(inception_4d, 256, 160, 320, 32, 128, 128)
            pool3 = layers.max_pool2d(inception_4e, [3, 3], scope='pool3')
            with tf.variable_scope("inception_5a"):
                inception_5a = self.get_inception_layer(pool3, 256, 160, 320, 32, 128, 128)
            with tf.variable_scope("inception_5b"):
                inception_5b = self.get_inception_layer(inception_5a, 384, 192, 384, 48, 128, 128)
            pool4 = layers.avg_pool2d(inception_5b, [2, 2], stride=1, scope='pool4')
            reshape = tf.reshape(pool4, [-1, 1024 * 3 * 3])
            dropout = layers.dropout(reshape, self.keep_prob,
                                     is_training=self.is_training)
            logits = layers.fully_connected(dropout, self.n_classes,
                                            activation_fn=None, scope='logits')
            predictions = tf.nn.softmax(logits, name='predictions')
            self.softmax_linear = predictions
def build_net(x, n, is_training, FLAGS):
    shape = x.get_shape().as_list()
    with tf.variable_scope('pre'):
        pre = layers.conv2d(
            inputs=x, num_outputs=16, kernel_size=[3, 3], scope='conv',
            weights_initializer=tf.truncated_normal_initializer(
                stddev=math.sqrt(2.0 / 9.0 / shape[3])),
            reuse=tf.AUTO_REUSE,
            weights_regularizer=layers.l2_regularizer(FLAGS.weight_decay),
            normalizer_fn=layers.batch_norm,
            normalizer_params={
                'is_training': is_training,
                'reuse': tf.AUTO_REUSE,
                'scope': 'bn_p'
            })
        # pre = layers.max_pool2d(pre, [2, 2], padding='SAME', scope='pool')
    h = pre
    for i in range(1, n + 1):
        h = block(h, 16, FLAGS.weight_decay, '16_block{}'.format(i), is_training)
    h = block(h, 32, FLAGS.weight_decay, '32_block1', is_training, True)
    for i in range(2, n + 1):
        h = block(h, 32, FLAGS.weight_decay, '32_block{}'.format(i), is_training)
    h = block(h, 64, FLAGS.weight_decay, '64_block1', is_training, True)
    for i in range(2, n + 1):
        h = block(h, 64, FLAGS.weight_decay, '64_block{}'.format(i), is_training)
    shape = h.get_shape().as_list()
    # Global average pooling over the remaining spatial dimensions.
    h = layers.avg_pool2d(h, [shape[1], shape[2]], scope='global_pool')
    h = layers.conv2d(
        inputs=h, num_outputs=FLAGS.num_classes, kernel_size=[1, 1],
        scope='fc1', padding='VALID',
        weights_initializer=tf.truncated_normal_initializer(
            stddev=math.sqrt(2.0 / 64 / FLAGS.num_classes)),
        reuse=tf.AUTO_REUSE,
        weights_regularizer=layers.l2_regularizer(FLAGS.weight_decay),
        normalizer_fn=layers.batch_norm,
        activation_fn=None,
        normalizer_params={
            'is_training': is_training,
            'reuse': tf.AUTO_REUSE,
            'scope': 'bn_fc'
        })
    return tf.reshape(h, [-1, FLAGS.num_classes])
def wrn(net, k, n, num_classes=None):
    net = block(net, [3], n, 16, False)         # 32x32
    net = block(net, [3, 3], n, 16 * k, False)  # 32x32
    net = block(net, [3, 3], n, 32 * k, True)   # 16x16
    net = block(net, [3, 3], n, 64 * k, True)   # 8x8
    if num_classes is not None:
        # Global 8x8 average pooling, then a 1x1 conv as the classifier.
        net = avg_pool2d(net, 8, 8)
        net = conv2d(net, num_classes, 1, activation_fn=None)
    return net
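# Hedged usage sketch (illustrative only; assumes the block() helper used
# above and CIFAR-sized inputs): the three strided groups leave an 8x8 map,
# which the 8x8 average pool reduces to 1x1 before the 1x1-conv classifier.
def _demo_wrn():
    images = tf.placeholder(tf.float32, [None, 32, 32, 3])
    logits = wrn(images, k=2, n=4, num_classes=10)  # -> [None, 1, 1, 10]
    return logits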
def inference(inputs, is_training=True, name_scope='Simple'):
    ngf = 16
    container = tf.contrib.eager.EagerVariableStore()
    with tf.variable_scope(name_scope, reuse=tf.AUTO_REUSE):
        with container.as_default():
            conv1 = layers.conv2d(inputs, ngf, 3, 1, scope='conv1')
            pool1 = layers.avg_pool2d(conv1, 2, padding='SAME', scope='pool1')
            conv2 = layers.conv2d(pool1, ngf * 2, 3, 1, scope='conv2')
            pool2 = layers.avg_pool2d(conv2, 2, padding='SAME', scope='pool2')
            conv3 = layers.conv2d(pool2, ngf * 4, 3, 1, scope='conv3')
            pool3 = layers.avg_pool2d(conv3, 2, padding='SAME', scope='pool3')
            conv4 = layers.conv2d(pool3, ngf * 8, 3, 1, scope='conv4')
            pool4 = layers.avg_pool2d(conv4, 2, padding='SAME', scope='pool4')
            flatten = layers.flatten(pool4)
            fc1 = layers.fully_connected(flatten, 128, scope='fc1')
            fc1 = layers.dropout(fc1, 0.5, is_training=is_training, scope='drop1')
            fc2 = layers.fully_connected(fc1, 128, scope='fc2')
            fc2 = layers.dropout(fc2, 0.5, is_training=is_training, scope='drop2')
            fc3 = layers.fully_connected(fc2, 128, scope='fc3')
            fc3 = layers.dropout(fc3, 0.5, is_training=is_training, scope='drop3')
            logits = layers.fully_connected(fc3, 2, activation_fn=tf.nn.sigmoid,
                                            scope='logits')
    return logits, container
def grid_residual(name, l, is_training, increase_dim=False, first=False,
                  one_c=False):
    bn_params = {
        'is_training': is_training,
        'fused': True,
        'data_format': 'NCHW'
    }
    shape = l.get_shape().as_list()
    in_channel = shape[1]
    if increase_dim:
        out_channel = in_channel * 2
        stride1 = 2
    else:
        out_channel = in_channel
        stride1 = 1
    with tf.variable_scope(name) as scope:
        b1 = l if first else tf.nn.relu(
            cl.batch_norm(l, is_training=is_training, fused=True,
                          data_format='NCHW'))
        c1 = gridconv2d(b1, scope='conv1', num_outputs=out_channel,
                        kernel_size=[3, 3], stride=stride1,
                        activation_fn=tf.nn.relu, padding='SAME',
                        data_format='NCHW', one_c=one_c,
                        normalizer_fn=cl.batch_norm,
                        normalizer_params=bn_params)
        c2 = gridconv2d(c1, scope='conv2', num_outputs=out_channel,
                        kernel_size=[3, 3], stride=1, activation_fn=None,
                        padding='SAME', data_format='NCHW', one_c=one_c,
                        normalizer_fn=None, normalizer_params=None)
        if increase_dim:
            # Downsample the shortcut and zero-pad its channels (NCHW layout).
            l = cl.avg_pool2d(l, kernel_size=2, stride=2, data_format='NCHW')
            l = tf.pad(l, [[0, 0], [in_channel // 2, in_channel // 2],
                           [0, 0], [0, 0]])
        l = c2 + l
    return l
def view_volume_reverse_projection(depth, img_feat, scene_info, vox_size,
                                   multi_scale=False, test_mode=False):
    """
    View volume projection -- using reverse projection.

    :param depth: the depth map
    :param img_feat: the 2d feature map
    :param scene_info: the scene information, including volume and transform matrix
    :param vox_size: the projected volume size
    :param multi_scale: use multi-scale feature maps, based on the volume and image relationship
    :param test_mode: if in test mode, the function returns more intermediary results
    :type depth: tf.Tensor or tf.Variable
    :type img_feat: tf.Tensor or tf.Variable
    :type scene_info: tf.Tensor or tf.Variable
    :type vox_size: list(int)
    :type multi_scale: bool
    :type test_mode: bool
    :return: tf.Tensor
    """
    if multi_scale:
        raise NotImplementedError
    vox_size = [int(size) for size in vox_size]
    batch_size = int(depth.shape[0])
    img_scale = int(int(depth.shape[-2]) / int(img_feat.shape[-2]))
    depth_batches, img_feat_batches, scene_info_batches = [
        tf.split(item, batch_size, axis=0)
        for item in [depth, img_feat, scene_info]]
    feat_3d_batches = []
    intermediate = [[], [], [], []]
    for depth_batch, img_feat_batch, scene_info_batch in zip(
            depth_batches, img_feat_batches, scene_info_batches):
        _, img_proj_pos, vox_occupied = img2vol_forward_projection(
            depth_batch, scene_info_batch, vox_size)
        # Pool the projected positions down to the feature-map resolution.
        img_proj_pos = layers.avg_pool2d(img_proj_pos, img_scale, img_scale)
        vox_origin, cam_pose, vox_unit, _ = tf.split(
            scene_info_batch, [3, 16, 2, 9], axis=-1)
        vox_proj_pos = reverse_projection(cam_pose, vox_unit, vox_origin,
                                          vox_size)
        vox_occupied_indices = tf.where(
            tf.reshape(vox_occupied, shape=vox_occupied.shape[:-1]) > 0)
        vox_proj_pos = tf.expand_dims(
            tf.gather_nd(vox_proj_pos, vox_occupied_indices), axis=0)
        intermediate[0].append(vox_proj_pos)
        interp_points = camera_to_image(vox_proj_pos, scene_info_batch,
                                        img_scale)
        intermediate[1].append(interp_points)
        interp_weights, interp_distance = interpolation_weights(
            interp_points, vox_proj_pos, img_proj_pos,
            max_distance=0.04 * img_scale)
        intermediate[2].append(interp_weights)
        intermediate[3].append(interp_distance)
        interp_feat = bilinear_interpolation(img_feat_batch, interp_points,
                                             interp_weights)
        interp_feat = tf.reshape(interp_feat,
                                 shape=[-1, int(interp_feat.shape[-1])])
        # Scatter the interpolated features back into the occupied voxels.
        feat_3d_shape = tf.constant(
            [1] + vox_size + [int(img_feat.shape[-1])], dtype=tf.int64)
        feat_3d_batch = tf.scatter_nd(vox_occupied_indices, interp_feat,
                                      feat_3d_shape)
        feat_3d_batches.append(feat_3d_batch)
    feat_3d = tf.concat(feat_3d_batches, axis=0)
    if test_mode:
        return feat_3d, intermediate
    return feat_3d
def transition_layer(self, x, filters, scope):
    # BN -> ReLU -> 1x1 conv -> 2x2 average pool
    with tf.name_scope(scope):
        x = batch_norm(x, training=self.training, scope=scope + '_bn1')
        x = tf.nn.relu(x, name=scope + '_relu1')
        x = conv_layer(x, filters, 1, 1, scope=scope + '_conv1')
        if self.dropout_rate:
            x = dropout_layer(x, self.dropout_rate, self.training)
        x = layers.avg_pool2d(x, 2, 2)
        return x
def ResidualBlock(x, dim):
    h1 = tf.nn.leaky_relu(layers.conv2d(
        x, dim, kernel_size=3, stride=1, rate=1,
        normalizer_fn=layers.layer_norm,
        weights_initializer=layers.xavier_initializer(uniform=False)))
    h2 = tf.nn.leaky_relu(layers.conv2d(
        h1, dim, kernel_size=3, stride=1, rate=1,
        normalizer_fn=layers.layer_norm,
        weights_initializer=layers.xavier_initializer(uniform=False)))
    if x.shape != h2.shape:
        # Zero-pad the shortcut's channels up to the block's output width.
        n, hh, ww, c = x.shape
        pad_c = h2.shape[3] - c
        p = tf.zeros([n, hh, ww, pad_c], dtype=np.float32)
        x = tf.concat([p, x], 3)
        if x.shape[1] != h2.shape[1]:
            # Match spatial size with a strided 1x1 average pool.
            x = layers.avg_pool2d(x, 1, 2)
    return h2 + x
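# Hedged sketch (added for illustration): when `dim` exceeds the input's
# channel count, the shortcut above is zero-padded on the channel axis so
# the addition type-checks. The fully defined placeholder shape is an
# assumption chosen so the tf.zeros shape is static.
def _demo_residual_block():
    x = tf.placeholder(tf.float32, [4, 16, 16, 32])
    y = ResidualBlock(x, dim=64)  # shortcut padded from 32 to 64 channels
    return y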
def avg_pool2d(input, kernel_size=3, stride=2, padding='VALID', name=None,
               data_format='NHWC'):
    return contrib_layers.avg_pool2d(input, kernel_size=kernel_size,
                                     stride=stride, padding=padding,
                                     scope=name, data_format=data_format)
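# Hedged sketch (illustrative only): the wrapper directly above defaults to
# a 3x3 kernel with stride 2 and VALID padding, so a 7x7 map shrinks to
# 3x3 (floor((7 - 3) / 2) + 1). The placeholder shape is an assumption.
def _demo_avg_pool2d_valid():
    x = tf.placeholder(tf.float32, [None, 7, 7, 512])
    y = avg_pool2d(x)  # -> [None, 3, 3, 512]
    return y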
def get_symble(input_image, **kargs):
    '''
    layers >= 500: filter_list = [64, 256, 512, 1024, 2048]
    layers < 500:  filter_list = [64, 64, 128, 256, 512]
    '''
    w_decay = kargs.get('w_decay', 1e-5)
    net_name = kargs.get('net_name', 'resnet50')
    train_fg = kargs.get('train_fg', True)
    class_num = kargs.get('class_num', 81)
    w_r = tfc.l2_regularizer(w_decay)
    assert net_name.lower() in ['resnet50', 'resnet100'], \
        "Please select net name: resnet50 or resnet100"
    if net_name.lower() == 'resnet18':
        block_num_list = [2, 2, 2, 2]
    elif net_name.lower() == 'resnet34':
        block_num_list = [3, 4, 6, 3]
    with tf.variable_scope(net_name):
        res_base_conv = Conv_block(input_image, 7, conv_stride=2, filter_num=64,
                                   relu_type='relu6', w_regular=w_r, **kargs)
        C1 = tfc.max_pool2d(res_base_conv, 3, stride=2, padding='SAME',
                            scope='res_base_pool')
        C2 = res_block_seq(C1, 3, kernel_num_in=64, kernel_num_out=64,
                           w_regular=w_r, seq_name='res2', **kargs)
        C3 = res_block_seq(C2, 4, seq_stride=2, kernel_num_in=64,
                           kernel_num_out=128, w_regular=w_r,
                           seq_name='res3', **kargs)
        if 'resnet50' in net_name:
            C4 = res_block_seq(C3, 6, seq_stride=2, kernel_num_in=128,
                               kernel_num_out=256, w_regular=w_r,
                               seq_name='res4', **kargs)
        elif 'resnet100' in net_name:
            C4 = res_block_seq(C3, 23, seq_stride=2, kernel_num_in=128,
                               kernel_num_out=256, w_regular=w_r,
                               seq_name='res4', **kargs)
        else:
            print("Please input net name in: [resnet50, resnet100]")
            return None
        C5 = res_block_seq(C4, 3, seq_stride=2, kernel_num_in=256,
                           kernel_num_out=512, w_regular=w_r,
                           seq_name='res5', **kargs)
        p2 = tfc.avg_pool2d(C5, 7, stride=1, padding='SAME', scope='pool2')
        flat = tfc.flatten(p2, scope='flat')
        fc = tfc.fully_connected(flat, class_num, activation_fn=tf.nn.relu6,
                                 trainable=train_fg, weights_regularizer=w_r,
                                 scope='fc')
        dp = tfc.dropout(fc, keep_prob=0.5, is_training=train_fg,
                         scope='drop_out')
        return dp
def build_net(x):
    weight_decay = FLAGS.weight_decay
    h1 = layers.conv2d(inputs=x, num_outputs=32, kernel_size=[5, 5],
                       weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                       weights_regularizer=layers.l2_regularizer(weight_decay),
                       biases_regularizer=layers.l2_regularizer(weight_decay),
                       scope='conv1', normalizer_fn=layers.batch_norm)
    h1 = layers.avg_pool2d(inputs=h1, kernel_size=[3, 3], padding='SAME',
                           scope='pool1')
    h2 = layers.conv2d(inputs=h1, num_outputs=32, kernel_size=[5, 5],
                       weights_initializer=tf.truncated_normal_initializer(stddev=0.05),
                       weights_regularizer=layers.l2_regularizer(weight_decay),
                       biases_regularizer=layers.l2_regularizer(weight_decay),
                       scope='conv2', normalizer_fn=layers.batch_norm)
    h2 = layers.avg_pool2d(inputs=h2, kernel_size=[3, 3], padding='SAME',
                           scope='pool2')
    h3 = layers.conv2d(inputs=h2, num_outputs=64, kernel_size=[5, 5],
                       weights_initializer=tf.truncated_normal_initializer(stddev=0.05),
                       weights_regularizer=layers.l2_regularizer(weight_decay),
                       biases_regularizer=layers.l2_regularizer(weight_decay),
                       scope='conv3', normalizer_fn=layers.batch_norm)
    h3 = layers.avg_pool2d(inputs=h3, kernel_size=[3, 3], padding='SAME',
                           scope='pool3')
    h4 = layers.conv2d(inputs=h3, num_outputs=64, kernel_size=[4, 4],
                       weights_initializer=tf.truncated_normal_initializer(stddev=0.05),
                       weights_regularizer=layers.l2_regularizer(weight_decay),
                       biases_regularizer=layers.l2_regularizer(weight_decay),
                       padding='VALID', scope='conv4',
                       normalizer_fn=layers.batch_norm)
    keep_prob = tf.placeholder(tf.float32, name="keep_prob")
    h4 = layers.dropout(inputs=h4, keep_prob=keep_prob, scope='dropout')
    h5 = layers.fully_connected(inputs=h4, num_outputs=10, activation_fn=None,
                                weights_initializer=tf.truncated_normal_initializer(stddev=0.05),
                                weights_regularizer=layers.l2_regularizer(weight_decay),
                                biases_regularizer=layers.l2_regularizer(weight_decay),
                                scope='fc1')
    h5 = tf.reshape(h5, [-1, 10])
    return h5, keep_prob
def residual_conv_block(net, num_filters, kernel_size, stride,
                        is_training=True):
    # Cache the input tensor and downsample it to match the conv branch.
    inp = tfl.avg_pool2d(net, kernel_size, stride, padding="SAME")
    # Convolve with stride (potential downsampling).
    net = tfl.conv2d(net, num_filters, kernel_size, stride,
                     activation_fn=tf.identity, padding="SAME")
    # Normalize the output.
    net = tfl.batch_norm(net, is_training=is_training,
                         activation_fn=tf.identity)
    # Convolve again, but do not downsample.
    net = tfl.conv2d(net, num_filters, kernel_size, stride=1,
                     activation_fn=tf.identity, padding="SAME")
    return prelu(tf.concat((net, inp), axis=-1))
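# Hedged sketch (added for illustration): because the block concatenates the
# convolved branch with the pooled input, its output carries
# num_filters + in_channels channels. The placeholder shape is an
# assumption, and `prelu` is the helper assumed by the block above.
def _demo_residual_conv_block():
    x = tf.placeholder(tf.float32, [None, 28, 28, 16])
    y = residual_conv_block(x, num_filters=32, kernel_size=3, stride=2)
    # y has 32 + 16 = 48 channels at 14x14 resolution.
    return y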
def block_output(net, endpoints, num_classes, dropout_keep_prob=0.5):
    with tf.variable_scope('Output'):
        shape = net.get_shape()
        # Global pooling over the full spatial extent.
        net = layers.avg_pool2d(net, shape[1:3], padding='VALID',
                                scope='Pool1_Global')
        endpoints['Output/Pool1'] = net  # 1 x 1 x 2048
        net = layers.dropout(net, dropout_keep_prob)
        net = layers.flatten(net)  # 2048
        logits = layers.fully_connected(net, num_classes, activation_fn=None,
                                        scope='Logits')  # num_classes
        endpoints['Logits'] = logits
        return logits
def block_aux(net, endpoints, num_classes):
    with tf.variable_scope('Aux'):
        aux_logits = layers.avg_pool2d(net, [5, 5], stride=3, padding='VALID',
                                       scope='Pool1')
        # stddev=0.01
        aux_logits = layers.conv2d(aux_logits, 128, [1, 1], scope='Conv1')
        shape = aux_logits.get_shape()
        aux_logits = layers.conv2d(aux_logits, 768, shape[1:3],
                                   padding='VALID', scope='Conv2')
        aux_logits = layers.flatten(aux_logits)
        # stddev=0.001
        aux_logits = layers.fully_connected(aux_logits, num_classes,
                                            activation_fn=None,
                                            scope='AuxLogits')
        endpoints['AuxLogits'] = aux_logits
        return aux_logits