def generator(tensor):
    """Build the generator graph inside the ``generator`` variable scope.

    Variables are reused when a global variable whose name starts with
    ``generator`` already exists, so calling this function a second time
    shares weights with the first call.

    Args:
        tensor: input tensor fed to the first fully connected layer.

    Returns:
        The output of the final transposed convolution (one output channel,
        sigmoid activation).
    """
    # Reuse the scope as soon as any generator variable has been created.
    reuse = any(t.name.startswith('generator')
                for t in tf.global_variables())
    # Debug output; written as function calls so the module also parses on
    # Python 3 (the printed text is unchanged).
    print(tensor.get_shape())
    with variable_scope.variable_scope('generator', reuse=reuse):
        tensor = slim.fully_connected(tensor, 1024)
        print(tensor)
        tensor = slim.batch_norm(tensor, activation_fn=tf.nn.relu)
        tensor = slim.fully_connected(tensor, 7 * 7 * 128)
        tensor = slim.batch_norm(tensor, activation_fn=tf.nn.relu)
        # Fold the flat activations into a 7x7 map with 128 channels.
        tensor = tf.reshape(tensor, [-1, 7, 7, 128])
        tensor = slim.conv2d_transpose(tensor, 64, kernel_size=[4, 4],
                                       stride=2, activation_fn=None)
        print('gen {}'.format(tensor.get_shape()))
        tensor = slim.batch_norm(tensor, activation_fn=tf.nn.relu)
        tensor = slim.conv2d_transpose(tensor, 1, kernel_size=[4, 4],
                                       stride=2, activation_fn=tf.nn.sigmoid)
    return tensor
def generator(z):
    """Map latent input ``z`` to a single-channel, tanh-activated output.

    Two batch-normalised fully connected layers are followed by a reshape to
    a 7x7x128 map and two stride-2 transposed convolutions.

    Args:
        z: input tensor for the first fully connected layer.

    Returns:
        Output tensor of the last transposed convolution.
    """
    fc_defaults = dict(normalizer_fn=slim.batch_norm,
                       activation_fn=tf.nn.relu)
    deconv_defaults = dict(normalizer_fn=slim.batch_norm,
                           kernel_size=5,
                           stride=2,
                           padding='SAME',
                           activation_fn=tf.nn.relu)

    with slim.arg_scope([slim.fully_connected], **fc_defaults):
        hidden = slim.fully_connected(z, 1024)
        hidden = slim.fully_connected(hidden, 128 * 7 * 7)

    feature_map = tf.reshape(hidden, [-1, 7, 7, 128])

    with slim.arg_scope([slim.conv2d_transpose], **deconv_defaults):
        feature_map = slim.conv2d_transpose(feature_map, 128)
        # The output layer overrides the scope defaults: tanh, no batch norm.
        image = slim.conv2d_transpose(feature_map, 1,
                                      activation_fn=tf.nn.tanh,
                                      normalizer_fn=None)
    return image
def generator(self, inputs, reuse=False):
    """Build the transposed-convolution generator.

    Args:
        inputs: input tensor; the original author notes (batch, 1, 1, 128).
        reuse: passed to the ``generator`` variable scope.

    Returns:
        Output of ``conv_transpose4`` (one channel, tanh activation).
    """
    with tf.variable_scope('generator', reuse=reuse), \
            slim.arg_scope([slim.conv2d_transpose],
                           padding='SAME',
                           activation_fn=None,
                           stride=2,
                           weights_initializer=tf.contrib.layers.xavier_initializer()), \
            slim.arg_scope([slim.batch_norm],
                           decay=0.95,
                           center=True,
                           scale=True,
                           activation_fn=tf.nn.relu,
                           is_training=(self.mode == 'train')):
        # Author's shape note: (batch_size, 4, 4, 512)
        hidden = slim.conv2d_transpose(inputs, 512, [4, 4], padding='VALID',
                                       scope='conv_transpose1')
        hidden = slim.batch_norm(hidden, scope='bn1')
        # Author's shape notes: (batch_size, 8, 8, 256) then
        # (batch_size, 16, 16, 128)
        for index, depth in ((2, 256), (3, 128)):
            hidden = slim.conv2d_transpose(hidden, depth, [3, 3],
                                           scope='conv_transpose%d' % index)
            hidden = slim.batch_norm(hidden, scope='bn%d' % index)
        # Author's shape note: (batch_size, 32, 32, 1)
        image = slim.conv2d_transpose(hidden, 1, [3, 3],
                                      activation_fn=tf.nn.tanh,
                                      scope='conv_transpose4')
    return image
def generative_network(z):
    """Generative network to parameterize the generative model.

    Takes latent variables as input and outputs the likelihood parameters:

        logits = neural_network(z)

    ``z`` is reshaped to ``[M, 1, 1, d]`` using the module-level ``M`` and
    ``d``, passed through four transposed convolutions and flattened.
    """
    deconv_defaults = {
        'activation_fn': tf.nn.elu,
        'normalizer_fn': slim.batch_norm,
        'normalizer_params': {'scale': True},
    }
    with slim.arg_scope([slim.conv2d_transpose], **deconv_defaults):
        hidden = tf.reshape(z, [M, 1, 1, d])
        hidden = slim.conv2d_transpose(hidden, 128, 3, padding='VALID')
        hidden = slim.conv2d_transpose(hidden, 64, 5, padding='VALID')
        hidden = slim.conv2d_transpose(hidden, 32, 5, stride=2)
        # Final layer has no activation: its output is the logits.
        logits = slim.conv2d_transpose(hidden, 1, 5, stride=2,
                                       activation_fn=None)
        logits = slim.flatten(logits)
    return logits
def prediction_layer(cfg, input, name, num_outputs):
    """Apply one stride-2, 3x3 transposed convolution as a prediction head.

    Args:
        cfg: configuration object; ``cfg.weight_decay`` sets the L2
            regularisation strength.
        input: tensor to upsample.
        name: variable scope that wraps the layer.
        num_outputs: number of output channels.

    Returns:
        The output of the transposed convolution (no activation, no
        normaliser).
    """
    regularizer = slim.l2_regularizer(cfg.weight_decay)
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        padding='SAME',
                        activation_fn=None,
                        normalizer_fn=None,
                        weights_regularizer=regularizer), \
            tf.variable_scope(name):
        return slim.conv2d_transpose(input, num_outputs,
                                     kernel_size=[3, 3], stride=2,
                                     scope='block4')
def inference(self):
    """Build a small conv / strided-conv / transposed-conv model on ``self.x``.

    ``self.x`` is reshaped to ``[-1] + self.input_shape[:3]`` before the
    layers are applied.

    Returns:
        Output of the ``deconv_1`` layer, with ``self.input_shape[2]``
        channels and no activation.
    """
    dim0 = self.input_shape[0]
    dim1 = self.input_shape[1]
    dim2 = self.input_shape[2]
    images = tf.reshape(self.x, shape=[-1, dim0, dim1, dim2])

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.05)):
        # Author's note: 1*H*W -> 32*H*W
        net = slim.conv2d(images, 32, [3, 3], padding='SAME', scope='conv1')
        # Author's note: 32*H*W -> 1024*H/16*W/16
        net = slim.conv2d(net, 1024, [16, 16], padding='VALID',
                          scope='conv2', stride=16)
        # Mirror of conv2: 16x16 kernel with stride 16.
        net = slim.conv2d_transpose(net, dim2, [16, 16], stride=16,
                                    padding='VALID', activation_fn=None,
                                    scope='deconv_1')
    return net
def fcn_model_vgg(inputs, num_classes=21, is_training=True, dropout_keep_prob=0.8, scope='vgg_16', reuse=None):
    """Build a VGG-16-style encoder followed by a transposed-conv decoder.

    Args:
        inputs: input image tensor.
        num_classes: number of channels produced by the final ``conv6``.
        is_training: forwarded to batch norm and dropout; when falsy the
            dropout keep probability is forced to 1.0.
        dropout_keep_prob: keep probability used while training.
        scope: variable scope wrapping the whole model.
        reuse: forwarded to the variable scope.

    Returns:
        The un-activated output of ``conv6``.
    """
    keep_prob = dropout_keep_prob if is_training else 1.0
    # (number of 3x3 convs, channel depth) for each encoder stage.
    encoder_layout = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))

    with tf.variable_scope(scope, reuse=reuse):
        features = inputs
        for stage, (repeats, depth) in enumerate(encoder_layout, 1):
            features = slim.repeat(features, repeats, slim.conv2d, depth,
                                   [3, 3], scope='conv%d' % stage)
            features = slim.max_pool2d(features, [2, 2],
                                       scope='pool%d' % stage)

        # Decoder: four transposed convolutions with interleaved batch norm
        # and two dropout layers.
        features = slim.conv2d_transpose(features, 256, kernel_size=(3, 3),
                                         stride=(2, 2), scope="deconv1")
        features = slim.batch_norm(features, is_training=is_training)
        features = slim.dropout(features, keep_prob, is_training=is_training,
                                scope='dropout1')
        features = slim.conv2d_transpose(features, 128, kernel_size=(3, 3),
                                         stride=(2, 2), scope="deconv2")
        features = slim.batch_norm(features, is_training=is_training)
        features = slim.conv2d_transpose(features, 64, kernel_size=(3, 3),
                                         stride=(4, 4), scope="deconv3")
        features = slim.batch_norm(features, is_training=is_training)
        features = slim.dropout(features, keep_prob, is_training=is_training,
                                scope='dropout2')
        features = slim.conv2d_transpose(features, 32, kernel_size=(3, 3),
                                         stride=(2, 2), scope="deconv4")
        preds = slim.conv2d(features, num_classes, [2, 2],
                            activation_fn=None, scope="conv6")
    return preds
def build_pred(x_in, H, phase):
    '''
    Build the prediction model: five encoder blocks, four decoder blocks with
    skip connections, and a 1x1 prediction convolution.

    Args:
        x_in: input tensor.
        H: hyper-parameter dict; must contain 'num_class' and is forwarded
            to ``argument_scope``.
        phase: one of 'train', 'validate', 'test'; variables are reused only
            for 'validate'.

    Returns:
        (squeezed logits, squeezed sigmoid of the logits).
    '''
    # The value is not used below, but the lookup is kept so that a missing
    # key still raises KeyError exactly as before.
    num_class = H['num_class']
    kernel_1x1 = [1, 1]
    kernel_3x3 = [3, 3]
    upsample_kernel = [2, 2]
    upsample_stride = 2
    early_feature = {}
    reuse = {'train': False, 'validate': True, 'test': False}[phase]

    encoder_depths = (64, 128, 256, 512, 1024)
    # (scope name, output depth, encoder block supplying the skip connection)
    decoder_layout = (
        ('block_6', 512, 'block_4'),
        ('block_7', 256, 'block_3'),
        ('block_8', 128, 'block_2'),
        ('block_9', 64, 'block_1'),
    )

    # NOTE(review): block nesting was reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    with slim.arg_scope(argument_scope(H, phase)):
        features = x_in

        # Encoder: every block after the first starts with a max-pool.
        for index, depth in enumerate(encoder_depths, 1):
            block = 'block_%d' % index
            if index > 1:
                features = slim.max_pool2d(features)
            with tf.variable_scope(block, reuse=reuse):
                hidden = slim.conv2d(features, depth, kernel_3x3,
                                     scope='conv1')
                features = slim.conv2d(hidden, depth, kernel_3x3,
                                       scope='conv2')
            early_feature[block] = features

        # Decoder: upsample, concatenate with the matching encoder output
        # along the channel axis, then two 3x3 convolutions.
        for block, depth, skip in decoder_layout:
            with tf.variable_scope(block, reuse=reuse):
                upsampled = slim.conv2d_transpose(
                    features, depth, upsample_kernel, upsample_stride,
                    scope='conv_trans')
                merged = tf.concat([early_feature[skip], upsampled], axis=3)
                hidden = slim.conv2d(merged, depth, kernel_3x3,
                                     scope='conv1')
                features = slim.conv2d(hidden, depth, kernel_3x3,
                                       scope='conv2')
            early_feature[block] = features

        with tf.variable_scope('pred', reuse=reuse):
            logits = slim.conv2d(features, 1, kernel_1x1, scope='conv1',
                                 activation_fn=None, normalizer_fn=None)
        early_feature['pred'] = logits

    probabilities = tf.sigmoid(logits)
    return tf.squeeze(logits), tf.squeeze(probabilities)
def generate(self, inputs, is_training=False, name=''):
    """
    Defines graph for generate network

    :param inputs: tensor with shape [None, z_dim]
    :param is_training: boolean flag for batch normalization
    :param name: name of graph (apply same weights for different inputs)
    :return: generated image

    Side effects: sets ``self.reuse`` to True so later calls share weights,
    stores the trainable variables of the ``generator`` scope in
    ``self.variables``, and registers a ``generated_images`` image summary.
    """
    # Copy the shared normalizer params and set `is_training` on the copy.
    # `dict(mapping, key=value)` works on Python 2 and 3; concatenating
    # `.items()` with a list raises TypeError on Python 3.
    norm_params = dict(self.normalizer_params, is_training=is_training)
    outputs = inputs
    # NOTE(review): block nesting was reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    with tf.name_scope(name=name), \
            tf.variable_scope("generator", reuse=self.reuse), \
            slim.arg_scope([slim.conv2d_transpose],
                           kernel_size=[5, 5],
                           stride=2,
                           activation_fn=self.activation_fn,
                           normalizer_fn=self.normalizer_fn,
                           normalizer_params=norm_params,
                           padding='SAME'):
        # Project the input to a start_size x start_size map.
        with tf.variable_scope("projection"):
            outputs = slim.fully_connected(
                inputs=outputs,
                num_outputs=self.start_size * self.start_size * self.channel_depths[0],
                activation_fn=self.activation_fn,
                normalizer_fn=self.normalizer_fn,
                normalizer_params=norm_params)
            outputs = tf.reshape(outputs, [
                -1, self.start_size, self.start_size, self.channel_depths[0]
            ], name="projection_reshape")
            logging.debug("Projection: {}".format(outputs))

        # One stride-2 transposed convolution per entry of
        # channel_depths[:-1]; the last depth belongs to the output conv.
        for deconv_layer_i, deconv_layer in enumerate(
                self.channel_depths[:-1]):
            with tf.variable_scope("deconv_{}".format(deconv_layer_i)):
                outputs = slim.conv2d_transpose(inputs=outputs,
                                                num_outputs=deconv_layer,
                                                padding='SAME')
                logging.debug("Deconv layer {}: {}".format(
                    deconv_layer_i, outputs))

        # Output layer is a plain stride-1 convolution with tanh.
        with tf.variable_scope("output_deconv"):
            outputs = slim.conv2d(inputs=outputs,
                                  num_outputs=self.channel_depths[-1],
                                  activation_fn=tf.nn.tanh,
                                  normalizer_fn=None,
                                  normalizer_params=None,
                                  stride=1,
                                  kernel_size=[5, 5],
                                  padding='SAME')
        logging.debug("Generator output: {}".format(outputs))
        # (outputs + 1) / 2 shifts the tanh range to [0, 1] for the summary.
        tf.summary.image('generated_images',
                         tf.div(tf.add(outputs, 1.0), 2.0),
                         max_outputs=5)
    self.reuse = True
    self.variables = tf.get_collection(
        tf.GraphKeys.TRAINABLE_VARIABLES, scope='generator')
    return outputs
def deconv2d(input_, output_dim, ks=4, s=2, stddev=0.02, name="deconv2d"):
    """Transposed convolution with no bias and no activation.

    Args:
        input_: tensor to upsample.
        output_dim: number of output channels.
        ks: kernel size.
        s: stride.
        stddev: standard deviation of the truncated-normal weight init.
        name: variable scope wrapping the layer.

    Returns:
        Output tensor of the transposed convolution.
    """
    kernel_init = tf.truncated_normal_initializer(stddev=stddev)
    with tf.variable_scope(name):
        upsampled = slim.conv2d_transpose(input_, output_dim, ks, s,
                                          padding='SAME',
                                          activation_fn=None,
                                          weights_initializer=kernel_init,
                                          biases_initializer=None)
    return upsampled
def deconv(inputs, rate, k, scope=""):
    """Two-channel transposed convolution under scope ``"deconv" + scope``.

    Args:
        inputs: tensor to upsample.
        rate: stride of the transposed convolution.
        k: kernel size.
        scope: suffix appended to ``"deconv"`` for the variable scope name.

    Returns:
        Output tensor with 2 channels.
    """
    with tf.variable_scope("deconv" + scope):
        return slim.conv2d_transpose(inputs, 2, k, stride=rate)
def STbaseline(inputs, outputs, loss_weight, labels):
    """
    Two-stream baseline.

    Spatial stream based on VGG16
    Temporal stream based on Flownet simple

    Args:
        inputs: first image batch (also fed to the spatial stream).
        outputs: second image batch.
        loss_weight: indexable weights; entries 0..5 weight the flow losses
            of levels 1..6 and entry 0 also weights the action loss.
        labels: integer class labels for the 101-way classifier.

    Returns:
        (losses, flows_all, predictions) where ``losses`` is
        ``[loss1, ..., loss6, actionLoss]``, ``flows_all`` holds the scaled
        flow of levels 1..6 and ``predictions`` is
        ``[prev1, actionPredictions]``.

    Note: ``tf.concat(3, [...])`` and the positional
    ``sparse_softmax_cross_entropy_with_logits`` call use the legacy
    argument order and are kept unchanged.
    """
    # Mean subtraction (BGR) for flying chairs
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32, name="img_global_mean")
    inputs = inputs - mean
    outputs = outputs - mean
    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    inputs = tf.truediv(inputs, 255.0)
    outputs = tf.truediv(outputs, 255.0)

    # Local response normalization (ACROSS_CHANNELS) for the photometric loss
    inputs_norm = tf.nn.local_response_normalization(inputs, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(outputs, depth_radius=4, beta=0.7)

    # NOTE(review): block nesting was reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        activation_fn=tf.nn.elu):  # original used leaky ReLU
        # Contracting part
        Tconv1 = slim.conv2d(tf.concat(3, [inputs, outputs]), 64, [7, 7], stride=2, scope='Tconv1')
        Tconv2 = slim.conv2d(Tconv1, 128, [5, 5], stride=2, scope='Tconv2')
        Tconv3_1 = slim.conv2d(Tconv2, 256, [5, 5], stride=2, scope='Tconv3_1')
        Tconv3_2 = slim.conv2d(Tconv3_1, 256, [3, 3], scope='Tconv3_2')
        Tconv4_1 = slim.conv2d(Tconv3_2, 512, [3, 3], stride=2, scope='Tconv4_1')
        Tconv4_2 = slim.conv2d(Tconv4_1, 512, [3, 3], scope='Tconv4_2')
        Tconv5_1 = slim.conv2d(Tconv4_2, 512, [3, 3], stride=2, scope='Tconv5_1')
        Tconv5_2 = slim.conv2d(Tconv5_1, 512, [3, 3], scope='Tconv5_2')
        Tconv6_1 = slim.conv2d(Tconv5_2, 1024, [3, 3], stride=2, scope='Tconv6_1')
        Tconv6_2 = slim.conv2d(Tconv6_1, 1024, [3, 3], scope='Tconv6_2')

        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001
        alpha_c = 0.25
        alpha_s = 0.37
        lambda_smooth = 1.0
        FlowDeltaWeights = tf.constant(
            [0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, -1, 0],
            dtype=tf.float32, shape=[3, 3, 2, 2], name="FlowDeltaWeights")
        scale = 2  # for deconvolution

        # Expanding part. Per-level tables, keyed by pyramid level (6 is the
        # coarsest). The flow scale of level L is 20 / 2**L.
        flow_scale = {6: 0.3125, 5: 0.625, 4: 1.25, 3: 2.5, 2: 5.0, 1: 10.0}
        skip_feature = {5: Tconv5_2, 4: Tconv4_2, 3: Tconv3_2,
                        2: Tconv2, 1: Tconv1}
        upconv_depth = {5: 512, 4: 256, 3: 128, 2: 64, 1: 32}

        flow = {}
        flow_loss = {}
        warped = None
        features = Tconv6_2
        for level in range(6, 0, -1):
            if level < 6:
                # Upsample the features and the coarser flow, then merge
                # them with the encoder feature of this level.
                upconv = slim.conv2d_transpose(
                    features, upconv_depth[level], [2 * scale, 2 * scale],
                    stride=scale, scope='upconv%d' % level)
                flow_up = slim.conv2d_transpose(
                    flow[level + 1], 2, [2 * scale, 2 * scale],
                    stride=scale, activation_fn=None,
                    scope='up_pr%dto%d' % (level + 1, level))
                features = tf.concat(3, [skip_feature[level], upconv, flow_up])

            flow[level] = slim.conv2d(features, 2, [3, 3], activation_fn=None,
                                      scope='pr%d' % level)
            height = flow[level].get_shape()[1].value
            width = flow[level].get_shape()[2].value
            # Resize both normalised images to the flow resolution.
            resized_in = tf.image.resize_bilinear(inputs_norm, [height, width])
            resized_out = tf.image.resize_bilinear(outputs_norm, [height, width])
            flow_loss[level], warped = loss_interp(
                flow[level], resized_in, resized_out, epsilon, alpha_c,
                alpha_s, lambda_smooth, flow_scale[level], FlowDeltaWeights)

        # Only the second return value of the finest level (1) is reported.
        prev1 = warped

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        # Spatial stream: (channel depth, number of 3x3 convs) per stage.
        vgg_layout = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))
        net = inputs
        for block, (depth, repeats) in enumerate(vgg_layout, 1):
            for idx in range(1, repeats + 1):
                net = slim.conv2d(net, depth, [3, 3],
                                  scope='conv%d_%d' % (block, idx))
            net = slim.max_pool2d(net, [2, 2], scope='pool%d' % block)
        pool5 = net

        # Incorporate temporal feature
        concatST = tf.concat(3, [pool5, Tconv5_2])
        poolST = slim.max_pool2d(concatST, [2, 2])
        concat2ST = tf.concat(3, [poolST, Tconv6_2])
        # 1x1 convolution reduces the merged features to 512 channels.
        concatDR = slim.conv2d(concat2ST, 512, [1, 1])

        flatten5 = slim.flatten(concatDR, scope='flatten5')
        fc6 = slim.fully_connected(flatten5, 4096, scope='fc6')
        dropout6 = slim.dropout(fc6, 0.9, scope='dropout6')
        fc7 = slim.fully_connected(dropout6, 4096, scope='fc7')
        dropout7 = slim.dropout(fc7, 0.9, scope='dropout7')
        fc8 = slim.fully_connected(dropout7, 101, activation_fn=None, scope='fc8')

    prob = tf.nn.softmax(fc8)
    actionPredictions = tf.argmax(prob, 1)
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(fc8, labels)
    actionLoss = tf.reduce_mean(cross_entropy)

    # Adding intermediate losses: loss_weight[k] weights level k + 1.
    all_loss = loss_weight[0] * flow_loss[1]["total"]
    for idx in range(1, 6):
        all_loss = all_loss + loss_weight[idx] * flow_loss[idx + 1]["total"]
    # NOTE(review): the action loss reuses loss_weight[0] -- confirm that
    # this is intended.
    all_loss = all_loss + loss_weight[0] * actionLoss
    slim.losses.add_loss(all_loss)

    losses = [flow_loss[level] for level in range(1, 7)] + [actionLoss]
    flows_all = [flow[level] * flow_scale[level] for level in range(1, 7)]
    predictions = [prev1, actionPredictions]
    return losses, flows_all, predictions
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False, gt_boxes=None):
    """Build the 3-way outputs, i.e., class, box and mask in the pyramid

    Algo
    ----
    For each layer:
      1. Build anchor layer
      2. Process the results of anchor layer, decode the output into rois
      3. Sample rois
      4. Build roi layer
      5. Process the results of roi layer, decode the output into boxes
      6. Build the mask layer
      7. Build losses

    Args:
        pyramid: dict of feature maps keyed 'P2' .. 'P5'.
        ih, iw: forwarded to the anchor / roi decoders.
        num_classes: number of classes for the refine and mask heads.
        base_anchors: number of anchors per location.
        is_training: toggles dropout and RPN sampling; when False the crops
            are recomputed before the mask head.
        gt_boxes: forwarded to ``sample_rpn_outputs_with_gt``.

    Returns:
        dict with the keys 'rpn', 'roi', 'assigned_rois',
        'assigned_layer_inds', 'ordered_rois', 'pyramid_feature', 'refined',
        'final_boxes' and 'mask'.
    """
    outputs = {}
    arg_scope = _extra_conv_arg_scope_with_bn(activation_fn=None)
    my_sigmoid = None
    # NOTE(review): block nesting was reconstructed from a
    # whitespace-collapsed source -- confirm against the original file.
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2 ** i

                ## rpn head
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1,
                                  activation_fn=tf.nn.relu, scope='%s/rpn' % p)
                box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1,
                                  scope='%s/rpn/box' % p,
                                  weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
                                  activation_fn=my_sigmoid)
                cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1,
                                  scope='%s/rpn/cls' % p,
                                  weights_initializer=tf.truncated_normal_initializer(stddev=0.01))

                anchor_scales = [2 ** (i - 2), 2 ** (i - 1), 2 ** (i)]
                print("anchor_scales = ", anchor_scales)
                all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
                outputs['rpn'][p] = {'box': box, 'cls': cls, 'anchor': all_anchors}

            ## gather all rois
            rpn_boxes = [tf.reshape(outputs['rpn']['P%d' % p]['box'], [-1, 4])
                         for p in range(5, 1, -1)]
            rpn_clses = [tf.reshape(outputs['rpn']['P%d' % p]['cls'], [-1, 1])
                         for p in range(5, 1, -1)]
            rpn_anchors = [tf.reshape(outputs['rpn']['P%d' % p]['anchor'], [-1, 4])
                           for p in range(5, 1, -1)]
            rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
            rpn_clses = tf.concat(values=rpn_clses, axis=0)
            rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

            outputs['rpn']['box'] = rpn_boxes
            outputs['rpn']['cls'] = rpn_clses
            outputs['rpn']['anchor'] = rpn_anchors

            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            rois, roi_clses, scores, = anchor_decoder(rpn_boxes, rpn_probs, rpn_anchors, ih, iw)
            rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes,
                                           is_training=is_training)

            outputs['roi'] = {'box': rois, 'score': scores}

            ## cropping regions
            [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])

            outputs['assigned_rois'] = assigned_rois
            outputs['assigned_layer_inds'] = assigned_layer_inds

            cropped_rois = []
            ordered_rois = []
            pyramid_feature = []
            for i in range(5, 1, -1):
                print(i)
                p = 'P%d' % i
                # assign_boxes was called with levels [2, 3, 4, 5], so
                # level i is stored at index i - 2.
                splitted_rois = assigned_rois[i - 2]
                batch_inds = assigned_batch_inds[i - 2]
                cropped, boxes_in_crop = ROIAlign(pyramid[p], splitted_rois, batch_inds,
                                                  stride=2 ** i,
                                                  pooled_height=14, pooled_width=14)
                cropped_rois.append(cropped)
                ordered_rois.append(splitted_rois)
                pyramid_feature.append(tf.transpose(pyramid[p], [0, 3, 1, 2]))

            cropped_rois = tf.concat(values=cropped_rois, axis=0)
            ordered_rois = tf.concat(values=ordered_rois, axis=0)

            outputs['ordered_rois'] = ordered_rois
            outputs['pyramid_feature'] = pyramid_feature
            outputs['roi']['cropped_rois'] = cropped_rois
            tf.add_to_collection('__CROPPED__', cropped_rois)

            ## refine head
            # to 7 x 7
            cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2, padding='SAME')
            refine = slim.flatten(cropped_regions)
            refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
            refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
            refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
            refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
            cls2 = slim.fully_connected(refine, num_classes, activation_fn=None,
                                        weights_initializer=tf.truncated_normal_initializer(stddev=0.05))
            box = slim.fully_connected(refine, num_classes * 4, activation_fn=my_sigmoid,
                                       weights_initializer=tf.truncated_normal_initializer(stddev=0.05))

            outputs['refined'] = {'box': box, 'cls': cls2}

            ## decode refine net outputs
            cls2_prob = tf.nn.softmax(cls2)
            final_boxes, classes, scores = \
                roi_decoder(box, cls2_prob, ordered_rois, ih, iw)

            outputs['final_boxes'] = {'box': final_boxes, 'cls': classes, 'prob': cls2_prob}

            ## for testing, maskrcnn takes refined boxes as inputs
            if not is_training:
                rois = final_boxes
                # The names cropped_rois / ordered_rois refer to concatenated
                # Tensors at this point, and Tensors have no `.append`.
                # Collect the re-cropped regions in fresh lists instead.
                test_cropped_rois = []
                test_ordered_rois = []
                for i in range(5, 1, -1):
                    p = 'P%d' % i
                    splitted_rois = assigned_rois[i - 2]
                    batch_inds = assigned_batch_inds[i - 2]
                    cropped, _ = ROIAlign(pyramid[p], splitted_rois, batch_inds,
                                          stride=2 ** i,
                                          pooled_height=14, pooled_width=14)
                    test_cropped_rois.append(cropped)
                    test_ordered_rois.append(splitted_rois)
                cropped_rois = tf.concat(values=test_cropped_rois, axis=0)
                ordered_rois = tf.concat(values=test_ordered_rois, axis=0)

            ## mask head
            m = cropped_rois
            for _ in range(4):
                m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME',
                                activation_fn=tf.nn.relu)
            # to 28 x 28
            m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID',
                                      activation_fn=tf.nn.relu)
            tf.add_to_collection('__TRANSPOSED__', m)
            m = slim.conv2d(m, num_classes, [1, 1], stride=1, padding='VALID',
                            activation_fn=None)

            # add a mask, given the predicted boxes and classes
            outputs['mask'] = {'mask': m, 'cls': classes, 'score': scores}

    return outputs
def interface(self, input_x):
    """Build the residual attention network graph on ``input_x``.

    The graph is a ResNet-style stem, residual blocks interleaved with two
    attention stages, one small decoder per attention stage, and a
    classification head.

    Returns:
        (decode_attention_out1, decode_attention_out2, logits): the
        single-channel outputs of the two decoders and the class logits with
        ``self.num_class`` channels.

    NOTE(review): block nesting was reconstructed from a
    whitespace-collapsed source -- confirm which statements belong inside
    each ``with`` block.
    """
    with tf.variable_scope('residual_attention_network'):
        # ResNet stem: 7x7 convolution with stride 2, followed by a 3x3
        # max-pool with stride 2.
        sc = arg_scope_.arg_scope(is_training=self.is_training)
        with slim.arg_scope(sc):
            conv1 = slim.conv2d(input_x, 64, [7, 7], stride=2, padding='SAME', scope='conv')
            mpool1 = slim.max_pool2d(conv1, [3, 3], stride=2, padding='SAME', scope='maxpool')

        residual_out1 = self.residual_block.residual_block(
            mpool1, 64, scope_name='residual_block1')

        # Author's note: reduced to 1/8 -> 80*60
        residual_out2 = self.residual_block.residual_block(
            residual_out1, 128, stride=2, scope_name='residual_block2')

        # First attention stage.
        attention_out1 = self.attention_block_stage0.attention_block_stage0(
            residual_out2, 128, 1)

        # Decode the first attention output.
        # Author's note: upsample to 1/2.
        with slim.arg_scope(
                arg_scope_.arg_scope(is_training=self.is_training)):
            decode_attention_out1 = slim.conv2d(attention_out1, 128, [1, 1], stride=1, scope='deconv1-1')
            decode_attention_out1 = slim.conv2d_transpose(
                decode_attention_out1, 64, [3, 3], stride=2, scope='deconv1-2')
            decode_attention_out1 = slim.conv2d(decode_attention_out1, 64, [1, 1], stride=1, scope='deconv1-3')
            # Final decoder layer: one channel, no normaliser or activation.
            decode_attention_out1 = slim.conv2d_transpose(
                decode_attention_out1, 1, [3, 3], stride=2,
                normalizer_fn=None, activation_fn=None, scope='deconv1-4')

        # One down-sampling step.
        # Author's note: reduced to 1/16 -> 40*30
        residual_out3 = self.residual_block.residual_block(
            attention_out1, 256, stride=2, scope_name='residual_block3')

        # Second attention stage.
        attention_out2_2 = self.attention_block_stage1.attention_block_stage1(
            residual_out3, 256, 2)

        # Decode the second attention output.
        # Author's note: upsample to 1/4.
        with slim.arg_scope(
                arg_scope_.arg_scope(is_training=self.is_training)):
            decode_attention_out2 = slim.conv2d(attention_out2_2, 256, [1, 1], stride=1, scope='deconv2-1')
            decode_attention_out2 = slim.conv2d_transpose(
                decode_attention_out2, 128, [3, 3], stride=2, scope='deconv2-2')
            decode_attention_out2 = slim.conv2d(decode_attention_out2, 128, [1, 1], stride=1, scope='deconv2-3')
            # Final decoder layer: one channel, no normaliser or activation.
            decode_attention_out2 = slim.conv2d_transpose(
                decode_attention_out2, 1, [3, 3], stride=2,
                normalizer_fn=None, activation_fn=None, scope='deconv2-4')

        # A further down-sampling step with a third attention stage
        # (attention_block_stage2, 512 channels) and a four-layer
        # `deconv3-*` transposed-convolution decoder used to follow here;
        # it is disabled.

        # Author's size notes: 30*23, 20*15
        residual_out4 = self.residual_block.residual_block(
            attention_out2_2, 512, stride=2, scope_name='residual_block4')
        residual_out5 = self.residual_block.residual_block(
            residual_out4, 512, scope_name='residual_block5')

        # Author's size note: 10*8
        residual_out6 = self.residual_block.residual_block(
            residual_out5, 1024, stride=2, scope_name='residual_block6')

        # Global average pooling over axes 1 and 2; keepdims leaves a 4-D
        # tensor so the classifier can be a 1x1 convolution.
        global_avg_out = tf.reduce_mean(residual_out6, [1, 2], name='global_avg_pool', keepdims=True)
        logits = slim.conv2d(global_avg_out, self.num_class, [1, 1],
                             activation_fn=None, normalizer_fn=None, scope='logits')

    return decode_attention_out1, decode_attention_out2, logits
def interface_resnet50(self, inputs, reuse=None, is_training=False):
    """Predict alpha and reflectance logits on top of a ResNet-v2-50 backbone.

    The backbone is built under ``resnet_arg_scope(use_batch_norm=True)``.
    Two structurally identical decoder heads are then created inside the
    'cloud_net' variable scope.  Each head starts from the
    'resnet_v2_50/block4' endpoint, applies three stride-2 transposed
    convolutions, adds a backbone endpoint after each one, and finishes
    with a 3x3 'pred' convolution without activation.

    Args:
        inputs: image tensor passed to ``resnet_v2_50``.
        reuse: forwarded to the backbone and to the 'cloud_net' scope.
        is_training: forwarded to the backbone and to ``self.fcn_arg_scope``.

    Returns:
        Tuple ``(alpha_logits, reflectance_logits)`` with
        ``self.alpha_channel`` and ``self.reflectance_channel`` output
        channels respectively.
    """
    with slim.arg_scope(resnet_arg_scope(use_batch_norm=True)):
        _, resnet_endpoints = resnet_v2_50(
            inputs,
            reuse=reuse,
            is_training=is_training,
        )

    # Named backbone taps.  They are not consumed by the heads below but
    # are still looked up, as in the original.  The trailing notes are the
    # original author's size/channel annotations.
    endpoints = {
        'net1': resnet_endpoints[
            'resnet_v2_50/block1/unit_2/bottleneck_v2'],  # 128*128 256
        'net2': resnet_endpoints[
            'resnet_v2_50/block2/unit_3/bottleneck_v2'],  # 64*64 512
        'net3': resnet_endpoints[
            'resnet_v2_50/block3/unit_5/bottleneck_v2'],  # 32*32 1024
        'net4': resnet_endpoints[
            'resnet_v2_50/block4/unit_3/bottleneck_v2'],  # 16*16 2048
    }

    # (output depth of the transposed conv, endpoint added afterwards)
    skip_plan = (
        (512, 'resnet_v2_50/block2/unit_2/bottleneck_v2'),
        (256, 'resnet_v2_50/block1/unit_2/bottleneck_v2'),
        (64, 'resnet_v2_50/conv1'),
    )

    def build_head(head_scope, out_channels):
        """Create one decoder head and return its un-activated logits."""
        with tf.variable_scope(head_scope):
            nets = resnet_endpoints['resnet_v2_50/block4']
            for depth, skip_key in skip_plan:
                upsampled = slim.conv2d_transpose(
                    nets, depth, kernel_size=[3, 3], stride=2)
                nets = upsampled + resnet_endpoints[skip_key]
            return slim.conv2d(nets, out_channels, [3, 3], scope='pred',
                               activation_fn=None)

    with slim.arg_scope(
            self.fcn_arg_scope(is_training=is_training,
                               normalizer_fn=None)):
        with tf.variable_scope('cloud_net', 'cloud_net', [inputs],
                               reuse=reuse):
            alpha_logits = build_head('alpha_prediction',
                                      self.alpha_channel)
            reflectance_logits = build_head('reflectance_prediction',
                                            self.reflectance_channel)
    return alpha_logits, reflectance_logits
def interface_unet(self, inputs, reuse=None, is_training=True):
    """Build a U-Net style encoder with two decoder heads.

    The encoder stacks five stages of two 3x3 convolutions (64, 128, 256,
    512, 1024 filters) with a 2x2 max pool between stages.  Each head
    (alpha, reflectance) starts from the deepest stage and repeats
    "stride-2 transposed conv -> ``self.crop_and_concat`` with the matching
    encoder stage -> two 3x3 convs" four times, then applies a 3x3
    convolution without activation and resizes the result to
    ``[self.img_size, self.img_size]``.

    NOTE(review): the original size notes (508 -> 254 -> 250 ...) suggest
    ``self.fcn_arg_scope`` configures unpadded convolutions -- confirm.

    Args:
        inputs: image tensor.
        reuse: forwarded to the 'cloud_net' variable scope.
        is_training: forwarded to ``self.fcn_arg_scope``.

    Returns:
        Tuple ``(alpha_logits, reflectance_logits)``.
    """
    endpoints = {}
    encoder_depths = (64, 128, 256, 512, 1024)
    # (filters, encoder stage concatenated after upsampling)
    decoder_plan = ((512, 'net4'), (256, 'net3'), (128, 'net2'),
                    (64, 'net1'))

    def build_head(head_scope, out_channels):
        """Create one expanding path and return logits resized to img_size."""
        with tf.variable_scope(head_scope):
            nets = endpoints['net5']
            for depth, skip_key in decoder_plan:
                nets = slim.conv2d_transpose(nets, depth, [3, 3], stride=2)
                nets = self.crop_and_concat(endpoints[skip_key], nets)
                nets = slim.repeat(nets, 2, slim.conv2d, depth, [3, 3])
            logits = slim.conv2d(nets, out_channels, [3, 3],
                                 padding='SAME', activation_fn=None)
            return tf.image.resize_images(
                logits, [self.img_size, self.img_size])

    with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)):
        with tf.variable_scope('cloud_net', 'cloud_net', [inputs],
                               reuse=reuse):
            with tf.variable_scope('feature_exatraction'):
                nets = inputs
                for stage, depth in enumerate(encoder_depths, 1):
                    if stage > 1:
                        # Downsample between stages, never before the first.
                        nets = slim.max_pool2d(nets, [2, 2])
                    nets = slim.repeat(nets, 2, slim.conv2d, depth, [3, 3])
                    endpoints['net%d' % stage] = nets

            alpha_logits = build_head('alpha_prediction',
                                      self.alpha_channel)
            reflectance_logits = build_head('reflectance_prediction',
                                            self.reflectance_channel)
    return alpha_logits, reflectance_logits
def interface_vgg16(self, inputs, reuse=None, is_training=True):
    """Predict alpha and reflectance logits on top of a VGG-16 backbone.

    ``vgg_16`` is built without a classifier (``num_classes=None``,
    ``spatial_squeeze=False``).  Under 'cloud_net', two extra stride-2
    stages ('pool5'/'conv6' and 'pool6'/'conv7') extend the backbone.
    Each head then applies six stride-2 transposed convolutions, adding an
    encoder feature map after each one (net6 down to net1), and ends with
    a 3x3 'pred' convolution without activation.

    Args:
        inputs: image tensor passed to ``vgg_16``.
        reuse: forwarded to the backbone and to the 'cloud_net' scope.
        is_training: forwarded to the backbone and ``self.fcn_arg_scope``.

    Returns:
        Tuple ``(alpha_logits, reflectance_logits)``.
    """
    with slim.arg_scope(vgg_arg_scope()):
        _, vgg_end_points = vgg_16(inputs,
                                   is_training=is_training,
                                   reuse=reuse,
                                   spatial_squeeze=False,
                                   num_classes=None)

    # Last convolution of each VGG stage, used as skip connections.
    endpoints = {
        'net1': vgg_end_points['vgg_16/conv1/conv1_2'],
        'net2': vgg_end_points['vgg_16/conv2/conv2_2'],
        'net3': vgg_end_points['vgg_16/conv3/conv3_3'],
        'net4': vgg_end_points['vgg_16/conv4/conv4_3'],
        'net5': vgg_end_points['vgg_16/conv5/conv5_3'],
    }

    # (filters of the transposed conv, skip feature added afterwards)
    decoder_plan = ((512, 'net6'), (512, 'net5'), (512, 'net4'),
                    (256, 'net3'), (128, 'net2'), (64, 'net1'))

    def build_head(head_scope, out_channels):
        """Create one decoder head and return its un-activated logits."""
        with tf.variable_scope(head_scope):
            nets = endpoints['net7']
            for step, (depth, skip_key) in enumerate(decoder_plan, 1):
                upsampled = slim.conv2d_transpose(
                    nets, depth, [3, 3], stride=2,
                    scope='conv_trans%d' % step)
                nets = upsampled + endpoints[skip_key]
            return slim.conv2d(nets, out_channels, [3, 3], scope='pred',
                               activation_fn=None)

    with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)):
        with tf.variable_scope('cloud_net', 'cloud_net', [inputs],
                               reuse=reuse):
            with tf.variable_scope('feature_exatraction'):
                # Strided convolutions stand in for pooling here.
                nets = vgg_end_points['vgg_16/conv5/conv5_3']
                nets = slim.conv2d(nets, 512, [3, 3], stride=2,
                                   scope='pool5')
                nets = slim.repeat(nets, 2, slim.conv2d, 512, [3, 3],
                                   scope='conv6')
                endpoints['net6'] = nets
                nets = slim.conv2d(nets, 512, [3, 3], stride=2,
                                   scope='pool6')
                nets = slim.conv2d(nets, 512, [3, 3], scope='conv7')
                endpoints['net7'] = nets

            alpha_logits = build_head('alpha_prediction',
                                      self.alpha_channel)
            reflectance_logits = build_head('reflectance_prediction',
                                            self.reflectance_channel)
    return alpha_logits, reflectance_logits
def interface_cloudMattingNet(self, inputs, reuse=None, is_training=True):
    """Build the stand-alone cloud matting encoder with two decoder heads.

    The encoder has six stages.  Each is two 3x3 convolutions ('convK')
    followed by a stride-2 3x3 convolution ('poolK') used in place of
    pooling.  A single 'conv7' convolution forms the bottleneck.  Each
    head applies six stride-2 transposed convolutions, adding the matching
    encoder stage after each one, and ends with a 3x3 'pred' convolution
    without activation.

    Args:
        inputs: image tensor.
        reuse: forwarded to the 'cloud_net' variable scope.
        is_training: forwarded to ``self.fcn_arg_scope``.

    Returns:
        Tuple ``(alpha_logits, reflectance_logits)`` with
        ``self.alpha_channel`` / ``self.reflectance_channel`` channels.
    """
    endpoints = {}
    # Filters per encoder stage; the strided 'pool' conv keeps that depth.
    encoder_depths = (64, 128, 128, 256, 512, 512)
    # (filters of the transposed conv, encoder stage added afterwards)
    decoder_plan = ((512, 'net6'), (512, 'net5'), (256, 'net4'),
                    (128, 'net3'), (128, 'net2'), (64, 'net1'))

    def build_head(head_scope, out_channels):
        """Create one decoder head and return its un-activated logits."""
        with tf.variable_scope(head_scope):
            nets = endpoints['net7']
            for step, (depth, skip_key) in enumerate(decoder_plan, 1):
                upsampled = slim.conv2d_transpose(
                    nets, depth, [3, 3], stride=2,
                    scope='conv_trans%d' % step)
                nets = upsampled + endpoints[skip_key]
            return slim.conv2d(nets, out_channels, [3, 3], scope='pred',
                               activation_fn=None)

    with slim.arg_scope(self.fcn_arg_scope(is_training=is_training)):
        with tf.variable_scope('cloud_net', 'cloud_net', [inputs],
                               reuse=reuse):
            with tf.variable_scope('feature_exatraction'):
                nets = inputs
                for stage, depth in enumerate(encoder_depths, 1):
                    nets = slim.repeat(nets, 2, slim.conv2d, depth, [3, 3],
                                       scope='conv%d' % stage)
                    endpoints['net%d' % stage] = nets
                    nets = slim.conv2d(nets, depth, [3, 3], stride=2,
                                       scope='pool%d' % stage)
                nets = slim.conv2d(nets, 512, [3, 3], scope='conv7')
                endpoints['net7'] = nets

            alpha_logits = build_head('alpha_prediction',
                                      self.alpha_channel)
            reflectance_logits = build_head('reflectance_prediction',
                                            self.reflectance_channel)
    return alpha_logits, reflectance_logits
def STsingle(inputs, outputs, loss_weight, labels):
    """Build a two-branch (action + optical flow) network on a VGG-16 trunk.

    The two frames are mean-subtracted, scaled by 1/255, concatenated along
    the channel axis and passed through a shared VGG-16 style trunk.  The
    spatial branch classifies the clip into 101 classes with fully
    connected layers.  The temporal branch predicts 2-channel flow at five
    scales and scores each with ``loss_interp`` on local-response
    normalized frames.

    TensorFlow 1.x calling conventions are used, matching the rest of this
    file: ``tf.concat(values, axis=...)`` and keyword arguments for
    ``sparse_softmax_cross_entropy_with_logits``.

    Args:
        inputs: first frame batch.  NOTE(review): presumably NHWC BGR
            images in 0..255 with static height/width -- confirm.
        outputs: second frame batch, same layout as ``inputs``.
        loss_weight: indexable with at least five weights, one per flow
            scale (index 0 is the finest scale).
        labels: integer class labels for the sparse cross-entropy loss.

    Returns:
        Tuple ``(losses, flows_all, predictions)``:
        ``losses`` is ``[loss1..loss5, actionLoss]``; ``flows_all`` holds
        the five flow predictions multiplied by their flow scale, finest
        first; ``predictions`` is ``[prev1, actionPredictions]`` where
        ``prev1`` is the second value returned by ``loss_interp`` at the
        finest scale.
    """
    # Mean subtraction (BGR) for flying chairs.
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32,
                       name="img_global_mean")
    inputs = inputs - mean
    outputs = outputs - mean
    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    inputs = tf.truediv(inputs, 255.0)
    outputs = tf.truediv(outputs, 255.0)

    # Local response normalization (across channels) for the photometric
    # loss.
    inputs_norm = tf.nn.local_response_normalization(
        inputs, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(
        outputs, depth_radius=4, beta=0.7)

    # Hyper-parameters of the unsupervised flow loss.
    epsilon = 0.0001
    alpha_c = 0.3
    alpha_s = 0.3
    lambda_smooth = 0.8
    scale = 2  # upsampling factor of every transposed convolution

    # (filters, number of 3x3 convs) for each VGG-16 stage.
    vgg_stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose, slim.fully_connected],
            activation_fn=tf.nn.elu):
        # ---- Shared conv layers ----
        net = tf.concat([inputs, outputs], axis=3)
        pools = []
        for stage, (depth, repeats) in enumerate(vgg_stages, 1):
            for layer in range(1, repeats + 1):
                net = slim.conv2d(net, depth, [3, 3],
                                  scope='conv%d_%d' % (stage, layer))
            net = slim.max_pool2d(net, [2, 2], scope='pool%d' % stage)
            pools.append(net)
        pool1, pool2, pool3, pool4, pool5 = pools

        # ---- Spatial branch ----
        flatten5 = slim.flatten(pool5, scope='flatten5')
        fc6 = slim.fully_connected(flatten5, 4096, scope='fc6')
        dropout6 = slim.dropout(fc6, 0.9, scope='dropout6')
        fc7 = slim.fully_connected(dropout6, 4096, scope='fc7')
        dropout7 = slim.dropout(fc7, 0.9, scope='dropout7')
        fc8 = slim.fully_connected(dropout7, 101, activation_fn=None,
                                   scope='fc8')
        prob = tf.nn.softmax(fc8)
        actionPredictions = tf.argmax(prob, 1)
        # Keyword arguments are mandatory in TF 1.x for this op.
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=fc8)
        actionLoss = tf.reduce_mean(cross_entropy)

        # ---- Temporal branch ----
        FlowDeltaWeights = tf.constant(
            [0, 0, 0, 0, 1, -1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, -1, 0],
            dtype=tf.float32, shape=[3, 3, 2, 2], name="FlowDeltaWeights")

        def flow_loss(flow, flow_scale):
            """Resize both normalized frames to ``flow`` and score it."""
            height = flow.get_shape()[1].value
            width = flow.get_shape()[2].value
            frame_a = tf.image.resize_bilinear(inputs_norm, [height, width])
            frame_b = tf.image.resize_bilinear(outputs_norm,
                                               [height, width])
            return loss_interp(flow, frame_a, frame_b, epsilon, alpha_c,
                               alpha_s, lambda_smooth, flow_scale,
                               FlowDeltaWeights)

        def expand(features, flow, skip, depth, feat_scope, flow_scope):
            """Upsample features and flow 2x and concat them with ``skip``."""
            up_feat = slim.conv2d_transpose(
                features, depth, [2 * scale, 2 * scale], stride=scale,
                scope=feat_scope)
            up_flow = slim.conv2d_transpose(
                flow, 2, [2 * scale, 2 * scale], stride=scale,
                activation_fn=None, scope=flow_scope)
            return tf.concat([skip, up_feat, up_flow], axis=3)

        # Expanding part: predict flow coarse-to-fine.
        pr5 = slim.conv2d(pool5, 2, [3, 3], activation_fn=None, scope='pr5')
        flow_scale_5 = 0.625  # (*20/32)
        loss5, _ = flow_loss(pr5, flow_scale_5)

        concat4 = expand(pool5, pr5, pool4, 256, 'upconv4', 'up_pr5to4')
        pr4 = slim.conv2d(concat4, 2, [3, 3], activation_fn=None,
                          scope='pr4')
        flow_scale_4 = 1.25  # (*20/16)
        loss4, _ = flow_loss(pr4, flow_scale_4)

        concat3 = expand(concat4, pr4, pool3, 128, 'upconv3', 'up_pr4to3')
        pr3 = slim.conv2d(concat3, 2, [3, 3], activation_fn=None,
                          scope='pr3')
        flow_scale_3 = 2.5  # (*20/8)
        loss3, _ = flow_loss(pr3, flow_scale_3)

        concat2 = expand(concat3, pr3, pool2, 64, 'upconv2', 'up_pr3to2')
        pr2 = slim.conv2d(concat2, 2, [3, 3], activation_fn=None,
                          scope='pr2')
        flow_scale_2 = 5.0  # (*20/4)
        loss2, _ = flow_loss(pr2, flow_scale_2)

        concat1 = expand(concat2, pr2, pool1, 32, 'upconv1', 'up_pr2to1')
        pr1 = slim.conv2d(concat1, 2, [3, 3], activation_fn=None,
                          scope='pr1')
        flow_scale_1 = 10.0  # (*20/2)
        loss1, prev1 = flow_loss(pr1, flow_scale_1)

        # Weighted sum of the per-scale losses plus the action loss.
        # NOTE(review): the action loss reuses loss_weight[0] -- confirm
        # this is intended rather than a dedicated weight.
        all_loss = (loss_weight[0] * loss1["total"] +
                    loss_weight[1] * loss2["total"] +
                    loss_weight[2] * loss3["total"] +
                    loss_weight[3] * loss4["total"] +
                    loss_weight[4] * loss5["total"] +
                    loss_weight[0] * actionLoss)
        slim.losses.add_loss(all_loss)

        losses = [loss1, loss2, loss3, loss4, loss5, actionLoss]
        flows_all = [pr1 * flow_scale_1, pr2 * flow_scale_2,
                     pr3 * flow_scale_3, pr4 * flow_scale_4,
                     pr5 * flow_scale_5]
        predictions = [prev1, actionPredictions]
        return losses, flows_all, predictions
def ppm(input, end_points, name=None):
    """Add pyramid-pooling branches to the graph and record their outputs.

    Four average-pooling branches are created under the 'Pyramid_Pooling'
    variable scope, each followed by a 1x1 convolution with one output
    channel.  Branch 0 additionally applies a transposed convolution.
    Every intermediate tensor is stored in ``end_points`` under the same
    string that is used as its layer scope.

    Args:
        input: feature map to pool (the parameter name shadows the builtin
            but is kept for backward compatibility with keyword callers).
        end_points: dict updated in place with the new tensors.
        name: optional prefix for scope names and ``end_points`` keys.
            ``None`` (the default) means no prefix; previously the default
            raised ``TypeError`` on the first string concatenation.

    Returns:
        None.  Results are delivered through ``end_points``.
    """
    prefix = '' if name is None else name

    with tf.variable_scope('Pyramid_Pooling'):
        # Branch 0: stride-1 pooling, 1x1 conv, then a transposed conv.
        # Its keys are chained (prefix + 'branch_0', + 'conv_0', + 'up');
        # they are kept as-is because they double as variable scopes.
        end_point = prefix + 'branch_0'
        net = slim.avg_pool2d(input, [10, 37], stride=1, padding='VALID',
                              scope=end_point)
        end_points[end_point] = net
        end_point = end_point + 'conv_0'
        net = slim.conv2d(net, 1, [1, 1], stride=1, padding='SAME',
                          scope=end_point)
        end_points[end_point] = net
        end_point = end_point + 'up'
        net = slim.conv2d_transpose(net, 1, [10, 37], stride=2,
                                    padding='VALID', scope=end_point)
        end_points[end_point] = net

        # Branches 1-3: non-overlapping pooling (stride == window), then a
        # 1x1 conv.
        for index, window in enumerate(([5, 18], [3, 12], [2, 7]), 1):
            end_point = prefix + 'branch_%d' % index
            net = slim.avg_pool2d(input, window, stride=window,
                                  padding='VALID', scope=end_point)
            end_points[end_point] = net
            end_point = prefix + 'conv_%d' % index
            net = slim.conv2d(net, 1, [1, 1], stride=1, padding='SAME',
                              scope=end_point)
            end_points[end_point] = net
def pose_exp_net(tgt_image, src_image_stack, do_exp=False, is_training=True,
                 isReuse=None):
    """Predict relative camera poses and, optionally, explainability masks.

    The target image and the source stack are concatenated on the channel
    axis and encoded by five stride-2 convolutions.  The pose branch adds
    two more stride-2 convolutions and a 1x1 prediction layer, averages it
    over the spatial axes and scales it by 0.01.  When ``do_exp`` is true,
    a transposed-convolution decoder emits one mask tensor at each of four
    scales.

    Args:
        tgt_image: target frame tensor.
        src_image_stack: source frames, concatenated with ``tgt_image`` on
            axis 3.  The number of source views is fixed at 2 here.
        do_exp: build the mask decoder when true.
        is_training: accepted for interface compatibility; not used.
        isReuse: ``reuse`` flag for the 'pose_exp_net' variable scope.

    Returns:
        Tuple ``(pose_final, [mask1, mask2, mask3, mask4], end_points)``.
        ``pose_final`` is reshaped to ``[-1, 6 * num_source]``; the masks
        are ``None`` when ``do_exp`` is false; ``end_points`` is the dict
        built from the layers' output collection.
    """
    num_source = 2
    inputs = tf.concat([tgt_image, src_image_stack], axis=3)

    # (filters, square kernel size) of the shared encoder, cnv1..cnv5.
    encoder_plan = ((16, 7), (32, 5), (64, 3), (128, 3), (256, 3))
    # (level, filters, square kernel size) of the mask decoder after upcnv5.
    decoder_plan = ((4, 128, 3), (3, 64, 3), (2, 32, 5), (1, 16, 7))

    with tf.variable_scope('pose_exp_net', reuse=isReuse) as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                            normalizer_fn=None,
                            weights_regularizer=slim.l2_regularizer(0.05),
                            activation_fn=tf.nn.relu,
                            outputs_collections=end_points_collection):
            # cnv1 to cnv5 are shared between pose and explainability
            # prediction.
            shared = inputs
            for level, (depth, ksize) in enumerate(encoder_plan, 1):
                shared = slim.conv2d(shared, depth, [ksize, ksize],
                                     stride=2, scope='cnv%d' % level)

            # Pose-specific layers.
            with tf.variable_scope('pose'):
                cnv6 = slim.conv2d(shared, 256, [3, 3], stride=2,
                                   scope='cnv6')
                cnv7 = slim.conv2d(cnv6, 256, [3, 3], stride=2,
                                   scope='cnv7')
                pose_pred = slim.conv2d(cnv7, 6 * num_source, [1, 1],
                                        scope='pred', stride=1,
                                        normalizer_fn=None,
                                        activation_fn=None)
                pose_avg = tf.reduce_mean(pose_pred, [1, 2])
                # Empirically, scaling by a small constant facilitates
                # training.
                pose_final = 0.01 * tf.reshape(pose_avg,
                                               [-1, 6 * num_source])

            # Explainability-mask layers.
            masks = {1: None, 2: None, 3: None, 4: None}
            if do_exp:
                with tf.variable_scope('exp'):
                    up = slim.conv2d_transpose(shared, 256, [3, 3],
                                               stride=2, scope='upcnv5')
                    for level, depth, ksize in decoder_plan:
                        up = slim.conv2d_transpose(
                            up, depth, [ksize, ksize], stride=2,
                            scope='upcnv%d' % level)
                        masks[level] = slim.conv2d(
                            up, num_source * 2, [ksize, ksize], stride=1,
                            scope='mask%d' % level,
                            normalizer_fn=None, activation_fn=None)

            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return (pose_final,
                    [masks[1], masks[2], masks[3], masks[4]],
                    end_points)
def deconv(self, x, num_out_layers, kernel_size, scale):
    """Upsample ``x`` with a transposed convolution on a padded input.

    The two middle axes of ``x`` are zero-padded by one on each side, a
    'SAME' transposed convolution with stride ``scale`` is applied, and
    the result is cropped by 3 at the start and 1 at the end of those two
    axes.

    NOTE(review): the fixed 3/1 crop removes exactly the padding
    contribution only for ``scale == 2`` -- confirm that callers never
    pass another value.

    Args:
        x: 4-D input tensor.
        num_out_layers: number of output channels.
        kernel_size: kernel size of the transposed convolution.
        scale: stride of the transposed convolution.

    Returns:
        The cropped transposed-convolution output.
    """
    one_pixel_border = [[0, 0], [1, 1], [1, 1], [0, 0]]
    padded = tf.pad(x, one_pixel_border)
    upsampled = slim.conv2d_transpose(padded, num_out_layers, kernel_size,
                                      stride=scale, padding='SAME')
    return upsampled[:, 3:-1, 3:-1, :]
def FLowNetSimple(data):
    """Build a FlowNetSimple-style encoder/decoder and return six flow maps.

    Adapted from
    https://github.com/linjian93/tf-flownet/blob/master/train_flownet_simple.py

    The contracting part is ten convolutions (six of them stride 2).  The
    refinement part repeatedly upsamples both the features and the current
    2-channel flow with 4x4 stride-2 transposed convolutions, concatenates
    them with the matching encoder feature map, and predicts a new flow.

    Args:
        data: input tensor fed to 'conv1'.  NOTE(review): presumably the
            two frames already concatenated on the channel axis -- confirm.

    Returns:
        Tuple ``(predict1, predict3, predict2, predict4, predict5,
        predict6)``.
        NOTE(review): predict3 comes before predict2.  This looks
        accidental but is kept because callers may rely on it.
    """
    # ---- contracting part ----
    conv1 = slim.conv2d(data, 64, [7, 7], stride=2, scope='conv1')
    conv2 = slim.conv2d(conv1, 128, [5, 5], stride=2, scope='conv2')
    conv3 = slim.conv2d(conv2, 256, [5, 5], stride=2, scope='conv3')
    conv3_1 = slim.conv2d(conv3, 256, [3, 3], stride=1, scope='conv3_1')
    conv4 = slim.conv2d(conv3_1, 512, [3, 3], stride=2, scope='conv4')
    conv4_1 = slim.conv2d(conv4, 512, [3, 3], stride=1, scope='conv4_1')
    conv5 = slim.conv2d(conv4_1, 512, [3, 3], stride=2, scope='conv5')
    conv5_1 = slim.conv2d(conv5, 512, [3, 3], stride=1, scope='conv5_1')
    conv6 = slim.conv2d(conv5_1, 1024, [3, 3], stride=2, scope='conv6')
    conv6_1 = slim.conv2d(conv6, 1024, [3, 3], stride=1, scope='conv6_1')

    # Coarsest flow prediction.
    flows = {
        6: slim.conv2d(conv6_1, 2, [3, 3], stride=1, activation_fn=None,
                       scope='pred6'),
    }

    # ---- refinement part ----
    # (level, encoder skip tensor, filters of the feature upsampling).
    # Original size notes: 12*16 after level 5, 24*32 after level 4,
    # 48*64 after level 3, 96*128 after level 2, 192*256 after level 1.
    refinement_plan = (
        (5, conv5_1, 512),
        (4, conv4_1, 256),
        (3, conv3_1, 128),
        (2, conv2, 64),
        (1, conv1, 64),
    )
    features = conv6_1
    for level, skip, depth in refinement_plan:
        up_features = slim.conv2d_transpose(
            features, depth, [4, 4], stride=2, padding='SAME',
            scope='deconv%d' % level)
        up_flow = slim.conv2d_transpose(
            flows[level + 1], 2, [4, 4], stride=2, padding='SAME',
            scope='deconvflow%d' % (level + 1))
        features = tf.concat((skip, up_features, up_flow), axis=3,
                             name='concat%d' % level)
        flows[level] = slim.conv2d(
            features, 2, [3, 3], stride=1, padding='SAME',
            activation_fn=None, scope='predict%d' % level)

    return (flows[1], flows[3], flows[2], flows[4], flows[5], flows[6])
def net_structure(img1, img2, boundary1, boundary2):
    """Build a correlation-based flow network over image + boundary pairs.

    Each image is concatenated with its boundary map on the channel axis
    and passed through a shared three-layer tower (the second pass reuses
    the variables).  The two towers are cross-correlated, the result is
    combined with a 1x1 projection of tower A, and a contracting stack
    plus a refinement decoder predict flow at five scales.  All
    convolutions use 'VALID' padding with explicit ``pad`` / ``antipad``
    calls, mirroring the original Caffe layout.

    The module-level names ``weight_decay``, ``LeakyReLU``, ``pad``,
    ``antipad``, ``correlation``, ``img_height`` and ``img_width`` are
    read here.

    Args:
        img1: first image tensor.
        img2: second image tensor.
        boundary1: boundary map concatenated with ``img1`` on axis 3.
        boundary2: boundary map concatenated with ``img2`` on axis 3.

    Returns:
        Dict with 'predict_flow6' .. 'predict_flow2' (raw predictions) and
        'flow' (``predict_flow2 * 20.0`` bilinearly resized to
        ``[img_height, img_width]``).
    """
    def siamese_tower(stacked, reuse):
        """Run conv1-conv3 and return the conv2 and conv3 activations."""
        first = slim.conv2d(pad(stacked, 3), 64, 7, scope='conv1',
                            reuse=reuse)
        second = slim.conv2d(pad(first, 2), 128, 5, scope='conv2',
                             reuse=reuse)
        third = slim.conv2d(pad(second, 2), 256, 5, scope='conv3',
                            reuse=reuse)
        return second, third

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        # He (aka MSRA) weight initialization
                        weights_initializer=slim.variance_scaling_initializer(),
                        activation_fn=LeakyReLU,
                        # Padding is done by hand to match the original
                        # Caffe code.
                        padding='VALID'):
        weights_regularizer = slim.l2_regularizer(weight_decay)

        combination_a = tf.concat([img1, boundary1], axis=3)
        combination_b = tf.concat([img2, boundary2], axis=3)

        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=weights_regularizer):
            with slim.arg_scope([slim.conv2d], stride=2):
                conv_a_2, conv_a_3 = siamese_tower(combination_a, None)
                _, conv_b_3 = siamese_tower(combination_b, True)

                # Cross correlation followed by a leaky ReLU.
                cc = correlation.correlation(conv_a_3, conv_b_3,
                                             1, 20, 1, 2, 20)
                cc_relu = LeakyReLU(cc)

            # Combine the correlation with a 1x1 projection of feature
            # map A, concatenated along the channel axis.
            netA_conv = slim.conv2d(conv_a_3, 32, 1, scope='conv_redir')
            net = tf.concat([netA_conv, cc_relu], axis=3)

            conv3_1 = slim.conv2d(pad(net), 256, 3, scope='conv3_1')
            with slim.arg_scope([slim.conv2d], num_outputs=512,
                                kernel_size=3):
                conv4 = slim.conv2d(pad(conv3_1), stride=2, scope='conv4')
                conv4_1 = slim.conv2d(pad(conv4), scope='conv4_1')
                conv5 = slim.conv2d(pad(conv4_1), stride=2, scope='conv5')
                conv5_1 = slim.conv2d(pad(conv5), scope='conv5_1')
            conv6 = slim.conv2d(pad(conv5_1), 1024, 3, stride=2,
                                scope='conv6')
            conv6_1 = slim.conv2d(pad(conv6), 1024, 3, scope='conv6_1')

            # ---- START: refinement network ----
            result = {}
            # (level, encoder skip tensor, filters of the feature upsampling)
            refinement_plan = (
                (5, conv5_1, 512),
                (4, conv4_1, 256),
                (3, conv3_1, 128),
                (2, conv_a_2, 64),
            )
            with slim.arg_scope([slim.conv2d_transpose],
                                biases_initializer=None):
                flow_pred = slim.conv2d(pad(conv6_1), 2, 3,
                                        scope='predict_flow6',
                                        activation_fn=None)
                result['predict_flow6'] = flow_pred

                features = conv6_1
                for level, skip, depth in refinement_plan:
                    up_features = antipad(slim.conv2d_transpose(
                        features, depth, 4, stride=2,
                        scope='deconv%d' % level))
                    up_flow = antipad(slim.conv2d_transpose(
                        flow_pred, 2, 4, stride=2,
                        scope='upsample_flow%dto%d' % (level + 1, level),
                        activation_fn=None))
                    features = tf.concat([skip, up_features, up_flow],
                                         axis=3)
                    flow_pred = slim.conv2d(
                        pad(features), 2, 3,
                        scope='predict_flow%d' % level,
                        activation_fn=None)
                    result['predict_flow%d' % level] = flow_pred
            # ---- END: refinement network ----

            # An alternative "new loss" that resized and fused the
            # predictions of levels 5-2 through an extra 'flow_temp0'
            # convolution was prototyped here and is currently disabled.

            # Original loss computation.
            flow = flow_pred * 20.0
            # TODO: Look at Accum (train) or Resample (deploy) to see if we
            # need to do something different.
            flow = tf.image.resize_bilinear(
                flow, tf.stack([img_height, img_width]),
                align_corners=True)
            result['flow'] = flow

            return result
def create_generator(self, z, y, scope_name, is_training=True, reuse=False):
    """Build a conditional DCGAN-style generator.

    Layout follows
    https://github.com/carpedm20/DCGAN-tensorflow/blob/master/model.py:
    the condition ``y`` is concatenated to the input of both fully
    connected layers and, reshaped to ``[batch, 1, 1, y_dim]``, joined to
    the feature maps with ``conv_cond_concat`` before each transposed
    convolution.

    Args:
        z: latent batch; concatenated with ``y`` on axis 1.
        y: condition batch; reshaped with ``self.batch_size`` and
            ``self.y_dim``.
        scope_name: variable scope holding all generator variables.
        is_training: forwarded to batch normalization.
        reuse: when true, reuse the variables of ``scope_name``.

    Returns:
        Output of the last transposed convolution (``self.c_dim``
        channels, sigmoid activation, no normalizer).
    """
    with tf.variable_scope(scope_name) as scope:
        if reuse:
            scope.reuse_variables()

        batch_norm_params = {
            # 'decay': 0.999,
            'decay': 0.9,  # also known as momentum, they are the same
            'updates_collections': None,
            # 'epsilon': 0.001,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training,
            'scope': 'batch_norm',
        }

        # The first argument lists the layer types the defaults apply to.
        with arg_scope(
                [layers.conv2d, layers.conv2d_transpose,
                 layers.fully_connected],
                normalizer_fn=layers.batch_norm,
                normalizer_params=batch_norm_params,
                weights_initializer=layers.xavier_initializer(uniform=False),
                biases_initializer=tf.constant_initializer(0.0)):
            # self.image_size is indexed as (width, height).  Two stride-2
            # transposed convolutions follow, so the first feature map is
            # a quarter of the target size.  (The unused half-size locals
            # of the original were dropped.)
            s_h, s_w = self.image_size[1], self.image_size[0]
            s_h4 = int(s_h / 4)
            s_w4 = int(s_w / 4)

            yb = tf.reshape(y, [self.batch_size, 1, 1, self.y_dim])
            z = tf.concat([z, y], 1)

            h0 = slim.fully_connected(
                z,
                num_outputs=self.gfc_dim,
                scope='g_h0_lin',
                activation_fn=slim.nn.relu,
            )
            h0 = tf.concat([h0, y], 1)

            h1 = slim.fully_connected(
                h0,
                num_outputs=self.gf_dim * 2 * s_h4 * s_w4,
                scope='g_h1_lin',
                activation_fn=slim.nn.relu,
            )
            h1 = tf.reshape(h1,
                            [self.batch_size, s_h4, s_w4, self.gf_dim * 2])
            h1 = conv_cond_concat(h1, yb)

            h2 = slim.conv2d_transpose(
                h1,
                num_outputs=self.gf_dim * 2,
                scope='g_h2',
                kernel_size=[5, 5],
                stride=2,
                activation_fn=slim.nn.relu,
            )
            h2 = conv_cond_concat(h2, yb)

            # The output layer skips batch normalization.
            h3 = slim.conv2d_transpose(h2,
                                       num_outputs=self.c_dim,
                                       scope='g_h3',
                                       kernel_size=[5, 5],
                                       stride=2,
                                       normalizer_fn=None,
                                       activation_fn=slim.nn.sigmoid)
            return h3
def generator(self, inputs, content_extractor_layers, reuse=False):
    """Build the generator with skip connections from a content extractor.

    A transposed convolution and a stride-1 convolution are applied per
    resolution, each followed by batch norm + ReLU.  Before every further
    upsampling step the features are concatenated on the channel axis with
    the matching content-extractor layer ('conv4_1', 'conv3_1', 'conv2_1').

    ``tf.concat`` is called with the TensorFlow 1.x argument order
    (``values`` first, ``axis`` as keyword), consistent with the rest of
    this file; the previous ``tf.concat(3, values)`` form is rejected by
    TensorFlow >= 1.0.

    Args:
        inputs: latent tensor; per the original note ``(batch, 1, 1, 128)``.
        content_extractor_layers: mapping that provides the 'conv4_1',
            'conv3_1' and 'conv2_1' feature maps.
        reuse: ``reuse`` flag for the 'generator' variable scope.

    Returns:
        Output of 'conv_transpose4' (3 channels, tanh); per the original
        note ``(batch_size, 32, 32, 3)``.
    """
    with tf.variable_scope('generator', reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d_transpose],
                padding='SAME',
                activation_fn=None,
                stride=2,
                weights_initializer=tf.contrib.layers.xavier_initializer()):
            with slim.arg_scope([slim.batch_norm],
                                decay=0.95,
                                center=True,
                                scale=True,
                                activation_fn=tf.nn.relu,
                                is_training=(self.mode == 'train')):
                with slim.arg_scope(
                        [slim.conv2d],
                        padding='SAME',
                        activation_fn=None,
                        stride=1,
                        weights_initializer=tf.contrib.layers.
                        xavier_initializer()):
                    # Shape notes below are the original author's.
                    net = slim.conv2d_transpose(
                        inputs, 512, [4, 4], padding='VALID',
                        scope='conv_transpose1_1')  # (batch_size, 4, 4, 512)
                    net = slim.batch_norm(net, scope='bn1_1')
                    net = slim.conv2d(
                        net, 512, [3, 3],
                        scope='conv_transpose1_2')  # (batch_size, 4, 4, 512)
                    net = slim.batch_norm(net, scope='bn1_2')

                    concat = tf.concat(
                        (net, content_extractor_layers['conv4_1']), axis=3)
                    net = slim.conv2d_transpose(
                        concat, 256, [3, 3],
                        scope='conv_transpose2_1')  # (batch_size, 8, 8, 256)
                    net = slim.batch_norm(net, scope='bn2')
                    net = slim.conv2d(
                        net, 256, [3, 3],
                        scope='conv_transpose2_2')  # (batch_size, 8, 8, 256)
                    net = slim.batch_norm(net, scope='bn2_2')

                    concat = tf.concat(
                        (net, content_extractor_layers['conv3_1']), axis=3)
                    net = slim.conv2d_transpose(
                        concat, 128, [3, 3],
                        scope='conv_transpose3_1')  # (batch_size, 16, 16, 128)
                    net = slim.batch_norm(net, scope='bn3')
                    net = slim.conv2d(
                        net, 128, [3, 3],
                        scope='conv_transpose3_2')  # (batch_size, 16, 16, 128)
                    net = slim.batch_norm(net, scope='bn3_2')

                    concat = tf.concat(
                        (net, content_extractor_layers['conv2_1']), axis=3)
                    # The output layer uses tanh and is not batch-normalized.
                    net = slim.conv2d_transpose(
                        concat, 3, [3, 3],
                        activation_fn=tf.nn.tanh,
                        scope='conv_transpose4')  # (batch_size, 32, 32, 3)
                    return net
def deconv(x, *args, pad=1, **kwargs):
    """Pad ``x`` explicitly, then apply a VALID-padded transposed convolution.

    Args:
        x: input tensor.
        *args: positional arguments forwarded to ``slim.conv2d_transpose``.
        pad: amount passed to the file-level ``padding`` helper.
        **kwargs: keyword arguments forwarded to ``slim.conv2d_transpose``.

    Returns:
        The output of ``slim.conv2d_transpose``.
    """
    valid_padding = slim.arg_scope(
        [slim.conv2d, slim.conv2d_transpose], padding='VALID')
    with valid_padding:
        padded = padding(x, pad)
        return slim.conv2d_transpose(padded, *args, **kwargs)
def VGG16(photo_source, photo_target, geo_source, geo_target, loss_weight):
    """Build a VGG16-style flow network with a loss at five scales.

    The encoder is five conv/pool blocks over the concatenated photo pair.
    The decoder predicts a 2-channel flow at the coarsest level (pool5) and
    refines it four times. Each refinement up-samples the features and the
    previous flow, concatenates them with the matching pooled encoder
    features and predicts a new flow. ``loss_interp`` is evaluated at every
    level against resized, locally-normalised ``geo_source`` / ``geo_target``.

    Args:
        photo_source: first image; concatenated with ``photo_target`` on
            axis 3 as the encoder input.
        photo_target: second image.
        geo_source: image normalised and resized as the loss "input".
        geo_target: image normalised and resized as the loss "output".
        loss_weight: indexable with 0..4; weights for the level-1..level-5
            losses respectively.

    Returns:
        ``(losses, flows_all, prev1)``: the per-level loss dicts ordered
        level 1..5, the per-level flows multiplied by their flow scale in
        the same order, and the second return value of ``loss_interp`` at
        level 1.
    """
    # NOTE(review): tf.concat is called as (axis, values), the pre-1.0
    # TensorFlow argument order - confirm the TensorFlow version in use.

    # Local response normalisation (across channels) for the photometric loss.
    inputs_norm = tf.nn.local_response_normalization(
        geo_source, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(
        geo_target, depth_radius=4, beta=0.7)

    # The original used leaky ReLU; ELU is used now.
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        weights_initializer=initializers.xavier_initializer(),
                        weights_regularizer=None,
                        biases_initializer=init_ops.zeros_initializer,
                        biases_regularizer=None,
                        activation_fn=tf.nn.elu):
        # Contracting part: (block number, depth, number of 3x3 convs).
        encoder_blocks = (
            (1, 64, 2), (2, 128, 2), (3, 256, 3), (4, 512, 3), (5, 512, 3))
        pools = {}
        net = tf.concat(3, [photo_source, photo_target])
        for block, depth, num_convs in encoder_blocks:
            for idx in range(1, num_convs + 1):
                net = slim.conv2d(net, depth, [3, 3],
                                  scope='conv%d_%d' % (block, idx))
            net = slim.max_pool2d(net, [2, 2], scope='pool%d' % block)
            pools[block] = net

        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001
        alpha_c = 0.25
        alpha_s = 0.37
        lambda_smooth = 1.0
        scale = 2  # for deconvolution
        deltaWeights = {}

        # Filters for the flow derivatives (width, then height direction).
        flow_width = tf.constant(
            [[0, 0, 0], [0, 1, -1], [0, 0, 0]], tf.float32)
        flow_width_filter = tf.reshape(flow_width, [3, 3, 1, 1])
        flow_width_filter = tf.tile(flow_width_filter, [1, 1, 2, 1])
        flow_height = tf.constant(
            [[0, 0, 0], [0, 1, 0], [0, -1, 0]], tf.float32)
        flow_height_filter = tf.reshape(flow_height, [3, 3, 1, 1])
        flow_height_filter = tf.tile(flow_height_filter, [1, 1, 2, 1])
        deltaWeights["flow_width_filter"] = flow_width_filter
        deltaWeights["flow_height_filter"] = flow_height_filter

        needImageGradients = False
        deltaWeights["needImageGradients"] = needImageGradients
        if needImageGradients:
            # Sobel filters for the image derivatives.
            sobel_x = tf.constant(
                [[-1, 0, 1], [-2, 0, 2], [-1, 0, 1]], tf.float32)
            sobel_x_filter = tf.reshape(sobel_x, [3, 3, 1, 1])
            sobel_y_filter = tf.transpose(sobel_x_filter, [1, 0, 2, 3])
            deltaWeights["sobel_x_filter"] = sobel_x_filter
            deltaWeights["sobel_y_filter"] = sobel_y_filter

        # Per-level flow scale: 20 / (down-sampling factor of the level).
        flow_scales = {5: 0.625, 4: 1.25, 3: 2.5, 2: 5.0, 1: 10.0}
        upconv_depths = {4: 256, 3: 128, 2: 64, 1: 32}

        def predict_flow(features, level):
            """Predict the flow at ``level`` and evaluate its loss."""
            flow = slim.conv2d(features, 2, [3, 3],
                               activation_fn=None, scope='pr%d' % level)
            height = flow.get_shape()[1].value
            width = flow.get_shape()[2].value
            resized_in = tf.image.resize_bilinear(
                inputs_norm, [height, width])
            resized_out = tf.image.resize_bilinear(
                outputs_norm, [height, width])
            level_loss, level_prev = loss_interp(
                flow, resized_in, resized_out, epsilon, alpha_c, alpha_s,
                lambda_smooth, flow_scales[level], deltaWeights)
            return flow, level_loss, level_prev

        # Expanding part: start at the coarsest level, then refine.
        flows = {}
        level_losses = {}
        features = pools[5]
        flows[5], level_losses[5], _ = predict_flow(features, 5)
        prev1 = None
        for level in (4, 3, 2, 1):
            upconv = slim.conv2d_transpose(
                features, upconv_depths[level], [2 * scale, 2 * scale],
                stride=scale, scope='upconv%d' % level)
            upsampled_flow = slim.conv2d_transpose(
                flows[level + 1], 2, [2 * scale, 2 * scale],
                stride=scale, activation_fn=None,
                scope='up_pr%dto%d' % (level + 1, level))
            features = tf.concat(3, [pools[level], upconv, upsampled_flow])
            flows[level], level_losses[level], prev1 = predict_flow(
                features, level)

        # Adding intermediate losses (loss_weight[k] pairs with level k + 1).
        losses = [level_losses[level] for level in (1, 2, 3, 4, 5)]
        all_loss = loss_weight[0] * losses[0]["total"]
        for position in range(1, 5):
            all_loss = all_loss + loss_weight[position] * losses[position]["total"]
        slim.losses.add_loss(all_loss)

        flows_all = [flows[level] * flow_scales[level]
                     for level in (1, 2, 3, 4, 5)]
        return losses, flows_all, prev1
def dcgan_generator(z, flags, scope=None, reuse=None):
    """DCGAN-style generator network.

    A fully connected layer is reshaped to a 4x4 map, then three stride-2
    transposed convolutions follow, with optional extra convolutions
    between them.

    Args:
        z: latent input tensor.
        flags: configuration object; reads ``nonlinearity_g``,
            ``downsample_conv_filt_size``, ``extra_conv_filt_size``,
            ``norm_g``, ``dim_g``, ``dim_d``, ``extra_top_conv`` and
            ``extra_depth_g``.
        scope: variable scope for the network.
        reuse: forwarded to ``tf.variable_scope``.

    Returns:
        The tanh-activated 3-channel output of the last transposed
        convolution.
    """
    activation = nonlinearity_fn(flags.nonlinearity_g, False)
    up_filter = flags.downsample_conv_filt_size
    extra_filter = flags.extra_conv_filt_size
    normalizer = contrib_slim.batch_norm if flags.norm_g else None

    # Keyword arguments shared by every layer except the output layer.
    hidden = dict(normalizer_fn=normalizer, activation_fn=activation)

    with tf.variable_scope(scope, reuse=reuse):
        out = contrib_slim.fully_connected(
            z, 4 * 4 * (4 * flags.dim_g), scope='fc', **hidden)
        out = tf.reshape(out, [-1, 4, 4, 4 * flags.dim_g])

        if flags.extra_top_conv:
            # NOTE(review): this layer is sized from dim_d while the rest of
            # the generator uses dim_g - confirm that this is intended.
            out = contrib_slim.conv2d(
                out, 4 * flags.dim_d, extra_filter,
                scope='extratopconv', **hidden)

        # (output depth, transposed-conv scope, extra-conv scope template)
        stages = (
            (2 * flags.dim_g, 'conv1', 'extraconv1.{}'),
            (flags.dim_g, 'conv2', 'extraconv2.{}'),
        )
        for depth, up_scope, extra_scope in stages:
            out = contrib_slim.conv2d_transpose(
                out, depth, up_filter, scope=up_scope, stride=2, **hidden)
            for i in range(flags.extra_depth_g):
                out = contrib_slim.conv2d(
                    out, depth, extra_filter,
                    scope=extra_scope.format(i), **hidden)

        # Output layer: no normalizer is passed here, tanh activation.
        out = contrib_slim.conv2d_transpose(
            out, 3, up_filter, scope='conv3', stride=2,
            activation_fn=tf.tanh)
        return out
def branch_1(self):
    """Build branch 1: packed raw image in, RGB image out.

    The first half packs ``self.input_img``, runs a convolution and three
    dense blocks (with one stride-2 convolution in between) and ends in the
    ``bottle_1_0`` bottleneck. The second half up-samples the bottleneck with
    a transposed convolution, runs one more dense block, merges it with the
    full-resolution features (``helper1``) and rearranges
    ``12 * Scale**2`` output channels into three colour planes with
    ``tf.depth_to_space``.

    Returns:
        ``(rgb, fc11, fc12)``: the three concatenated colour planes, the
        ``bottle_1_0`` output and the ``bottle_1_1`` output.
    """
    # NOTE(review): the source had a second scope ('branch_1_1') that is
    # commented out; this body assumes the layers that followed it still
    # live under 'branch_1_0' - confirm against existing checkpoints.
    with tf.variable_scope('branch_1_0', reuse=self.reuse):
        # ---- Raw image -> bottleneck (fc11) and full-resolution helper ----
        pack_img = model_tools.pack_raw(self.input_img)
        conv_1_0_low = slim.conv2d(inputs=pack_img,
                                   num_outputs=128,
                                   kernel_size=self.kernel_size,
                                   scope='conv_1_0_low',
                                   reuse=self.reuse,
                                   activation_fn=model_tools.lrelu)
        dense_1_0, next_in_0 = model_tools.block(
            conv_1_0_low, self.growth_rate, self.layers_per_block,
            self.kernel_size_dense, self.reuse, 'dense_1_0')
        # Stride-2 convolution halves the resolution.
        pool_in_0 = slim.conv2d(inputs=next_in_0,
                                num_outputs=128,
                                kernel_size=self.kernel_size,
                                stride=2,
                                scope='pool_in_0',
                                reuse=self.reuse,
                                activation_fn=model_tools.lrelu)
        dense_1_1, next_in_1 = model_tools.block(
            pool_in_0, self.growth_rate, self.layers_per_block,
            self.kernel_size_dense, self.reuse, 'dense_1_1')
        dense_1_2, _ = model_tools.block(
            next_in_1, self.growth_rate, self.layers_per_block,
            self.kernel_size_dense, self.reuse, 'dense_1_2')
        bottle_1_0 = slim.conv2d(
            inputs=tf.concat([dense_1_2, dense_1_1, pool_in_0], axis=3),
            num_outputs=128,
            kernel_size=1,
            scope='bottle_1_0',
            reuse=self.reuse,
            activation_fn=model_tools.lrelu)
        fc11 = bottle_1_0
        # Full-resolution features kept for the final merge.
        helper1 = tf.concat([conv_1_0_low, dense_1_0], axis=3)

        # ---- Bottleneck -> up-sampled features (fc12) -> RGB ----
        deconv_1_1_0 = slim.conv2d_transpose(
            inputs=fc11,
            num_outputs=128,
            kernel_size=[4, 4],
            stride=2,
            reuse=self.reuse,
            scope='deconv_1_1_0',
            activation_fn=model_tools.lrelu)
        conv_1_1_0 = slim.conv2d(inputs=deconv_1_1_0,
                                 num_outputs=128,
                                 kernel_size=self.kernel_size,
                                 scope='conv_1_1_0',
                                 reuse=self.reuse,
                                 activation_fn=model_tools.lrelu)
        dense_1_3, _ = model_tools.block(
            conv_1_1_0, self.growth_rate, self.layers_per_block,
            self.kernel_size_dense, self.reuse, 'dense_1_3')
        bottle_1_1 = slim.conv2d(
            inputs=tf.concat([helper1, conv_1_1_0, dense_1_3], axis=3),
            num_outputs=self.bottle_output,
            kernel_size=1,
            scope='bottle_1_1',
            reuse=self.reuse,
            activation_fn=model_tools.lrelu)
        fc12 = bottle_1_1

        conv_1_1_1 = slim.conv2d(inputs=bottle_1_1,
                                 num_outputs=12 * Scale**2,
                                 kernel_size=self.kernel_size,
                                 scope='conv_1_1_1',
                                 reuse=self.reuse,
                                 activation_fn=model_tools.lrelu)

        # Each colour plane owns 4 * Scale**2 consecutive channels.
        plane = 4 * Scale**2
        channel_groups = (
            conv_1_1_1[..., :plane],
            conv_1_1_1[..., plane:2 * plane],
            conv_1_1_1[..., 2 * plane:],
        )
        if Scale == 4:
            # One rearrangement step with block size 2 * Scale.
            conv_r, conv_g, conv_b = [
                tf.depth_to_space(group, Scale * 2)
                for group in channel_groups
            ]
        else:
            # Two steps: block size 2 (leaves Scale**2 channels), then Scale
            # (leaves 1 channel). The green and blue planes used to apply
            # Scale twice, which only matches this for Scale == 2 and does
            # not divide the channel count for other values.
            conv_r, conv_g, conv_b = [
                tf.depth_to_space(tf.depth_to_space(group, 2), Scale)
                for group in channel_groups
            ]
        rgb = tf.concat([conv_r, conv_g, conv_b], axis=3)
        return rgb, fc11, fc12
def build_heads(pyramid, ih, iw, num_classes, base_anchors,
                is_training=False, gt_boxes=None):
    """Build the 3-way outputs, i.e., class, box and mask in the pyramid

    Algo
    ----
    For each layer:
      1. Build anchor layer
      2. Process the results of anchor layer, decode the output into rois
      3. Sample rois
      4. Build roi layer
      5. Process the results of roi layer, decode the output into boxes
      6. Build the mask layer
      7. Build losses

    Args:
        pyramid: mapping with feature maps at keys ``'P2'`` .. ``'P5'``.
        ih: passed to ``anchor_decoder`` and ``roi_decoder``.
        iw: passed to ``anchor_decoder`` and ``roi_decoder``.
        num_classes: number of outputs of the class and mask heads; the
            refined box head has ``num_classes * 4`` outputs.
        base_anchors: the RPN box/class convs have ``base_anchors * 4`` and
            ``base_anchors * 2`` outputs.
        is_training: forwarded to dropout and to the roi sampler; when
            false, the mask-head crops are rebuilt after the refine head.
        gt_boxes: forwarded to ``sample_rpn_outputs_with_gt``.

    Returns:
        dict with keys ``'rpn'``, ``'roi'``, ``'refined'`` and ``'mask'``.
    """
    outputs = {}
    arg_scope = _extra_conv_arg_scope(activation_fn=None)
    my_sigmoid = None
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2 ** i

                ## rpn head
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1,
                                  activation_fn=tf.nn.relu,
                                  scope='%s/rpn' % p)
                box = slim.conv2d(
                    rpn, base_anchors * 4, [1, 1], stride=1,
                    scope='%s/rpn/box' % p,
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=0.001),
                    activation_fn=my_sigmoid)
                cls = slim.conv2d(
                    rpn, base_anchors * 2, [1, 1], stride=1,
                    scope='%s/rpn/cls' % p,
                    weights_initializer=tf.truncated_normal_initializer(
                        stddev=0.01))

                anchor_scales = [2 ** (i - 2), 2 ** (i - 1), 2 ** (i)]
                all_anchors = gen_all_anchors(
                    height, width, stride, anchor_scales)
                outputs['rpn'][p] = {
                    'box': box, 'cls': cls, 'anchor': all_anchors}

            ## gather all rois
            rpn_boxes = [
                tf.reshape(outputs['rpn']['P%d' % lvl]['box'], [-1, 4])
                for lvl in range(5, 1, -1)]
            rpn_clses = [
                tf.reshape(outputs['rpn']['P%d' % lvl]['cls'], [-1, 1])
                for lvl in range(5, 1, -1)]
            rpn_anchors = [
                tf.reshape(outputs['rpn']['P%d' % lvl]['anchor'], [-1, 4])
                for lvl in range(5, 1, -1)]
            rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
            rpn_clses = tf.concat(values=rpn_clses, axis=0)
            rpn_anchors = tf.concat(values=rpn_anchors, axis=0)

            outputs['rpn']['box'] = rpn_boxes
            outputs['rpn']['cls'] = rpn_clses
            outputs['rpn']['anchor'] = rpn_anchors

            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            rois, roi_clses, scores, = anchor_decoder(
                rpn_boxes, rpn_probs, rpn_anchors, ih, iw)
            rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes,
                                           is_training=is_training)

            outputs['roi'] = {'box': rois, 'score': scores}

            ## cropping regions
            [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])

            cropped_rois = []
            for i in range(5, 1, -1):
                p = 'P%d' % i
                splitted_rois = assigned_rois[i - 2]
                batch_inds = assigned_batch_inds[i - 2]
                cropped = ROIAlign(pyramid[p], splitted_rois, batch_inds,
                                   stride=2 ** i,
                                   pooled_height=14, pooled_width=14)
                cropped_rois.append(cropped)
            cropped_rois = tf.concat(values=cropped_rois, axis=0)

            outputs['roi']['cropped_rois'] = cropped_rois
            tf.add_to_collection('__CROPPED__', cropped_rois)

            ## refine head
            # to 7 x 7
            cropped_regions = slim.max_pool2d(
                cropped_rois, [3, 3], stride=2, padding='SAME')
            refine = slim.flatten(cropped_regions)
            refine = slim.fully_connected(
                refine, 1024, activation_fn=tf.nn.relu)
            refine = slim.dropout(
                refine, keep_prob=0.75, is_training=is_training)
            refine = slim.fully_connected(
                refine, 1024, activation_fn=tf.nn.relu)
            refine = slim.dropout(
                refine, keep_prob=0.75, is_training=is_training)
            cls2 = slim.fully_connected(
                refine, num_classes, activation_fn=None,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.01))
            box = slim.fully_connected(
                refine, num_classes * 4, activation_fn=my_sigmoid,
                weights_initializer=tf.truncated_normal_initializer(
                    stddev=0.001))

            outputs['refined'] = {'box': box, 'cls': cls2}

            ## decode refine net outputs
            cls2_prob = tf.nn.softmax(cls2)
            final_boxes, classes, scores = \
                roi_decoder(box, cls2_prob, rois, ih, iw)

            ## for testing, maskrcnn takes refined boxes as inputs
            if not is_training:
                rois = final_boxes
                # NOTE(review): re-assigning the refined boxes to pyramid
                # levels is disabled, so the crops below still come from
                # the RPN-level assignment made earlier:
                # [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                #     assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])

                # cropped_rois is already a Tensor at this point, so the
                # crops must be collected in a fresh list; appending to the
                # Tensor raised AttributeError in inference mode.
                recropped = []
                for i in range(5, 1, -1):
                    splitted_rois = assigned_rois[i - 2]
                    batch_inds = assigned_batch_inds[i - 2]
                    p = 'P%d' % i
                    cropped = ROIAlign(pyramid[p], splitted_rois, batch_inds,
                                       stride=2 ** i,
                                       pooled_height=14, pooled_width=14)
                    recropped.append(cropped)
                cropped_rois = tf.concat(values=recropped, axis=0)

            ## mask head
            m = cropped_rois
            for _ in range(4):
                m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME',
                                activation_fn=tf.nn.relu)
            # to 28 x 28
            m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID',
                                      activation_fn=tf.nn.relu)
            tf.add_to_collection('__TRANSPOSED__', m)
            m = slim.conv2d(m, num_classes, [1, 1], stride=1,
                            padding='VALID', activation_fn=None)

            # add a mask, given the predicted boxes and classes
            outputs['mask'] = {'mask': m, 'cls': classes, 'score': scores}

    return outputs
def generator(self, inputs, reuse=False, scope='g_net'):
    """Build the multi-scale encoder/decoder generator.

    The same encoder/decoder is applied ``self.n_levels`` times. At level
    ``i`` the input is resized by ``self.scale ** (self.n_levels - i - 1)``
    and concatenated with the previous level's prediction, which is resized
    and wrapped in ``tf.stop_gradient``. After the first level the variable
    scope is switched to reuse, so all levels share their weights. When
    ``self.args.model == 'lstm'`` a convolutional LSTM cell sits between
    encoder and decoder and its state is carried across levels.

    Args:
        inputs: tensor with a static 4-D shape; its height and width are
            read from ``get_shape()``.
        reuse: forwarded to the generator variable scope.
        scope: name of the generator variable scope.

    Returns:
        List with the prediction of every level, in the order the levels
        are built.
    """
    n, h, w, c = inputs.get_shape().as_list()

    use_lstm = self.args.model == 'lstm'
    if use_lstm:
        with tf.variable_scope('LSTM'):
            # Floor division keeps the state shape integral on Python 3 and
            # matches the `hi // 4` used when the state is resized below.
            cell = BasicConvLSTMCell([h // 4, w // 4], [3, 3], 128)
            rnn_state = cell.zero_state(batch_size=self.batch_size,
                                        dtype=tf.float32)

    x_unwrap = []
    with tf.variable_scope(scope, reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d, slim.conv2d_transpose],
                activation_fn=tf.nn.relu,
                padding='SAME',
                normalizer_fn=None,
                weights_initializer=tf.contrib.layers.xavier_initializer(
                    uniform=True),
                biases_initializer=tf.constant_initializer(0.0)):

            inp_pred = inputs
            for i in range(self.n_levels):
                scale = self.scale ** (self.n_levels - i - 1)
                hi = int(round(h * scale))
                wi = int(round(w * scale))
                inp_blur = tf.image.resize_images(inputs, [hi, wi], method=0)
                # No gradient flows back into the previous level's output.
                inp_pred = tf.stop_gradient(
                    tf.image.resize_images(inp_pred, [hi, wi], method=0))
                inp_all = tf.concat([inp_blur, inp_pred], axis=3, name='inp')
                if use_lstm:
                    rnn_state = tf.image.resize_images(
                        rnn_state, [hi // 4, wi // 4], method=0)

                # encoder
                conv1_1 = slim.conv2d(inp_all, 32, [5, 5], scope='enc1_1')
                conv1_2 = ResnetBlock(conv1_1, 32, 5, scope='enc1_2')
                conv1_3 = ResnetBlock(conv1_2, 32, 5, scope='enc1_3')
                conv1_4 = ResnetBlock(conv1_3, 32, 5, scope='enc1_4')
                conv2_1 = slim.conv2d(conv1_4, 64, [5, 5], stride=2,
                                      scope='enc2_1')
                conv2_2 = ResnetBlock(conv2_1, 64, 5, scope='enc2_2')
                conv2_3 = ResnetBlock(conv2_2, 64, 5, scope='enc2_3')
                conv2_4 = ResnetBlock(conv2_3, 64, 5, scope='enc2_4')
                conv3_1 = slim.conv2d(conv2_4, 128, [5, 5], stride=2,
                                      scope='enc3_1')
                conv3_2 = ResnetBlock(conv3_1, 128, 5, scope='enc3_2')
                conv3_3 = ResnetBlock(conv3_2, 128, 5, scope='enc3_3')
                conv3_4 = ResnetBlock(conv3_3, 128, 5, scope='enc3_4')

                if use_lstm:
                    deconv3_4, rnn_state = cell(conv3_4, rnn_state)
                else:
                    deconv3_4 = conv3_4

                # decoder
                deconv3_3 = ResnetBlock(deconv3_4, 128, 5, scope='dec3_3')
                deconv3_2 = ResnetBlock(deconv3_3, 128, 5, scope='dec3_2')
                deconv3_1 = ResnetBlock(deconv3_2, 128, 5, scope='dec3_1')
                deconv2_4 = slim.conv2d_transpose(
                    deconv3_1, 64, [4, 4], stride=2, scope='dec2_4')
                cat2 = deconv2_4 + conv2_4  # additive skip connection
                deconv2_3 = ResnetBlock(cat2, 64, 5, scope='dec2_3')
                deconv2_2 = ResnetBlock(deconv2_3, 64, 5, scope='dec2_2')
                deconv2_1 = ResnetBlock(deconv2_2, 64, 5, scope='dec2_1')
                deconv1_4 = slim.conv2d_transpose(
                    deconv2_1, 32, [4, 4], stride=2, scope='dec1_4')
                cat1 = deconv1_4 + conv1_4  # additive skip connection
                deconv1_3 = ResnetBlock(cat1, 32, 5, scope='dec1_3')
                deconv1_2 = ResnetBlock(deconv1_3, 32, 5, scope='dec1_2')
                deconv1_1 = ResnetBlock(deconv1_2, 32, 5, scope='dec1_1')
                inp_pred = slim.conv2d(deconv1_1, self.chns, [5, 5],
                                       activation_fn=None, scope='dec1_0')

                x_unwrap.append(inp_pred)

                if i == 0:
                    # Every later level reuses the variables created here.
                    tf.get_variable_scope().reuse_variables()

        return x_unwrap
def build_model(self, inputs, keep_prob):
    '''
    Reproduce the U-Net model.

    Five encoder stages (convolutions, skip tap, dropout, 2x2 max pooling),
    a two-convolution bottleneck, then five decoder stages (stride-2
    transposed convolution, concatenation with the matching skip tap, two
    convolutions).

    :param inputs: [None, height, width, channel]
    :param keep_prob: keep probability used by every dropout layer
    :return: [None, height, width, classes]; the last convolution has
        2 output channels
    '''
    # Encoder stages: (conv depths, skip tensor name, pooling scope).
    # The pooling scope name 'max_pool3' is repeated, as in the original.
    encoder_stages = (
        ((16, 32, 64), "crop1", "max_pool1"),    # 1024
        ((128, 128), "crop2", "max_pool2"),      # 512
        ((256, 256), "crop3", "max_pool3"),      # 256
        ((512, 512), "crop4", "max_pool3"),      # 128
        ((1024, 1024), "crop5", "max_pool3"),    # 64
    )
    # Decoder stages: depth of (deconv and first conv, second conv).
    decoder_stages = (
        (1024, 1024),  # 64
        (512, 512),    # 128
        (256, 256),    # 256
        (128, 128),    # 512
        (64, 2),       # 1024
    )

    with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose],
            padding="SAME",
            kernel_size=[3, 3],
            stride=1,
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.005)):
        with slim.arg_scope([slim.conv2d_transpose], stride=2):
            with slim.arg_scope([slim.dropout], keep_prob=keep_prob):
                with tf.name_scope("Unet"):
                    net = inputs
                    skips = []

                    with tf.variable_scope("downsampling"):
                        conv_index = 0
                        for depths, skip_name, pool_scope in encoder_stages:
                            for depth in depths:
                                conv_index += 1
                                net = slim.conv2d(
                                    net, depth, scope='conv%d' % conv_index)
                            skips.append(tf.identity(net, name=skip_name))
                            net = slim.dropout(net)
                            net = slim.max_pool2d(net, [2, 2], 2,
                                                  padding="VALID",
                                                  scope=pool_scope)
                        # Bottleneck (32)
                        net = slim.conv2d(net, 2048, scope='conv12')
                        net = slim.conv2d(net, 2048, scope='conv13')
                        net = slim.dropout(net)

                    with tf.variable_scope("upsampling"):
                        last_stage = len(decoder_stages)
                        for stage, (depth, out_depth) in enumerate(
                                decoder_stages, start=1):
                            net = slim.conv2d_transpose(
                                net, depth, scope="deconv%d" % stage)
                            # Deepest skip tap is consumed first.
                            net = tf.concat((net, skips.pop()), axis=3)
                            net = slim.conv2d(
                                net, depth,
                                scope='upconv%d' % (2 * stage - 1))
                            net = slim.conv2d(
                                net, out_depth,
                                scope='upconv%d' % (2 * stage))
                            # No dropout after the output stage.
                            if stage != last_stage:
                                net = slim.dropout(net)
    return net
def fcn_model(inputs, num_classes=21, is_training=True,
              dropout_keep_prob=0.8, reuse=None):
    """Build a VGG16-style encoder followed by a transposed-conv decoder.

    Args:
        inputs: input image tensor.
        num_classes: number of output channels of the final convolution.
        is_training: when false, the dropout keep probability is forced
            to 1.0.
        dropout_keep_prob: keep probability for the alpha-dropout layers.
        reuse: forwarded to the ``'vgg_16'`` variable scope.

    Returns:
        The output of the final ``conv6`` layer.
    """
    keep_prob = dropout_keep_prob if is_training else 1.0

    # Encoder blocks: (number of 3x3 convs, depth); block k uses the
    # scopes 'conv<k>' and 'pool<k>'.
    encoder_blocks = ((2, 64), (2, 128), (3, 256), (3, 512), (3, 512))
    # Decoder layers: (depth, stride, alpha-dropout afterwards?); layer k
    # uses the scope 'deconv<k>'.
    decoder_layers = (
        (256, (2, 2), True),
        (128, (2, 2), False),
        (64, (4, 4), True),
        (32, (2, 2), False),
    )

    with tf.variable_scope('vgg_16', reuse=reuse):
        with slim.arg_scope(
                [slim.conv2d, slim.conv2d_transpose],
                padding='SAME',
                activation_fn=tf.nn.selu,
                weights_initializer=tf.glorot_normal_initializer()):
            net = inputs
            for block, (repeats, depth) in enumerate(encoder_blocks, start=1):
                net = slim.repeat(net, repeats, slim.conv2d, depth, [3, 3],
                                  scope='conv%d' % block)
                net = slim.max_pool2d(net, [2, 2], scope='pool%d' % block)
                if block == 3:
                    # The only encoder dropout sits after the third pooling.
                    net = tf.contrib.nn.alpha_dropout(net, keep_prob)

            for layer, (depth, stride, with_dropout) in enumerate(
                    decoder_layers, start=1):
                net = slim.conv2d_transpose(net, depth,
                                            kernel_size=(3, 3),
                                            stride=stride,
                                            scope="deconv%d" % layer)
                if with_dropout:
                    net = tf.contrib.nn.alpha_dropout(net, keep_prob)

            preds = slim.conv2d(net, num_classes, [2, 2], scope="conv6")
    return preds
def deconv2d(x, o_dim, data_format='NHWC', name=None, k=4, s=2, act=None):
    """Shorthand for ``slim.conv2d_transpose``.

    Args:
        x: input tensor.
        o_dim: number of output channels.
        data_format: forwarded as ``data_format``.
        name: forwarded as the layer ``scope``.
        k: kernel size.
        s: stride.
        act: activation function (``None`` by default).

    Returns:
        The output of ``slim.conv2d_transpose``.
    """
    layer_options = dict(
        stride=s,
        activation_fn=act,
        scope=name,
        data_format=data_format,
    )
    return slim.conv2d_transpose(x, o_dim, k, **layer_options)
def add_conv_transpose_layer(*args, **kwargs):
    """Create a transposed-conv layer and register it as an activation.

    All arguments are forwarded to ``slim.conv2d_transpose``. The result is
    added to the ``tf.GraphKeys.ACTIVATIONS`` collection and, when a
    ``scope`` keyword was given, the scope and output shape are printed.

    Returns:
        The output of ``slim.conv2d_transpose``.
    """
    layer_output = slim.conv2d_transpose(*args, **kwargs)
    tf.add_to_collection(tf.GraphKeys.ACTIVATIONS, layer_output)

    if 'scope' not in kwargs:
        return layer_output

    label = '\t\t{scope}'.format(scope=kwargs['scope'])
    print(label, layer_output.get_shape())
    return layer_output
def disp_net(tgt_image, is_training=True, do_edge=False):
    """Build the multi-scale disparity network with an optional edge decoder.

    The image is resized to 127 x 415 and passed through a seven-stage
    encoder. A decoder with skip connections predicts disparity at four
    scales; each prediction is ``DISP_SCALING * sigmoid(conv) + MIN_DISP``
    and is resized to a fraction of the input's static height ``H`` and
    width ``W``. With ``do_edge`` a second decoder under the ``'edge'``
    variable scope predicts ``sigmoid(conv) + MIN_EDGE`` at the same scales.

    Args:
        tgt_image: image tensor with static height and width.
        is_training: forwarded to batch normalisation.
        do_edge: whether to build the edge decoder.

    Returns:
        ``([disp1, disp2, disp3, disp4], [edge1, edge2, edge3, edge4],
        end_points)``; the edge entries are ``None`` when ``do_edge`` is
        false, and ``end_points`` is the dict built from the layers'
        outputs collection.
    """
    batch_norm_params = {'is_training': is_training, 'decay': 0.999}
    H = tgt_image.get_shape()[1].value
    W = tgt_image.get_shape()[2].value
    tgt_image = tf.image.resize_bilinear(tgt_image, [127, 415])
    with tf.variable_scope('depth_net') as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
                [slim.conv2d, slim.conv2d_transpose],
                normalizer_fn=slim.batch_norm,
                normalizer_params=batch_norm_params,
                weights_regularizer=slim.l2_regularizer(0.05),
                activation_fn=tf.nn.relu,
                outputs_collections=end_points_collection):
            # ---- Encoder ----
            cnv1 = slim.conv2d(tgt_image, 32, [7, 7], stride=1, scope='cnv1')
            cnv1b = slim.conv2d(cnv1, 32, [7, 7], stride=1, scope='cnv1b')
            cnv2 = slim.conv2d(cnv1b, 64, [5, 5], stride=2, scope='cnv2')
            cnv2b = slim.conv2d(cnv2, 64, [5, 5], stride=1, scope='cnv2b')
            cnv3 = slim.conv2d(cnv2b, 128, [3, 3], stride=2, scope='cnv3')
            cnv3b = slim.conv2d(cnv3, 128, [3, 3], stride=1, scope='cnv3b')
            cnv4 = slim.conv2d(cnv3b, 256, [3, 3], stride=2, scope='cnv4')
            cnv4b = slim.conv2d(cnv4, 256, [3, 3], stride=1, scope='cnv4b')
            cnv5 = slim.conv2d(cnv4b, 512, [3, 3], stride=2, scope='cnv5')
            cnv5b = slim.conv2d(cnv5, 512, [3, 3], stride=1, scope='cnv5b')
            cnv6 = slim.conv2d(cnv5b, 512, [3, 3], stride=2, scope='cnv6')
            cnv6b = slim.conv2d(cnv6, 512, [3, 3], stride=1, scope='cnv6b')
            cnv7 = slim.conv2d(cnv6b, 512, [3, 3], stride=2, scope='cnv7')
            cnv7b = slim.conv2d(cnv7, 512, [3, 3], stride=1, scope='cnv7b')

            # ---- Disparity decoder ----
            upcnv7 = slim.conv2d_transpose(cnv7b, 512, [3, 3], stride=2,
                                           scope='upcnv7')
            # There might be dimension mismatch due to uneven
            # down/up-sampling
            upcnv7 = resize_like(upcnv7, cnv6b)
            i7_in = tf.concat([upcnv7, cnv6b], axis=3)
            icnv7 = slim.conv2d(i7_in, 512, [3, 3], stride=1, scope='icnv7')

            upcnv6 = slim.conv2d_transpose(icnv7, 512, [3, 3], stride=2,
                                           scope='upcnv6')
            upcnv6 = resize_like(upcnv6, cnv5b)
            i6_in = tf.concat([upcnv6, cnv5b], axis=3)
            icnv6 = slim.conv2d(i6_in, 512, [3, 3], stride=1, scope='icnv6')

            upcnv5 = slim.conv2d_transpose(icnv6, 256, [3, 3], stride=2,
                                           scope='upcnv5')
            upcnv5 = resize_like(upcnv5, cnv4b)
            i5_in = tf.concat([upcnv5, cnv4b], axis=3)
            icnv5 = slim.conv2d(i5_in, 256, [3, 3], stride=1, scope='icnv5')

            upcnv4 = slim.conv2d_transpose(icnv5, 128, [3, 3], stride=2,
                                           scope='upcnv4')
            i4_in = tf.concat([upcnv4, cnv3b], axis=3)
            icnv4 = slim.conv2d(i4_in, 128, [3, 3], stride=1, scope='icnv4')
            disp4 = DISP_SCALING * slim.conv2d(
                icnv4, 1, [3, 3], stride=1, activation_fn=tf.sigmoid,
                normalizer_fn=None, scope='disp4') + MIN_DISP
            disp4 = tf.image.resize_bilinear(disp4, [H // 8, W // 8])
            # Builtin int replaces np.int, which NumPy 1.24 removed; np.int
            # was only an alias of the builtin.
            disp4_up = tf.image.resize_bilinear(
                disp4, [int(H / 4), int(W / 4)])

            upcnv3 = slim.conv2d_transpose(icnv4, 64, [3, 3], stride=2,
                                           scope='upcnv3')
            i3_in = tf.concat([upcnv3, cnv2b, disp4_up], axis=3)
            icnv3 = slim.conv2d(i3_in, 64, [3, 3], stride=1, scope='icnv3')
            disp3 = DISP_SCALING * slim.conv2d(
                icnv3, 1, [3, 3], stride=1, activation_fn=tf.sigmoid,
                normalizer_fn=None, scope='disp3') + MIN_DISP
            disp3 = tf.image.resize_bilinear(disp3, [H // 4, W // 4])
            cnv1b_shape = cnv1b.get_shape().as_list()
            disp3_up = tf.image.resize_bilinear(
                disp3, [cnv1b_shape[1], cnv1b_shape[2]])

            upcnv2 = slim.conv2d_transpose(icnv3, 32, [3, 3], stride=2,
                                           scope='upcnv2')
            upcnv2 = tf.image.resize_bilinear(
                upcnv2, [cnv1b_shape[1], cnv1b_shape[2]])
            i2_in = tf.concat([upcnv2, cnv1b, disp3_up], axis=3)
            icnv2 = slim.conv2d(i2_in, 32, [3, 3], stride=1, scope='icnv2')
            disp2 = DISP_SCALING * slim.conv2d(
                icnv2, 1, [3, 3], stride=1, activation_fn=tf.sigmoid,
                normalizer_fn=None, scope='disp2') + MIN_DISP
            disp2 = tf.image.resize_bilinear(disp2, [H // 2, W // 2])
            disp2_up = tf.image.resize_bilinear(disp2, [H, W])

            upcnv1 = slim.conv2d_transpose(icnv2, 16, [3, 3], stride=2,
                                           scope='upcnv1')
            disp2_up = tf.image.resize_bilinear(disp2_up, [
                upcnv1.get_shape().as_list()[1],
                upcnv1.get_shape().as_list()[2]
            ])
            i1_in = tf.concat([upcnv1, disp2_up], axis=3)
            icnv1 = slim.conv2d(i1_in, 16, [3, 3], stride=1, scope='icnv1')
            disp1 = DISP_SCALING * slim.conv2d(
                icnv1, 1, [3, 3], stride=1, activation_fn=tf.sigmoid,
                normalizer_fn=None, scope='disp1') + MIN_DISP
            disp1 = tf.image.resize_bilinear(disp1, [H, W])

            # ---- Edge mask layers ----
            if do_edge:
                with tf.variable_scope('edge'):
                    upcnv7_e = slim.conv2d_transpose(
                        cnv7b, 512, [4, 4], stride=2, scope='upcnv7')
                    # There might be dimension mismatch due to uneven
                    # down/up-sampling
                    upcnv7_e = resize_like(upcnv7_e, cnv6b)
                    i7_in_e = tf.concat([upcnv7_e, cnv6b], axis=3)
                    icnv7_e = slim.conv2d(i7_in_e, 512, [3, 3], stride=1,
                                          scope='icnv7')

                    upcnv6_e = slim.conv2d_transpose(
                        icnv7_e, 512, [4, 4], stride=2, scope='upcnv6')
                    upcnv6_e = resize_like(upcnv6_e, cnv5b)
                    i6_in_e = tf.concat([upcnv6_e, cnv5b], axis=3)
                    icnv6_e = slim.conv2d(i6_in_e, 512, [3, 3], stride=1,
                                          scope='icnv6')

                    upcnv5_e = slim.conv2d_transpose(
                        icnv6_e, 256, [4, 4], stride=2, scope='upcnv5')
                    upcnv5_e = resize_like(upcnv5_e, cnv4b)
                    i5_in_e = tf.concat([upcnv5_e, cnv4b], axis=3)
                    icnv5_e = slim.conv2d(i5_in_e, 256, [3, 3], stride=1,
                                          scope='icnv5')

                    upcnv4_e = slim.conv2d_transpose(
                        icnv5_e, 128, [4, 4], stride=2, scope='upcnv4')
                    i4_in_e = tf.concat([upcnv4_e, cnv3b], axis=3)
                    icnv4_e = slim.conv2d(i4_in_e, 128, [3, 3], stride=1,
                                          scope='icnv4')
                    edge4 = slim.conv2d(
                        icnv4_e, 1, [3, 3], stride=1,
                        activation_fn=tf.sigmoid, normalizer_fn=None,
                        scope='edge4') + MIN_EDGE
                    edge4 = tf.image.resize_nearest_neighbor(
                        edge4, [H // 8, W // 8])
                    edge4_up = tf.image.resize_nearest_neighbor(
                        edge4, [int(H / 4), int(W / 4)])

                    upcnv3_e = slim.conv2d_transpose(
                        icnv4_e, 64, [4, 4], stride=2, scope='upcnv3')
                    i3_in_e = tf.concat([upcnv3_e, cnv2b, edge4_up], axis=3)
                    icnv3_e = slim.conv2d(i3_in_e, 64, [3, 3], stride=1,
                                          scope='icnv3')
                    edge3 = slim.conv2d(
                        icnv3_e, 1, [3, 3], stride=1,
                        activation_fn=tf.sigmoid, normalizer_fn=None,
                        scope='edge3') + MIN_EDGE
                    edge3 = tf.image.resize_nearest_neighbor(
                        edge3, [H // 4, W // 4])
                    edge3_up = tf.image.resize_nearest_neighbor(
                        edge3, [int(H / 2), int(W / 2)])
                    edge3_up = tf.image.resize_nearest_neighbor(
                        edge3_up, [cnv1b_shape[1], cnv1b_shape[2]])

                    upcnv2_e = slim.conv2d_transpose(
                        icnv3_e, 32, [4, 4], stride=2, scope='upcnv2')
                    upcnv2_e = tf.image.resize_nearest_neighbor(
                        upcnv2_e, [cnv1b_shape[1], cnv1b_shape[2]])
                    i2_in_e = tf.concat([upcnv2_e, cnv1b, edge3_up], axis=3)
                    icnv2_e = slim.conv2d(i2_in_e, 32, [3, 3], stride=1,
                                          scope='icnv2')
                    edge2 = slim.conv2d(
                        icnv2_e, 1, [3, 3], stride=1,
                        activation_fn=tf.sigmoid, normalizer_fn=None,
                        scope='edge2') + MIN_EDGE
                    edge2 = tf.image.resize_nearest_neighbor(
                        edge2, [H // 2, W // 2])

                    upcnv1_e = slim.conv2d_transpose(
                        icnv2_e, 16, [4, 4], stride=2, scope='upcnv1')
                    # edge2 is resized straight to upcnv1_e's size; an
                    # earlier resize to [H, W] was overwritten before use
                    # and has been dropped.
                    edge2_up = tf.image.resize_nearest_neighbor(
                        edge2, [
                            upcnv1_e.get_shape().as_list()[1],
                            upcnv1_e.get_shape().as_list()[2]
                        ])
                    i1_in_e = tf.concat([upcnv1_e, edge2_up], axis=3)
                    icnv1_e = slim.conv2d(i1_in_e, 16, [3, 3], stride=1,
                                          scope='icnv1')
                    edge1 = slim.conv2d(
                        icnv1_e, 1, [3, 3], stride=1,
                        activation_fn=tf.sigmoid, normalizer_fn=None,
                        scope='edge1') + MIN_EDGE
                    edge1 = tf.image.resize_nearest_neighbor(edge1, [H, W])
            else:
                edge1 = None
                edge2 = None
                edge3 = None
                edge4 = None

            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return ([disp1, disp2, disp3, disp4],
                    [edge1, edge2, edge3, edge4],
                    end_points)
def buildNet(self,
             images,
             category_classes,
             attribute_classes,
             weight_decay=0.0005,
             is_training=False,
             dropout_keep_prob=0.5,
             stage='landmark'):
    """Build the landmark heat-map network and, optionally, the classifier.

    The graph is assembled in this order:
      1. A VGG_16 base (via ``self.vgg.buildNet``) cut at ``'conv4'``.
      2. ``BCRNN``: a 1x1 convolution producing 8 landmark heat maps, which
         are refined by ``self.BCRNNBlock`` along the grammar relations below
         and merged per landmark with an element-wise maximum.
      3. ``LandmarkAttention`` / ``ClothingAttention``: two attention maps
         that are multiplied with the VGG features.
      4. ``Classification``: the remaining VGG-style layers (pool4 ... fc7)
         followed by separate category and attribute heads.

    Args:
        images: Input passed unchanged to ``self.vgg.buildNet``.
        category_classes: Number of output channels of ``fc8_category``
            (also forwarded to the VGG builder).
        attribute_classes: Number of output channels of ``fc8_attribute``.
        weight_decay: Scale of the L2 regularizer applied to the
            ``slim.conv2d`` layers built here (also forwarded to VGG).
        is_training: Forwarded to the VGG builder and to the dropout layers.
        dropout_keep_prob: Keep probability of ``dropout6`` / ``dropout7``
            (also forwarded to VGG).
        stage: When equal to ``'landmark'`` (case-insensitive) only the
            landmark branch is built.

    Returns:
        ``(output, None)`` when ``stage`` is ``'landmark'``, where ``output``
        is the sigmoid of the refined heat maps stacked on axis 3 in the
        order l_collar, l_sleeve, l_waistline, l_hem, r_collar, r_sleeve,
        r_waistline, r_hem.
        Otherwise ``(net_category, net_attribute)``: the two ``fc8`` heads
        with axes 1 and 2 squeezed out (those axes must therefore have
        size 1 after the 7x7 ``'VALID'`` ``fc6`` convolution).
    """
    # Shared VGG_16 feature extractor; its end points are not needed here.
    net, _ = self.vgg.buildNet(
        'VGG_16',
        images,
        category_classes,
        is_training=is_training,
        weight_decay=weight_decay,
        dropout_keep_prob=dropout_keep_prob,
        final_endpoint='conv4')

    with tf.variable_scope('BCRNN'):
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=None,
                weights_regularizer=slim.l2_regularizer(weight_decay),
                padding='SAME'):
            # One heat map per landmark (8 landmarks, no background channel).
            # Channel order: l-collar, l-sleeve, l-waistline, l-hem,
            #                r-collar, r-sleeve, r-waistline, r-hem.
            heat_maps = slim.conv2d(
                net, 8, [1, 1], scope='ConstructHeatMaps')
            heat_maps = tf.sigmoid(heat_maps, name='sigmoid')

            # Grammar relations refined by the BCRNN blocks.
            #   RK (kinematic):
            #     l.collar <-> l.waistline <-> l.hem
            #     l.collar <-> l.sleeve
            #     r.collar <-> r.waistline <-> r.hem
            #     r.collar <-> r.sleeve
            #   RS (symmetry):
            #     l.collar <-> r.collar
            #     l.sleeve <-> r.sleeve
            #     l.waistline <-> r.waistline
            #     l.hem <-> r.hem
            # Presumably the arguments are (heat maps, number of nodes,
            # channel indices, scope name) and the result holds one refined
            # map per listed index -- verify against BCRNNBlock.
            rk1 = self.BCRNNBlock(heat_maps, 3, [0, 2, 3], 'RK_1')
            rk2 = self.BCRNNBlock(heat_maps, 2, [0, 1], 'RK_2')
            rk3 = self.BCRNNBlock(heat_maps, 3, [4, 6, 7], 'RK_3')
            rk4 = self.BCRNNBlock(heat_maps, 2, [4, 5], 'RK_4')
            rs1 = self.BCRNNBlock(heat_maps, 2, [0, 4], 'RS_1')
            rs2 = self.BCRNNBlock(heat_maps, 2, [1, 5], 'RS_2')
            rs3 = self.BCRNNBlock(heat_maps, 2, [2, 6], 'RS_3')
            rs4 = self.BCRNNBlock(heat_maps, 2, [3, 7], 'RS_4')

            # A landmark takes part in several relations; merge its refined
            # maps with an element-wise maximum.
            l_collar = tf.reduce_max([rk1[0], rk2[0], rs1[0]], axis=0)
            l_sleeve = tf.reduce_max([rk2[1], rs2[0]], axis=0)
            l_waistline = tf.reduce_max([rk1[1], rs3[0]], axis=0)
            l_hem = tf.reduce_max([rk1[2], rs4[0]], axis=0)
            r_collar = tf.reduce_max([rk3[0], rk4[0], rs1[1]], axis=0)
            r_sleeve = tf.reduce_max([rk4[1], rs2[1]], axis=0)
            r_waistline = tf.reduce_max([rk3[1], rs3[1]], axis=0)
            r_hem = tf.reduce_max([rk3[2], rs4[1]], axis=0)

            refined_heatmaps = tf.stack(
                [
                    l_collar,
                    l_sleeve,
                    l_waistline,
                    l_hem,
                    r_collar,
                    r_sleeve,
                    r_waistline,
                    r_hem,
                ],
                axis=3)

            # Landmark predictions. Sigmoid rather than softmax: every pixel
            # gets its own probability of being a key point.
            # NOTE(review): heat_maps already went through a sigmoid above;
            # confirm that BCRNNBlock returns un-squashed values.
            output = tf.sigmoid(refined_heatmaps, name='RefinedHeatMaps')

    if stage.lower() == 'landmark':
        return output, None

    with tf.variable_scope('LandmarkAttention'):
        # All 8 channels are landmarks. The former `output[..., :-1]` slice
        # dated from the 9-channel (landmarks + background) variant and
        # silently dropped r_hem from the attention map.
        # TODO: the original author suggests a max over channels instead of
        # a mean here.
        AL = tf.reduce_mean(output, axis=-1, keep_dims=True)
        # AL has a single channel; tf.multiply broadcasts it over the
        # channels of `net`, so no explicit tf.tile is needed.
        GL = tf.multiply(AL, net)

    with tf.variable_scope('ClothingAttention'):
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                weights_regularizer=slim.l2_regularizer(weight_decay),
                biases_initializer=tf.zeros_initializer(),
                scope='ClothingAttention'):
            # Down-sample twice, then up-sample by 4 back to the input size.
            AC = slim.max_pool2d(net, [2, 2], scope='AC_pool1')
            AC = slim.conv2d(AC, 512, [3, 3], scope='AC_conv1')
            AC = slim.max_pool2d(AC, [2, 2], scope='AC_pool2')
            AC = slim.conv2d(AC, 512, [3, 3], scope='AC_conv2')
            # NOTE(review): the arg_scope above only covers slim.conv2d, so
            # this layer keeps slim's default activation (ReLU) and has no
            # weight regularizer; ReLU followed by sigmoid confines the
            # attention to [0.5, 1). Confirm this is intended.
            AC = slim.conv2d_transpose(
                AC,
                num_outputs=512,
                stride=4,
                kernel_size=[3, 3],
                padding='SAME',
                scope='AC_upsample')
            AC = tf.sigmoid(AC, 'sigmoid')
            GC = tf.multiply(AC, net)

    with tf.variable_scope('Classification'):
        with slim.arg_scope(
                [slim.conv2d],
                activation_fn=tf.nn.relu,
                weights_regularizer=slim.l2_regularizer(weight_decay),
                biases_initializer=tf.zeros_initializer()):
            # Fuse the raw features with both attention-weighted versions.
            net = net + GL + GC
            net = slim.max_pool2d(net, [2, 2], scope='pool4')
            net = slim.repeat(
                net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
            net = slim.max_pool2d(net, [2, 2], scope='pool5')

            # Use conv2d instead of fully_connected layers.
            net = slim.conv2d(
                net, 4096, [7, 7], padding='VALID', scope='fc6')
            net = slim.dropout(
                net,
                dropout_keep_prob,
                is_training=is_training,
                scope='dropout6')
            net = slim.conv2d(net, 4096, [1, 1], scope='fc7')
            net = slim.dropout(
                net,
                dropout_keep_prob,
                is_training=is_training,
                scope='dropout7')

            # Category head.
            # NOTE(review): this layer inherits ReLU from the arg_scope, so
            # the returned category scores are non-negative -- confirm.
            net_category = slim.conv2d(
                net, category_classes, [1, 1], scope='fc8_category')
            net_category = tf.squeeze(
                net_category, [1, 2], name='fc8_category/squeezed')

            # Attribute head: independent per-attribute sigmoid scores.
            net_attribute = slim.conv2d(
                net,
                attribute_classes, [1, 1],
                activation_fn=tf.nn.sigmoid,
                scope='fc8_attribute')
            net_attribute = tf.squeeze(
                net_attribute, [1, 2], name='fc8_attribute/squeezed')

    return net_category, net_attribute