def create_model(self, model_input, vocab_size, num_mixtures=None,
                 l2_penalty=1e-8, **unused_params):
    """Creates a Mixture of (Logistic) Experts model.

    The model consists of a per-class softmax distribution over a
    configurable number of logistic classifiers. One of the classifiers
    in the mixture is not trained, and always predicts 0.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values.

    Returns:
      A dictionary with a tensor containing the probability predictions of
      the model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes.
    """
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures

    gate_activations = slim.fully_connected(
        model_input,
        vocab_size * (num_mixtures + 1),
        activation_fn=None,
        biases_initializer=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="gates")
    expert_activations = slim.fully_connected(
        model_input,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts")

    gating_distribution = tf.nn.softmax(tf.reshape(
        gate_activations,
        [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(tf.reshape(
        expert_activations,
        [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    final_probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    final_probabilities = tf.reshape(final_probabilities_by_class_and_batch,
                                     [-1, vocab_size])
    return {"predictions": final_probabilities}
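A quick sanity check of the mixture arithmetic above, as a minimal NumPy sketch with made-up shapes (independent of TensorFlow): the softmax gate has num_mixtures + 1 columns, and the last column, which weights the untrained dummy expert that always predicts 0, simply drops out of the weighted sum.

import numpy as np

# Hypothetical shapes: batch_size=2, vocab_size=3, num_mixtures=2.
batch_size, vocab_size, num_mixtures = 2, 3, 2
gate_act = np.random.randn(batch_size * vocab_size, num_mixtures + 1)
expert_act = np.random.randn(batch_size * vocab_size, num_mixtures)

gates = np.exp(gate_act) / np.exp(gate_act).sum(axis=1, keepdims=True)  # softmax
experts = 1.0 / (1.0 + np.exp(-expert_act))                             # sigmoid

# The last gate column weights the implicit dummy expert that always
# outputs 0, so it contributes nothing and is dropped from the sum.
probs = (gates[:, :num_mixtures] * experts).sum(axis=1).reshape(batch_size, vocab_size)
assert probs.shape == (batch_size, vocab_size) and (probs <= 1.0).all()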
def encoder(self, images, is_training):
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0
    with tf.variable_scope('encoder'):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                weights_regularizer=slim.l2_regularizer(weight_decay),
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=self.batch_norm_params):
                net = images
                net = slim.conv2d(net, 32, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_1a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_1b')
                net = slim.conv2d(net, 64, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_2a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_2b')
                net = slim.conv2d(net, 128, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_3a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_3b')
                net = slim.conv2d(net, 256, [4, 4], 2, activation_fn=activation_fn, scope='Conv2d_4a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 256, [4, 4], 1,
                                  activation_fn=activation_fn, scope='Conv2d_4b')
                net = slim.flatten(net)
                fc1 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None, normalizer_fn=None, scope='Fc_1')
                fc2 = slim.fully_connected(net, self.latent_variable_dim,
                                           activation_fn=None, normalizer_fn=None, scope='Fc_2')
                return fc1, fc2
def decoder(self, latent_var, is_training):
    activation_fn = leaky_relu  # tf.nn.relu
    weight_decay = 0.0
    with tf.variable_scope('decoder'):
        with slim.arg_scope([slim.batch_norm], is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.fully_connected],
                                weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                weights_regularizer=slim.l2_regularizer(weight_decay),
                                normalizer_fn=slim.batch_norm,
                                normalizer_params=self.batch_norm_params):
                net = slim.fully_connected(latent_var, 4096, activation_fn=None,
                                           normalizer_fn=None, scope='Fc_1')
                net = tf.reshape(net, [-1, 4, 4, 256], name='Reshape')

                net = tf.image.resize_nearest_neighbor(net, size=(8, 8), name='Upsample_1')
                net = slim.conv2d(net, 128, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_1a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 128, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_1b')

                net = tf.image.resize_nearest_neighbor(net, size=(16, 16), name='Upsample_2')
                net = slim.conv2d(net, 64, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_2a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 64, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_2b')

                net = tf.image.resize_nearest_neighbor(net, size=(32, 32), name='Upsample_3')
                net = slim.conv2d(net, 32, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_3a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 32, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_3b')

                net = tf.image.resize_nearest_neighbor(net, size=(64, 64), name='Upsample_4')
                net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=activation_fn, scope='Conv2d_4a')
                net = slim.repeat(net, 3, conv2d_block, 0.1, 3, [3, 3], 1,
                                  activation_fn=activation_fn, scope='Conv2d_4b')
                net = slim.conv2d(net, 3, [3, 3], 1, activation_fn=None, scope='Conv2d_4c')
                return net
def __init__(self, net, labels_one_hot, model_params, method_params):
    """Stores arguments in member variables for further use.

    Args:
      net: A tensor with shape [batch_size, num_features, feature_size]
        which contains some extracted image features.
      labels_one_hot: An optional (can be None) ground truth labels tensor
        for the input features, with shape
        [batch_size, seq_length, num_char_classes].
      model_params: A namedtuple with model parameters (model.ModelParams).
      method_params: A SequenceLayerParams instance.
    """
    self._params = model_params
    self._mparams = method_params
    self._net = net
    self._labels_one_hot = labels_one_hot
    self._batch_size = net.get_shape().dims[0].value

    # Initialize parameters for char logits which will be computed on the fly
    # inside an LSTM decoder.
    self._char_logits = {}
    regularizer = slim.l2_regularizer(self._mparams.weight_decay)
    self._softmax_w = slim.model_variable(
        'softmax_w',
        [self._mparams.num_lstm_units, self._params.num_char_classes],
        initializer=orthogonal_initializer,
        regularizer=regularizer)
    self._softmax_b = slim.model_variable(
        'softmax_b',
        [self._params.num_char_classes],
        initializer=tf.zeros_initializer(),
        regularizer=regularizer)
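For context, a hedged sketch of how the shared softmax variables created above would typically be applied per decoder step; the helper name is hypothetical and the use of tf.nn.xw_plus_b is an assumption (assuming the usual import tensorflow as tf context), not taken from this snippet.

def char_logit_sketch(lstm_output, softmax_w, softmax_b):
    # lstm_output: [batch_size, num_lstm_units]; returns per-character logits
    # of shape [batch_size, num_char_classes] (hypothetical helper).
    return tf.nn.xw_plus_b(lstm_output, softmax_w, softmax_b)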
def _extra_conv_arg_scope(weight_decay=0.00001, activation_fn=None, normalizer_fn=None):
    with slim.arg_scope(
            [slim.conv2d, slim.conv2d_transpose],
            padding='SAME',
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
            activation_fn=activation_fn,
            normalizer_fn=normalizer_fn):
        with slim.arg_scope(
                [slim.fully_connected],
                weights_regularizer=slim.l2_regularizer(weight_decay),
                weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
                activation_fn=activation_fn,
                normalizer_fn=normalizer_fn) as arg_sc:
            return arg_sc
def build_feature_pyramid(self):
    '''
    Builds P2, P3, P4, P5, P6.
    Reference: https://github.com/CharlesShang/FastMaskRCNN
    :return: multi-scale feature maps
    '''
    feature_pyramid = {}
    with tf.variable_scope('feature_pyramid'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(self.rpn_weight_decay)):
            feature_pyramid['P5'] = slim.conv2d(self.feature_maps_dict['C5'],
                                                num_outputs=256,
                                                kernel_size=[1, 1],
                                                stride=1,
                                                scope='build_P5')
            feature_pyramid['P6'] = slim.max_pool2d(feature_pyramid['P5'],
                                                    kernel_size=[2, 2], stride=2,
                                                    scope='build_P6')
            # P6 is a downsample of P5
            for layer in range(4, 1, -1):
                p, c = feature_pyramid['P' + str(layer + 1)], self.feature_maps_dict['C' + str(layer)]
                up_sample_shape = tf.shape(c)
                up_sample = tf.image.resize_nearest_neighbor(
                    p, [up_sample_shape[1], up_sample_shape[2]],
                    name='build_P%d/up_sample_nearest_neighbor' % layer)
                c = slim.conv2d(c, num_outputs=256, kernel_size=[1, 1], stride=1,
                                scope='build_P%d/reduce_dimension' % layer)
                p = up_sample + c
                p = slim.conv2d(p, 256, kernel_size=[3, 3], stride=1, padding='SAME',
                                scope='build_P%d/avoid_aliasing' % layer)
                feature_pyramid['P' + str(layer)] = p
    return feature_pyramid
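An illustrative shape walk-through of the pyramid construction above, assuming a hypothetical 512x512 input and a backbone that halves resolution at each stage (all pyramid levels carry 256 channels):

# Hypothetical input resolution; backbone halves resolution per stage.
size = {'C%d' % l: 512 // 2 ** l for l in range(2, 6)}   # C2..C5: 128, 64, 32, 16
p_size = {'P5': size['C5'], 'P6': size['C5'] // 2}       # P5 from C5; P6 = stride-2 pool of P5
for l in range(4, 1, -1):                                # same recurrence as the loop above
    p_size['P%d' % l] = size['C%d' % l]                  # upsampled to the lateral C_l resolution
assert p_size == {'P5': 16, 'P6': 8, 'P4': 32, 'P3': 64, 'P2': 128}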
def _l2_regularized_embedding(self, n_class, h_dim, scope_name, var_name='y_emb'):
    with tf.variable_scope(scope_name):
        embeddings = tf.get_variable(
            name=var_name,
            shape=[n_class, h_dim],
            regularizer=slim.l2_regularizer(1e-6))
    return embeddings
def create_model(self, model_input, vocab_size, num_frames, **unused_params):
    """Creates a model which uses a logistic classifier over the average of
    the frame-level features.

    This class is intended to be an example for implementors of frame level
    models. If you want to train a model over averaged features it is more
    efficient to average them beforehand rather than on the fly.

    Args:
      model_input: A 'batch_size' x 'max_frames' x 'num_features' matrix of
        input features.
      vocab_size: The number of classes in the dataset.
      num_frames: A vector of length 'batch' which indicates the number of
        frames for each video (before padding).

    Returns:
      A dictionary with a tensor containing the probability predictions of
      the model in the 'predictions' key. The dimensions of the tensor are
      'batch_size' x 'num_classes'.
    """
    num_frames = tf.cast(tf.expand_dims(num_frames, 1), tf.float32)
    feature_size = model_input.get_shape().as_list()[2]

    denominators = tf.reshape(
        tf.tile(num_frames, [1, feature_size]), [-1, feature_size])
    avg_pooled = tf.reduce_sum(model_input, axis=[1]) / denominators

    output = slim.fully_connected(
        avg_pooled, vocab_size, activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(1e-8))
    return {"predictions": output}
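A minimal NumPy check of the masked-average arithmetic above (hypothetical sizes): padded frames contribute zeros to the sum, so dividing by the true frame count rather than max_frames recovers the per-video mean over real frames only.

import numpy as np

batch, max_frames, feat = 2, 4, 3
x = np.zeros((batch, max_frames, feat))
x[0, :2] = 1.0                       # video 0 has 2 real frames of all-ones
num_frames = np.array([[2.0], [4.0]])
avg = x.sum(axis=1) / num_frames     # broadcasting plays the role of tf.tile
assert np.allclose(avg[0], 1.0)      # mean over the 2 real frames, not over 4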
def inference(self):
    _x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
    # tf.image_summary(_x.op.name, _x, max_images=10, collections=[digits.GraphKeys.SUMMARIES_TRAIN])

    # Split out the color channels
    _, model_g, model_b = tf.split(_x, 3, 3, name='split_channels')
    # tf.image_summary(model_g.op.name, model_g, max_images=10, collections=[digits.GraphKeys.SUMMARIES_TRAIN])
    # tf.image_summary(model_b.op.name, model_b, max_images=10, collections=[digits.GraphKeys.SUMMARIES_TRAIN])

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        with tf.variable_scope("siamese") as scope:
            def make_tower(net):
                net = slim.conv2d(net, 20, [5, 5], padding='VALID', scope='conv1')
                net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool1')
                net = slim.conv2d(net, 50, [5, 5], padding='VALID', scope='conv2')
                net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool2')
                net = slim.flatten(net)
                net = slim.fully_connected(net, 500, scope='fc1')
                net = slim.fully_connected(net, 2, activation_fn=None, scope='fc2')
                return net

            model_g = make_tower(model_g)
            model_g = tf.reshape(model_g, shape=[-1, 2])
            scope.reuse_variables()
            model_b = make_tower(model_b)
            model_b = tf.reshape(model_b, shape=[-1, 2])

            return [model_g, model_b]
def build_graph(self, image, label):
    image = tf.expand_dims(image, 3)
    image = image * 2 - 1

    is_training = get_current_tower_context().is_training
    with slim.arg_scope([slim.layers.fully_connected],
                        weights_regularizer=slim.l2_regularizer(1e-5)):
        l = slim.layers.conv2d(image, 32, [3, 3], scope='conv0')
        l = slim.layers.max_pool2d(l, [2, 2], scope='pool0')
        l = slim.layers.conv2d(l, 32, [3, 3], padding='SAME', scope='conv1')
        l = slim.layers.conv2d(l, 32, [3, 3], scope='conv2')
        l = slim.layers.max_pool2d(l, [2, 2], scope='pool1')
        l = slim.layers.conv2d(l, 32, [3, 3], scope='conv3')
        l = slim.layers.flatten(l, scope='flatten')
        l = slim.layers.fully_connected(l, 512, scope='fc0')
        l = slim.layers.dropout(l, is_training=is_training)
        logits = slim.layers.fully_connected(l, 10, activation_fn=None, scope='fc1')

    cost = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=label)
    cost = tf.reduce_mean(cost, name='cross_entropy_loss')

    acc = tf.to_float(tf.nn.in_top_k(logits, label, 1))
    acc = tf.reduce_mean(acc, name='accuracy')
    summary.add_moving_summary(acc)

    summary.add_moving_summary(cost)
    summary.add_param_summary(('.*/weights', ['histogram', 'rms']))  # slim uses different variable names
    return cost + regularize_cost_from_collection()
def inference(image_batch, keep_probability,
              phase_train=True, bottleneck_layer_size=512, weight_decay=0.0):
    with tf.variable_scope('LResnetE_IR'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_initializer=tf.contrib.layers.xavier_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            biases_initializer=None,  # default: no biases
                            activation_fn=None,
                            normalizer_fn=None):
            with slim.arg_scope([slim.conv2d], kernel_size=3):
                with slim.arg_scope([slim.batch_norm],
                                    decay=0.995,
                                    epsilon=1e-5,
                                    scale=True,
                                    is_training=phase_train,
                                    activation_fn=prelu,
                                    updates_collections=None,
                                    variables_collections=[tf.GraphKeys.TRAINABLE_VARIABLES]):
                    return LResnet50E_IR(images=image_batch,
                                         keep_probability=keep_probability,
                                         phase_train=phase_train,
                                         bottleneck_layer_size=bottleneck_layer_size,
                                         reuse=None)
def inference(image_batch, keep_probability,
              phase_train=True, bottleneck_layer_size=512, weight_decay=0.0):
    batch_norm_params = {
        'decay': 0.995,
        'epsilon': 0.001,
        'scale': True,
        'is_training': phase_train,
        'updates_collections': None,
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES]
    }
    with tf.variable_scope('Resface'):
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            weights_initializer=tf.contrib.layers.xavier_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            activation_fn=prelu,
                            normalizer_fn=slim.batch_norm,
                            # normalizer_fn=None,
                            normalizer_params=batch_norm_params):
            with slim.arg_scope([slim.conv2d], kernel_size=3):
                return resface20(images=image_batch,
                                 keep_probability=keep_probability,
                                 phase_train=phase_train,
                                 bottleneck_layer_size=bottleneck_layer_size,
                                 reuse=None)
def __init__(self, is_training):
    self.input_image = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 3], name='input_image')
    self.input_label = tf.placeholder(dtype=tf.float32, shape=[None, 100], name='input_label')
    self.input_nlcd = tf.placeholder(dtype=tf.float32, shape=[None, 15], name='input_nlcd')
    # logits, end_points = resnet_v2.resnet_v2_50(self.input_image, num_classes=100, is_training=True)
    # flatten_hist = tf.reshape(self.input_image, [-1, 96])
    self.keep_prob = tf.placeholder(tf.float32)

    weights_regularizer = slim.l2_regularizer(FLAGS.weight_decay)

    flatten_hist = tf.reshape(self.input_image, [-1, 3 * 64 * 64])
    flatten_hist = tf.concat([flatten_hist, self.input_nlcd], 1)
    x = slim.fully_connected(flatten_hist, 512, weights_regularizer=weights_regularizer, scope='decoder/fc_1')
    x = slim.fully_connected(x, 1024, weights_regularizer=weights_regularizer, scope='decoder/fc_2')
    flatten_hist = slim.fully_connected(x, 512, weights_regularizer=weights_regularizer, scope='decoder/fc_3')

    all_logits = []
    all_output = []
    for i in range(100):
        if i == 0:
            current_input_x = flatten_hist
        else:
            current_output = tf.concat(all_output, 1)
            current_input_x = tf.concat([flatten_hist, current_output], 1)
        x = slim.fully_connected(current_input_x, 256, weights_regularizer=weights_regularizer)
        x = slim.fully_connected(x, 100, weights_regularizer=weights_regularizer)
        # x = slim.fully_connected(x, 17, weights_regularizer=weights_regularizer)
        x = slim.dropout(x, keep_prob=self.keep_prob, is_training=is_training)
        all_logits.append(slim.fully_connected(x, 1, activation_fn=None,
                                               weights_regularizer=weights_regularizer))
        all_output.append(tf.sigmoid(all_logits[i]))

    final_logits = tf.concat(all_logits, 1)
    final_output = tf.sigmoid(final_logits)
    self.output = final_output

    self.ce_loss = tf.reduce_mean(tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=self.input_label, logits=final_logits), 1))
    slim.losses.add_loss(self.ce_loss)
    tf.summary.scalar('ce_loss', self.ce_loss)

    # l2 loss
    self.l2_loss = tf.add_n(slim.losses.get_regularization_losses())
    tf.summary.scalar('l2_loss', self.l2_loss)

    # total loss
    self.total_loss = slim.losses.get_total_loss()
    tf.summary.scalar('total_loss', self.total_loss)
    # self.output = tf.sigmoid(x)
def build_resnet50(inputs, get_pred, is_training, var_scope):
    batch_norm_params = {'is_training': is_training}
    with tf.variable_scope(var_scope) as sc:
        with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(0.0001),
                            activation_fn=tf.nn.relu):
            conv1 = conv(inputs, 64, 7, 2)   # H/2  -   64D
            pool1 = maxpool(conv1, 3)        # H/4  -   64D
            conv2 = resblock(pool1, 64, 3)   # H/8  -  256D
            conv3 = resblock(conv2, 128, 4)  # H/16 -  512D
            conv4 = resblock(conv3, 256, 6)  # H/32 - 1024D
            conv5 = resblock(conv4, 512, 3)  # H/64 - 2048D

            skip1 = conv1
            skip2 = pool1
            skip3 = conv2
            skip4 = conv3
            skip5 = conv4

            # DECODING
            upconv6 = upconv(conv5, 512, 3, 2)  # H/32
            upconv6 = resize_like(upconv6, skip5)
            concat6 = tf.concat([upconv6, skip5], 3)
            iconv6 = conv(concat6, 512, 3, 1)

            upconv5 = upconv(iconv6, 256, 3, 2)  # H/16
            upconv5 = resize_like(upconv5, skip4)
            concat5 = tf.concat([upconv5, skip4], 3)
            iconv5 = conv(concat5, 256, 3, 1)

            upconv4 = upconv(iconv5, 128, 3, 2)  # H/8
            upconv4 = resize_like(upconv4, skip3)
            concat4 = tf.concat([upconv4, skip3], 3)
            iconv4 = conv(concat4, 128, 3, 1)
            pred4 = get_pred(iconv4)
            upred4 = upsample_nn(pred4, 2)

            upconv3 = upconv(iconv4, 64, 3, 2)  # H/4
            concat3 = tf.concat([upconv3, skip2, upred4], 3)
            iconv3 = conv(concat3, 64, 3, 1)
            pred3 = get_pred(iconv3)
            upred3 = upsample_nn(pred3, 2)

            upconv2 = upconv(iconv3, 32, 3, 2)  # H/2
            concat2 = tf.concat([upconv2, skip1, upred3], 3)
            iconv2 = conv(concat2, 32, 3, 1)
            pred2 = get_pred(iconv2)
            upred2 = upsample_nn(pred2, 2)

            upconv1 = upconv(iconv2, 16, 3, 2)  # H
            concat1 = tf.concat([upconv1, upred2], 3)
            iconv1 = conv(concat1, 16, 3, 1)
            pred1 = get_pred(iconv1)

            return [pred1, pred2, pred3, pred4]
def prediction_layer(cfg, input, name, num_outputs):
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        padding='SAME',
                        activation_fn=None,
                        normalizer_fn=None,
                        weights_regularizer=slim.l2_regularizer(cfg.weight_decay)):
        with tf.variable_scope(name):
            pred = slim.conv2d_transpose(input, num_outputs,
                                         kernel_size=[3, 3], stride=2,
                                         scope='block4')
            return pred
def _merge(self, var_list, fan_out, l2_reg=1e-6):
    x = 0.
    with slim.arg_scope(
            [slim.fully_connected],
            num_outputs=fan_out,
            weights_regularizer=slim.l2_regularizer(l2_reg),
            normalizer_fn=None,
            activation_fn=None):
        for var in var_list:
            x = x + slim.fully_connected(var)
    return slim.bias_add(x)
def inference(self):
    _x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.05)):
        # 1*H*W -> 32*H*W
        model = slim.conv2d(_x, 32, [3, 3], padding='SAME', scope='conv1')
        # 32*H*W -> 1024*H/16*W/16
        model = slim.conv2d(model, 1024, [16, 16], padding='VALID', scope='conv2', stride=16)
        model = slim.conv2d_transpose(model, self.input_shape[2], [16, 16], stride=16,
                                      padding='VALID', activation_fn=None, scope='deconv_1')
        return model
def ds_cnn_arg_scope(weight_decay=0):
    """Defines the default ds_cnn argument scope.

    Args:
      weight_decay: The weight decay to use for regularizing the model.

    Returns:
      An `arg_scope` to use for the DS-CNN model.
    """
    with slim.arg_scope(
            [slim.convolution2d, slim.separable_convolution2d],
            weights_initializer=slim.initializers.xavier_initializer(),
            biases_initializer=slim.init_ops.zeros_initializer(),
            weights_regularizer=slim.l2_regularizer(weight_decay)) as sc:
        return sc
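A hedged usage sketch for the scope above (the input placeholder shape and layer names are illustrative assumptions, assuming the usual import tensorflow as tf / slim = tf.contrib.slim context): layers built inside the with block inherit the initializers and the L2 regularizer without repeating them per call.

inputs = tf.placeholder(tf.float32, [None, 49, 10, 1])  # assumed input shape
with slim.arg_scope(ds_cnn_arg_scope(weight_decay=1e-4)):
    net = slim.convolution2d(inputs, 64, [3, 3], scope='conv1')
    # num_outputs=None makes this a depthwise-only separable conv.
    net = slim.separable_convolution2d(net, None, [3, 3],
                                       depth_multiplier=1, scope='dw_conv1')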
def construct_net(self, is_trained=True):
    with slim.arg_scope([slim.conv2d],
                        padding='VALID',
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.conv2d(self.input_images, 6, [5, 5], 1, padding='SAME', scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.conv2d(net, 16, [5, 5], 1, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.conv2d(net, 120, [5, 5], 1, scope='conv5')
        net = slim.flatten(net, scope='flat6')
        net = slim.fully_connected(net, 84, scope='fc7')
        net = slim.dropout(net, self.dropout, is_training=is_trained, scope='dropout8')
        digits = slim.fully_connected(net, 10, scope='fc9')
    return digits
def model(images, weight_decay=1e-5, is_training=True):
    '''Define the model. We use slim's implementation of ResNet.'''
    images = mean_image_subtraction(images)

    with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)):
        logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50')

    with tf.variable_scope('feature_fusion', values=end_points.values()):
        batch_norm_params = {
            'decay': 0.997,
            'epsilon': 1e-5,
            'scale': True,
            'is_training': is_training
        }
        with slim.arg_scope([slim.conv2d],
                            activation_fn=tf.nn.relu,
                            normalizer_fn=slim.batch_norm,
                            normalizer_params=batch_norm_params,
                            weights_regularizer=slim.l2_regularizer(weight_decay)):
            f = [end_points['pool5'], end_points['pool4'],
                 end_points['pool3'], end_points['pool2']]
            for i in range(4):
                print('Shape of f_{} {}'.format(i, f[i].shape))

            g = [None, None, None, None]
            h = [None, None, None, None]
            num_outputs = [None, 128, 64, 32]
            for i in range(4):
                if i == 0:
                    h[i] = f[i]
                else:
                    c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1), num_outputs[i], 1)
                    h[i] = slim.conv2d(c1_1, num_outputs[i], 3)
                if i <= 2:
                    g[i] = unpool(h[i])
                else:
                    g[i] = slim.conv2d(h[i], num_outputs[i], 3)
                print('Shape of h_{} {}, g_{} {}'.format(i, h[i].shape, i, g[i].shape))

            # Here we use a slightly different way for the regression part:
            # we first use a sigmoid to limit the regression range; the same
            # is done for the angle map.
            F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None)
            # 4 channels of axis-aligned bbox and 1 channel of rotation angle
            geo_map = slim.conv2d(g[3], 4, 1, activation_fn=tf.nn.sigmoid,
                                  normalizer_fn=None) * FLAGS.text_scale
            angle_map = (slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid,
                                     normalizer_fn=None) - 0.5) * np.pi / 2  # angle is in [-45, 45]
            F_geometry = tf.concat([geo_map, angle_map], axis=-1)

    return F_score, F_geometry
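A quick numeric check of the angle_map range claimed in the comment above (pure NumPy; the limiting sigmoid values are assumed):

import numpy as np

s = np.array([0.0, 0.5, 1.0])        # limiting values of tf.nn.sigmoid
angle = (s - 0.5) * np.pi / 2        # the same transform applied to angle_map
assert np.allclose(np.degrees(angle), [-45.0, 0.0, 45.0])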
def fc_network(x, neurons, wt_decay, name, num_pred=None, offset=0,
               batch_norm_param=None, dropout_ratio=0.0, is_training=None):
    if dropout_ratio > 0:
        assert is_training is not None, \
            'is_training needs to be defined when training with dropout.'

    repr = []
    for i, neuron in enumerate(neurons):
        init_var = np.sqrt(2.0 / neuron)
        if batch_norm_param is not None:
            x = slim.fully_connected(x, neuron, activation_fn=None,
                                     weights_initializer=tf.random_normal_initializer(stddev=init_var),
                                     weights_regularizer=slim.l2_regularizer(wt_decay),
                                     normalizer_fn=slim.batch_norm,
                                     normalizer_params=batch_norm_param,
                                     biases_initializer=tf.zeros_initializer(),
                                     scope='{:s}_{:d}'.format(name, offset + i))
        else:
            x = slim.fully_connected(x, neuron, activation_fn=tf.nn.relu,
                                     weights_initializer=tf.random_normal_initializer(stddev=init_var),
                                     weights_regularizer=slim.l2_regularizer(wt_decay),
                                     biases_initializer=tf.zeros_initializer(),
                                     scope='{:s}_{:d}'.format(name, offset + i))
        if dropout_ratio > 0:
            x = slim.dropout(x, keep_prob=1 - dropout_ratio, is_training=is_training,
                             scope='{:s}_{:d}'.format('dropout_' + name, offset + i))
        repr.append(x)

    if num_pred is not None:
        init_var = np.sqrt(2.0 / num_pred)
        x = slim.fully_connected(x, num_pred,
                                 weights_regularizer=slim.l2_regularizer(wt_decay),
                                 weights_initializer=tf.random_normal_initializer(stddev=init_var),
                                 biases_initializer=tf.zeros_initializer(),
                                 activation_fn=None,
                                 scope='{:s}_pred'.format(name))
    return x, repr
def network(inputs):
    '''Define the network.'''
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = tf.reshape(inputs, [-1, FLAGS.im_size, FLAGS.im_size, 3])
        net = slim.conv2d(net, 32, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [4, 4], scope='pool1')
        net = slim.conv2d(net, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [4, 4], scope='pool2')
        net = slim.flatten(net)
        net = slim.fully_connected(net, 64, scope='fc')
        net = slim.fully_connected(net, n_classes, activation_fn=None, scope='output')
    return net
def create_model(self, model_input, vocab_size, l2_penalty=1e-8, **unused_params):
    """Creates a logistic model.

    Args:
      model_input: 'batch' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.

    Returns:
      A dictionary with a tensor containing the probability predictions of
      the model in the 'predictions' key. The dimensions of the tensor are
      batch_size x num_classes.
    """
    output = slim.fully_connected(
        model_input, vocab_size, activation_fn=tf.nn.sigmoid,
        weights_regularizer=slim.l2_regularizer(l2_penalty))
    return {"predictions": output}
def inference(self):
    x = tf.reshape(self.x, shape=[-1, self.input_shape[0], self.input_shape[1], self.input_shape[2]])
    # scale (divide by MNIST std)
    x = x * 0.0125
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        model = slim.conv2d(x, 20, [5, 5], padding='VALID', scope='conv1')
        model = slim.max_pool2d(model, [2, 2], padding='VALID', scope='pool1')
        model = slim.conv2d(model, 50, [5, 5], padding='VALID', scope='conv2')
        model = slim.max_pool2d(model, [2, 2], padding='VALID', scope='pool2')
        model = slim.flatten(model)
        model = slim.fully_connected(model, 500, scope='fc1')
        model = slim.dropout(model, 0.5, is_training=self.is_training, scope='do1')
        model = slim.fully_connected(model, self.nclasses, activation_fn=None, scope='fc2')
    return model
def inference(images, keep_probability, phase_train=True, weight_decay=0.0, reuse=None):
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
    }
    with slim.arg_scope([slim.conv2d],
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        return inception_resnet_v1(images, is_training=phase_train,
                                   dropout_keep_prob=keep_probability, reuse=reuse)
def rpn_net(self):
    rpn_encode_boxes_list = []
    rpn_scores_list = []
    with tf.variable_scope('rpn_net'):
        with slim.arg_scope([slim.conv2d],
                            weights_regularizer=slim.l2_regularizer(self.rpn_weight_decay)):
            for level in self.level:
                if self.share_head:
                    # At the beginning (i.e. P2) we create the variables,
                    # then share them across P3, P4, P5.
                    reuse_flag = None if level == 'P2' else True
                    scope_list = ['conv2d_3x3', 'rpn_classifier', 'rpn_regressor']
                else:
                    reuse_flag = None
                    scope_list = ['conv2d_3x3_' + level,
                                  'rpn_classifier_' + level,
                                  'rpn_regressor_' + level]

                rpn_conv2d_3x3 = slim.conv2d(inputs=self.feature_pyramid[level],
                                             num_outputs=256,
                                             kernel_size=[self.kernel_size, self.kernel_size],
                                             stride=1,
                                             scope=scope_list[0],
                                             reuse=reuse_flag)
                rpn_box_scores = slim.conv2d(rpn_conv2d_3x3,
                                             num_outputs=2 * self.num_of_anchors_per_location,
                                             kernel_size=[1, 1],
                                             stride=1,
                                             scope=scope_list[1],
                                             activation_fn=None,
                                             reuse=reuse_flag)
                rpn_encode_boxes = slim.conv2d(rpn_conv2d_3x3,
                                               num_outputs=5 * self.num_of_anchors_per_location,
                                               kernel_size=[1, 1],
                                               stride=1,
                                               scope=scope_list[2],
                                               activation_fn=None,
                                               reuse=reuse_flag)

                rpn_box_scores = tf.reshape(rpn_box_scores, [-1, 2])
                rpn_encode_boxes = tf.reshape(rpn_encode_boxes, [-1, 5])

                rpn_scores_list.append(rpn_box_scores)
                rpn_encode_boxes_list.append(rpn_encode_boxes)

            rpn_all_encode_boxes = tf.concat(rpn_encode_boxes_list, axis=0)
            rpn_all_boxes_scores = tf.concat(rpn_scores_list, axis=0)

    return rpn_all_encode_boxes, rpn_all_boxes_scores
def resnet_arg_scope(weight_decay=0.0001,
                     batch_norm_decay=0.997,
                     batch_norm_epsilon=1e-5,
                     batch_norm_scale=True):
    """Defines the default ResNet arg scope.

    TODO(gpapan): The batch-normalization related default values above are
      appropriate for use in conjunction with the reference ResNet models
      released at https://github.com/KaimingHe/deep-residual-networks. When
      training ResNets from scratch, they might need to be tuned.

    Args:
      weight_decay: The weight decay to use for regularizing the model.
      batch_norm_decay: The moving average decay when estimating layer
        activation statistics in batch normalization.
      batch_norm_epsilon: Small constant to prevent division by zero when
        normalizing activations by their variance in batch normalization.
      batch_norm_scale: If True, uses an explicit `gamma` multiplier to scale
        the activations in the batch normalization layer.

    Returns:
      An `arg_scope` to use for the resnet models.
    """
    batch_norm_params = {
        'decay': batch_norm_decay,
        'epsilon': batch_norm_epsilon,
        'scale': batch_norm_scale,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
    }

    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(weight_decay),
            weights_initializer=slim.variance_scaling_initializer(),
            activation_fn=tf.nn.relu,
            normalizer_fn=slim.batch_norm,
            normalizer_params=batch_norm_params):
        with slim.arg_scope([slim.batch_norm], **batch_norm_params):
            # The following implies padding='SAME' for pool1, which makes feature
            # alignment easier for dense prediction tasks. This is also used in
            # https://github.com/facebook/fb.resnet.torch. However the accompanying
            # code of 'Deep Residual Learning for Image Recognition' uses
            # padding='VALID' for pool1. You can switch to that choice by setting
            # slim.arg_scope([slim.max_pool2d], padding='VALID').
            with slim.arg_scope([slim.max_pool2d], padding='SAME') as arg_sc:
                return arg_sc
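A hedged usage sketch (the input placeholder and layer names are illustrative assumptions): wrapping construction in the returned scope applies the regularizer, initializer and batch-norm defaults to every slim.conv2d, and padding='SAME' to every slim.max_pool2d, as the comment above describes.

inputs = tf.placeholder(tf.float32, [None, 224, 224, 3])  # assumed input shape
with slim.arg_scope(resnet_arg_scope(weight_decay=1e-4)):
    net = slim.conv2d(inputs, 64, [7, 7], stride=2, scope='conv1')  # inherits scope defaults
    net = slim.max_pool2d(net, [3, 3], stride=2, scope='pool1')     # padding='SAME' from the scope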
def build_backbones(self):
    inputs = self.inputs
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        padding='SAME',
                        weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005),
                        activation_fn=tf.nn.relu):
        net = slim.repeat(inputs, 2, slim.conv2d, 64, [3, 3], scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv2')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3], scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv4')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3], scope='conv5')
        net = slim.max_pool2d(net, [2, 2], scope='pool5')
    self.vgg_head = net
def __init__(self, is_training):
    z_dim = FLAGS.z_dim
    batch_size = FLAGS.batch_size

    self.input_image = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 3], name='input_image')
    self.input_nlcd = tf.placeholder(dtype=tf.float32, shape=[None, 15], name='input_nlcd')
    self.input_label = tf.placeholder(dtype=tf.float32, shape=[None, 100], name='input_label')
    self.keep_prob = tf.placeholder(tf.float32)

    weights_regularizer = slim.l2_regularizer(FLAGS.weight_decay)

    flatten_hist = tf.reshape(self.input_image, [-1, 3 * 64 * 64])
    # flatten_hist = slim.fully_connected(flatten_hist, 1024, weights_regularizer=weights_regularizer, scope='fig/fc_1')
    # flatten_hist = slim.fully_connected(flatten_hist, 256, weights_regularizer=weights_regularizer, scope='fig/fc_2')
    # flatten_hist = slim.fully_connected(flatten_hist, 25, weights_regularizer=weights_regularizer, scope='fig/fc_3')
    self.image_feature_encoder = flatten_hist
    self.image_feature_decoder = flatten_hist

    ############## Q(z|X) ###############

    ############## Sample_z ###############
    eps = tf.random_normal(shape=[batch_size, z_dim])
    # self.sample_z = z_miu + tf.exp(z_logvar / 2) * eps
    self.sample_z = eps

    ############## P(X|z) ###############
    x = tf.concat([self.input_nlcd, self.image_feature_decoder, self.sample_z], 1)
    x = slim.fully_connected(x, 512, weights_regularizer=weights_regularizer, scope='decoder/fc_1')
    x = slim.fully_connected(x, 1024, weights_regularizer=weights_regularizer, scope='decoder/fc_2')
    x = slim.fully_connected(x, 512, weights_regularizer=weights_regularizer, scope='decoder/fc_3')
    x = slim.dropout(x, keep_prob=self.keep_prob, is_training=is_training)
    self.logits = slim.fully_connected(x, 100, activation_fn=None,
                                       weights_regularizer=weights_regularizer, scope='decoder/logits')
    self.output = tf.sigmoid(self.logits, name='decoder/output')
def inference(images, keep_probability, phase_train=True,
              bottleneck_layer_size=128, weight_decay=0.0, reuse=None):
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages end up in the trainable variables collection
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
    }

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=slim.initializers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        return inception_resnet_v2(images, is_training=phase_train,
                                   dropout_keep_prob=keep_probability,
                                   bottleneck_layer_size=bottleneck_layer_size,
                                   reuse=reuse)
def main(args):
    project_dir = os.path.dirname(os.getcwd())
    network = importlib.import_module(args.model_def)

    with open(join(project_dir, 'config.yaml'), 'r') as f:
        cfg = yaml.load(f)
    if cfg['specs']['set_gpu']:
        os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'
        os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID"
        os.environ["CUDA_VISIBLE_DEVICES"] = str(cfg['base_conf']['gpu_num'])

    subdir = '%s_center_loss_factor_%1.2f' % (args.data_dir, args.center_loss_factor)
    # test = os.path.expanduser(args.logs_base_dir)
    log_dir = os.path.join(project_dir, 'fine_tuning_process', 'logs', subdir)
    if not os.path.isdir(log_dir):  # Create the log directory if it doesn't exist
        os.makedirs(log_dir)
    model_dir = os.path.join(project_dir, 'fine_tuning_process', 'models', subdir)
    if not os.path.isdir(model_dir):  # Create the model directory if it doesn't exist
        os.makedirs(model_dir)

    # Write arguments to a text file
    facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt'))

    # Store some git revision info in a text file in the log directory
    src_path, _ = os.path.split(os.path.realpath(__file__))
    facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv))

    np.random.seed(seed=args.seed)
    random.seed(args.seed)
    data_dir = os.path.join(project_dir, 'fine_tuning_process', 'data', args.data_dir, 'train')
    train_set = facenet.get_dataset(data_dir)
    if args.filter_filename:
        train_set = filter_dataset(train_set, os.path.expanduser(args.filter_filename),
                                   args.filter_percentile, args.filter_min_nrof_images_per_class)
    nrof_classes = len(train_set)

    print('Model directory: %s' % model_dir)
    print('Log directory: %s' % log_dir)
    pretrained_model = None
    if args.pretrained_model:
        pretrained_model = os.path.join(project_dir, 'fine_tuning_process', 'models',
                                        args.pretrained_model)
        print('Pre-trained model: %s' % pretrained_model)

    lfw_dir = os.path.join(project_dir, 'fine_tuning_process', 'data', args.data_dir, 'test')
    print('LFW directory: %s' % lfw_dir)
    # Read the file containing the pairs used for testing
    lfw_pairs = os.path.join(project_dir, 'fine_tuning_process', 'data', args.data_dir, 'pairs.txt')
    pairs = lfw.read_pairs(lfw_pairs)
    # Get the paths for the corresponding images
    lfw_paths, actual_issame = lfw.get_paths_personal(lfw_dir, pairs)

    data_paths = tools.get_format_file(data_dir, 2, r'.+\.jpeg$')
    meta_data_path = os.path.join(model_dir, 'metadata.tsv')
    with open(meta_data_path, 'w') as f:
        f.write("Index\tLabel\n")
        for d in data_paths:
            tmp = os.path.split(d)
            t = os.path.split(tmp[0])
            f.write("%s\t%s\n" % (tmp[1], t[1]))

    with tf.Graph().as_default():
        tf.set_random_seed(args.seed)
        global_step = tf.Variable(0, trainable=False)

        # get soft labels
        with open(join(data_dir, 'soft_label.pk'), 'rb') as f:
            confidence_score = pickle.load(f)

        image_list, soft_labels_list = facenet.get_image_paths_and_soft_labels(train_set, confidence_score)
        soft_labels_array = np.array(soft_labels_list)
        soft_labels = ops.convert_to_tensor(soft_labels_array, dtype=tf.float32)
        assert len(image_list) > 0, 'The dataset should not be empty'

        # Create a queue that produces indices into the image_list and label_list
        range_size = array_ops.shape(soft_labels)[0]
        index_queue = tf.train.range_input_producer(range_size, num_epochs=None,
                                                    shuffle=True, seed=None, capacity=32)
        index_dequeue_op = index_queue.dequeue_many(args.batch_size * args.epoch_size, 'index_dequeue')

        learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate')
        batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size')
        phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train')
        image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths')
        soft_labels_placeholder = tf.placeholder(tf.float32, shape=(None, nrof_classes),
                                                 name='soft_labels')

        input_queue = data_flow_ops.FIFOQueue(capacity=100000,
                                              dtypes=[tf.string, tf.float32],
                                              shapes=[(1,), (nrof_classes,)],
                                              shared_name=None, name=None)
        enqueue_op = input_queue.enqueue_many([image_paths_placeholder, soft_labels_placeholder],
                                              name='enqueue_op')

        nrof_preprocess_threads = 4
        images_and_softlabels = []
        for _ in range(nrof_preprocess_threads):
            filenames, soft_labels = input_queue.dequeue()
            images = []
            for filename in tf.unstack(filenames):
                file_contents = tf.read_file(filename)
                image = tf.image.decode_image(file_contents, channels=3)
                if args.random_rotate:
                    image = tf.py_func(facenet.random_rotate_image, [image], tf.uint8)
                if args.random_crop:
                    image = tf.random_crop(image, [args.image_size, args.image_size, 3])
                else:
                    image = tf.image.resize_image_with_crop_or_pad(image, args.image_size, args.image_size)
                if args.random_flip:
                    image = tf.image.random_flip_left_right(image)
                # pylint: disable=no-member
                image.set_shape((args.image_size, args.image_size, 3))
                images.append(tf.image.per_image_standardization(image))
            images_and_softlabels.append([images, soft_labels])

        image_batch, soft_label_batch = tf.train.batch_join(
            images_and_softlabels, batch_size=batch_size_placeholder)
        image_batch = tf.squeeze(image_batch, 1)
        image_batch = tf.identity(image_batch, 'image_batch')
        image_batch = tf.identity(image_batch, 'input')
        soft_label_batch = tf.identity(soft_label_batch, 'soft_label_batch')

        print('Total number of classes: %d' % nrof_classes)
        print('Total number of examples: %d' % len(image_list))
        print('Building training graph')

        # Build the inference graph
        prelogits, _ = network.inference(image_batch, args.keep_probability,
                                         phase_train=phase_train_placeholder,
                                         bottleneck_layer_size=args.embedding_size,
                                         weight_decay=args.weight_decay)
        # fine_tuning = slim.fully_connected(prelogits, args.embedding_size, activation_fn=None,
        #                                    scope='FineTuning', reuse=False, trainable=True)
        logits = slim.fully_connected(prelogits, nrof_classes, activation_fn=None,
                                      weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                                      weights_regularizer=slim.l2_regularizer(args.weight_decay),
                                      scope='Logits', reuse=False)
        embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

        # Add center loss
        if args.center_loss_factor > 0.0:
            prelogits_center_loss, _ = facenet.fuzzy_center_loss(prelogits, soft_label_batch,
                                                                 args.center_loss_alfa,
                                                                 args.fuzzier, nrof_classes)
            tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES,
                                 prelogits_center_loss * args.center_loss_factor)
            tf.summary.scalar('prelogits_center_loss', prelogits_center_loss)

        learning_rate = tf.train.exponential_decay(learning_rate_placeholder, global_step,
                                                   args.learning_rate_decay_epochs * args.epoch_size,
                                                   args.learning_rate_decay_factor, staircase=True)
        tf.summary.scalar('learning_rate', learning_rate)

        # Calculate the average cross entropy loss across the batch
        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=soft_label_batch, logits=logits, name='cross_entropy_per_example')
        cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
        tf.add_to_collection('losses', cross_entropy_mean)

        # Calculate the total loss
        regularization_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss')

        # Build a Graph that trains the model with one batch of examples and
        # updates the model parameters
        train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate,
                                 args.moving_average_decay, tf.global_variables(),
                                 args.log_histograms)

        # Create a saver
        all_vars = tf.trainable_variables()
        var_to_restore = [v for v in all_vars if not v.name.startswith('Logits')]
        saver = tf.train.Saver(var_to_restore, max_to_keep=3)

        # Build the summary operation based on the TF collection of Summaries.
        summary_op = tf.summary.merge_all()

        # Start running operations on the Graph.
        gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=args.gpu_memory_fraction)
        sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
        sess.run(tf.global_variables_initializer())
        sess.run(tf.local_variables_initializer())
        summary_writer = tf.summary.FileWriter(log_dir, sess.graph)
        coord = tf.train.Coordinator()
        tf.train.start_queue_runners(coord=coord, sess=sess)

        with sess.as_default():
            if args.pretrained_model:
                print('Restoring pretrained model: %s' % pretrained_model)
                saver.restore(sess, pretrained_model)
                # result = sess.graph.get_tensor_by_name("InceptionResnetV1/Bottleneck/weights:0")
                # pre = sess.graph.get_tensor_by_name("InceptionResnetV1/Block8/Branch_1/Conv2d_0c_3x1/weights:0")
                # tf.stop_gradient(persisted_result)
                # print(result.eval())
                # print("======")
                # print(pre.eval())

            # Training and validation loop
            print('Running training')
            epoch = 0
            pre_acc = -1
            while epoch < args.max_nrof_epochs:
                step = sess.run(global_step, feed_dict=None)
                epoch = step // args.epoch_size
                # Train for one epoch
                train(args, sess, epoch, image_list, soft_labels_array, index_dequeue_op,
                      enqueue_op, image_paths_placeholder, soft_labels_placeholder,
                      learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder,
                      global_step, total_loss, train_op, summary_op, summary_writer,
                      regularization_losses, args.learning_rate_schedule_file, logits)
                # print(result.eval())
                # print("======")
                # print(pre.eval())

                # Save variables and the metagraph if it doesn't exist already
                # Evaluate on LFW
                if args.lfw_dir:
                    acc = evaluate(sess, enqueue_op, image_paths_placeholder,
                                   soft_labels_placeholder, phase_train_placeholder,
                                   batch_size_placeholder, embeddings, soft_label_batch,
                                   lfw_paths, actual_issame, args.lfw_batch_size,
                                   args.lfw_nrof_folds, log_dir, step, summary_writer,
                                   nrof_classes, prelogits_center_loss)
                    if acc > pre_acc:
                        save_variables_and_metagraph(sess, saver, summary_writer,
                                                     model_dir, subdir, step)
                        pre_acc = acc
    return model_dir
def model_fn(features, labels, mode, params):
    is_training = mode == tf.estimator.ModeKeys.TRAIN

    # Inputs
    tokens = features['features']  # (N, L)
    token_lengths = features['feature_length']  # (N,)
    sequence_mask = tf.sequence_mask(maxlen=tf.shape(tokens)[1], lengths=token_lengths)  # (N, L)
    n = tf.shape(tokens)[0]
    L = tf.shape(tokens)[1]

    with tf.control_dependencies([
        tf.assert_greater_equal(params.flat_length, token_lengths,
                                message="Tokens longer than tree size"),
        tf.assert_greater(vocab_size, tokens, message="Tokens larger than vocab"),
        tf.assert_greater_equal(tokens, 0, message="Tokens less than 0")
    ]):
        tokens = tf.identity(tokens)

    if params.l2 > 0:
        weights_regularizer = slim.l2_regularizer(params.l2)
    else:
        weights_regularizer = None

    # Encoder
    mu_t, logsigma_t = vae_flat_encoder_simple(
        tokens=tokens,
        token_lengths=token_lengths,
        vocab_size=vocab_size,
        params=params,
        n=n,
        weights_regularizer=weights_regularizer
    )  # (L, N, D)
    mu = tf.transpose(mu_t, (1, 0, 2))  # (N, L, D)
    logsigma = tf.transpose(logsigma_t, (1, 0, 2))  # (N, L, D)

    # Sampling
    idx = tf.where(sequence_mask)
    with tf.name_scope("kl"):
        selected_mu = tf.gather_nd(params=mu, indices=idx)
        selected_logsigma = tf.gather_nd(params=logsigma, indices=idx)
        latent_sample_values, latent_prior_sample_values = kl(
            mu=selected_mu,
            logsigma=selected_logsigma,
            params=params,
            n=n)
        latent_sample = tf.scatter_nd(
            updates=latent_sample_values,
            indices=idx,
            shape=(n, L, latent_sample_values.shape[-1].value)
        )  # (N, L, D)
        latent_prior_sample = tf.scatter_nd(
            updates=latent_prior_sample_values,
            indices=idx,
            shape=(n, L, latent_prior_sample_values.shape[-1].value)
        )  # (N, L, D)

    # Decoder
    with tf.variable_scope('vae_decoder') as decoder_scope:
        logits, penalty = vae_decoder_dag(
            latent=latent_sample,
            vocab_size=vocab_size,
            sequence_lengths=token_lengths,
            params=params,
            weights_regularizer=weights_regularizer,
            n=n,
            is_training=is_training
        )
    with tf.name_scope("dag_penalty"):
        penalty_scale = get_penalty_scale_logistic(params)
        dag_penalty_raw = tf.reduce_mean(tf.square(penalty))
        weighted_dag_penalty = penalty_scale * dag_penalty_raw
        tf.losses.add_loss(loss=weighted_dag_penalty,
                           loss_collection=tf.GraphKeys.REGULARIZATION_LOSSES)
        tf.summary.scalar('dag_penalty_scale', penalty_scale)
        tf.summary.scalar('dag_penalty_raw', dag_penalty_raw)
        tf.summary.scalar('dag_penalty_weighted', weighted_dag_penalty)

    # Loss calculation
    logits_values = tf.gather_nd(params=logits, indices=idx)
    labels_values = tf.gather_nd(params=tokens, indices=idx)
    onehot_labels_values = tf.one_hot(indices=labels_values, depth=vocab_size)
    loss_values = tf.losses.softmax_cross_entropy(
        onehot_labels=onehot_labels_values,
        logits=logits_values,
        reduction=tf.losses.Reduction.NONE,
        loss_collection=None
    )
    loss_arr = tf.scatter_nd(updates=loss_values, indices=idx, shape=(n, L))
    loss_n = tf.reduce_sum(loss_arr, axis=-1)
    loss = tf.reduce_mean(loss_n)
    tf.losses.add_loss(loss)
    tf.summary.scalar("softmax_cross_entropy", loss)
    total_loss = tf.losses.get_total_loss()

    # Generated data
    with tf.variable_scope(decoder_scope, reuse=True):
        glogits, _ = vae_decoder_dag(
            latent=latent_prior_sample,
            vocab_size=vocab_size,
            sequence_lengths=token_lengths,
            params=params,
            weights_regularizer=weights_regularizer,
            n=n,
            is_training=is_training
        )

    # Hooks
    autoencode_hook = DAGHook(
        logits=logits,
        true=tokens,
        vocab=vocab,
        path=os.path.join(run_config.model_dir, "autoencoded", "autoencoded-{:08d}.csv"),
        name="Autoencoded",
        idx=idx
    )
    generate_hook = DAGHook(
        logits=glogits,
        true=tokens,
        vocab=vocab,
        path=os.path.join(run_config.model_dir, "generated", "generated-{:08d}.csv"),
        name="Generated",
        idx=idx
    )
    evaluation_hooks = [autoencode_hook, generate_hook]
    # tf.summary.scalar('model_total_loss', total_loss)

    # Train
    optimizer = tf.train.AdamOptimizer(params.lr)
    train_op = slim.learning.create_train_op(
        total_loss, optimizer, clip_gradient_norm=params.clip_gradient_norm)
    eval_metric_ops = {
        'cross_entropy_eval': tf.metrics.mean(loss_n),
        'token_lengths_eval': tf.metrics.mean(token_lengths)
    }
    return tf.estimator.EstimatorSpec(
        mode=mode,
        loss=total_loss,
        eval_metric_ops=eval_metric_ops,
        evaluation_hooks=evaluation_hooks,
        train_op=train_op)
def __feature_sequence_extraction(self, input_tensor):
    is_training = True if self.__phase == 'train' else False
    # is_training = True
    with slim.arg_scope(
            [slim.conv2d],
            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.0005),
            biases_initializer=None):
        # input_tensor shape (32, 64, ?, 3) to shape (32, 1, ?, x)
        net = slim.repeat(input_tensor, 2, slim.conv2d, 64, kernel_size=3, stride=1, scope='conv1')
        net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, kernel_size=3, stride=1, scope='conv2')
        net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool2')
        net = slim.repeat(net, 2, slim.conv2d, 256, kernel_size=3, stride=1, scope='conv3')
        net = slim.max_pool2d(net, kernel_size=[2, 1], stride=[2, 1], scope='pool3')
        net = slim.conv2d(net, 512, kernel_size=3, stride=1, scope='conv4')
        # net = slim.batch_norm(net, decay=_BATCH_DECAY, is_training=is_training, scope='bn4')
        net = bn_layer(x=net, scope='bn4', is_training=is_training, decay=_BATCH_DECAY)
        net = slim.conv2d(net, 512, kernel_size=3, stride=1, scope='conv5')
        # net = slim.batch_norm(net, decay=_BATCH_DECAY, is_training=is_training, scope='bn5')
        net = bn_layer(x=net, scope='bn5', is_training=is_training, decay=_BATCH_DECAY)
        net = slim.max_pool2d(net, kernel_size=[2, 1], stride=[2, 1], scope='pool5')
        net = slim.conv2d(net, 512, padding="VALID", kernel_size=[2, 1], stride=1, scope='conv6')
    return net
x, y = get_input()
prelogits, _ = inception_resnet_v1.inference(x, keep_probability=0.8,
                                             phase_train=True,
                                             bottleneck_layer_size=512,
                                             weight_decay=5e-5)
logits = slim.fully_connected(
    prelogits, 8, activation_fn=None,
    weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
    weights_regularizer=slim.l2_regularizer(5e-5),
    scope='Logits', reuse=False)
embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings')

gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.95)
sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False))
sess.run(tf.global_variables_initializer())
sess.run(tf.local_variables_initializer())
coord = tf.train.Coordinator()
tf.train.start_queue_runners(coord=coord, sess=sess)
with sess.as_default():
def inference_2(inputs, bottleneck_layer_size=128, weight_decay=0.0, reuse=None):
    # ---- tf.int32 to tf.bool
    # new_phase_train = tf.cast(phase_train, tf.bool)
    batch_norm_params = {
        # Decay for the moving averages.
        'decay': 0.995,
        # epsilon to prevent 0s in variance.
        'epsilon': 0.001,
        # force in-place updates of mean and variance estimates
        'updates_collections': None,
        # Moving averages end up in the trainable variables collection
        'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES],
    }
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=slim.initializers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(weight_decay),
                        normalizer_fn=slim.batch_norm,
                        normalizer_params=batch_norm_params):
        # return inception_resnet_v2(images, phase_train, dropout_keep_prob=keep_probability,
        #                            bottleneck_layer_size=bottleneck_layer_size, reuse=reuse)
        end_points = {}
        # phase_train_tranform = tf.where(phase_train == 0, False, True)
        # is_training = tf.cond(tf.greater(phase_train, 0), lambda: True, lambda: False)
        scope = 'InceptionResnetV2'
        with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
            with slim.arg_scope([slim.batch_norm, slim.dropout]):
                with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                    stride=1, padding='SAME'):
                    # 149 x 149 x 32
                    net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', scope='Conv2d_1a_3x3')
                    end_points['Conv2d_1a_3x3'] = net
                    # 147 x 147 x 32
                    net = slim.conv2d(net, 32, 3, padding='VALID', scope='Conv2d_2a_3x3')
                    end_points['Conv2d_2a_3x3'] = net
                    # 147 x 147 x 64
                    net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                    end_points['Conv2d_2b_3x3'] = net
                    # 73 x 73 x 64
                    net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_3a_3x3')
                    end_points['MaxPool_3a_3x3'] = net
                    # 73 x 73 x 80
                    net = slim.conv2d(net, 80, 1, padding='VALID', scope='Conv2d_3b_1x1')
                    end_points['Conv2d_3b_1x1'] = net
                    # 71 x 71 x 192
                    net = slim.conv2d(net, 192, 3, padding='VALID', scope='Conv2d_4a_3x3')
                    end_points['Conv2d_4a_3x3'] = net
                    # 35 x 35 x 192
                    net = slim.max_pool2d(net, 3, stride=2, padding='VALID', scope='MaxPool_5a_3x3')
                    end_points['MaxPool_5a_3x3'] = net

                    # 35 x 35 x 320
                    with tf.variable_scope('Mixed_5b'):
                        with tf.variable_scope('Branch_0'):
                            tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                        with tf.variable_scope('Branch_1'):
                            tower_conv1_0 = slim.conv2d(net, 48, 1, scope='Conv2d_0a_1x1')
                            tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5, scope='Conv2d_0b_5x5')
                        with tf.variable_scope('Branch_2'):
                            tower_conv2_0 = slim.conv2d(net, 64, 1, scope='Conv2d_0a_1x1')
                            tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, scope='Conv2d_0b_3x3')
                            tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, scope='Conv2d_0c_3x3')
                        with tf.variable_scope('Branch_3'):
                            tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME',
                                                         scope='AvgPool_0a_3x3')
                            tower_pool_1 = slim.conv2d(tower_pool, 64, 1, scope='Conv2d_0b_1x1')
                        net = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3)
                    end_points['Mixed_5b'] = net
                    net = slim.repeat(net, 10, block35, scale=0.17)

                    # 17 x 17 x 1024
                    with tf.variable_scope('Mixed_6a'):
                        with tf.variable_scope('Branch_0'):
                            tower_conv = slim.conv2d(net, 384, 3, stride=2, padding='VALID',
                                                     scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_1'):
                            tower_conv1_0 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                            tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, scope='Conv2d_0b_3x3')
                            tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, stride=2,
                                                        padding='VALID', scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_2'):
                            tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                                         scope='MaxPool_1a_3x3')
                        net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
                    end_points['Mixed_6a'] = net
                    net = slim.repeat(net, 20, block17, scale=0.10)

                    with tf.variable_scope('Mixed_7a'):
                        with tf.variable_scope('Branch_0'):
                            tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                            tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                                       padding='VALID', scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_1'):
                            tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                            tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2,
                                                        padding='VALID', scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_2'):
                            tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1')
                            tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, scope='Conv2d_0b_3x3')
                            tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2,
                                                        padding='VALID', scope='Conv2d_1a_3x3')
                        with tf.variable_scope('Branch_3'):
                            tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                                         scope='MaxPool_1a_3x3')
                        net = tf.concat([tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3)
                    end_points['Mixed_7a'] = net

                    net = slim.repeat(net, 9, block8, scale=0.20)
                    net = block8(net, activation_fn=None)

                    net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                    end_points['Conv2d_7b_1x1'] = net

                    with tf.variable_scope('Logits'):
                        end_points['PrePool'] = net
                        # pylint: disable=no-member
                        net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID',
                                              scope='AvgPool_1a_8x8')
                        net = slim.flatten(net)
                        # net = slim.dropout(net, dropout_keep_prob, is_training=is_training,
                        #                    scope='Dropout')
                        end_points['PreLogitsFlatten'] = net
                    net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None,
                                               scope='Bottleneck', reuse=False)
    return net
def pfld_inference_for_mobileNetV3_small(input, weight_decay, batch_norm_params): layers = [ [16, 16, 3, 2, "RE", True, 16], [16, 24, 3, 2, "RE", False, 72], [24, 24, 3, 1, "RE", False, 88], [24, 40, 5, 2, "HS", True, 96], [40, 40, 5, 1, "HS", True, 240], [40, 40, 5, 1, "HS", True, 240], [40, 48, 5, 1, "HS", True, 120], [48, 48, 5, 1, "HS", True, 144], [48, 96, 5, 2, "HS", True, 288], [96, 96, 5, 1, "HS", True, 576], [96, 96, 5, 1, "HS", True, 576], ] reduction_ratio = 4 multiplier = 1 with tf.variable_scope('pfld_inference'): features = {} with slim.arg_scope([slim.convolution2d, slim.separable_conv2d], weights_initializer=tf.truncated_normal_initializer(stddev=0.01), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, padding='SAME'): print('PFLD input shape({}): {}'.format(input.name, input.get_shape())) # 112*112*3 out = slim.convolution2d(input, 16 * multiplier, [3, 3], stride=1, activation_fn=hard_swish, scope='conv_1') print(out.name, out.get_shape()) with tf.variable_scope("MobilenetV3_large"): for index in range(3): in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index] out_channels *= multiplier out = mobileNetV3_block(out, "bneck{}".format(index), expand_dims, out_channels, kernel_size, stride, ratio=reduction_ratio, activation_fn=activation, se=se, short_cut=(in_channels == out_channels)) print(out.name, out.get_shape()) # 28*28 features['auxiliary_input'] = out # 14*14 index = 3 in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index] out_channels *= multiplier out1 = mobileNetV3_block(out, "bneck{}".format(index), expand_dims, out_channels, kernel_size, stride, ratio=reduction_ratio, activation_fn=activation, se=se, short_cut=(in_channels == out_channels)) print(out1.name, out1.get_shape()) for index in range(4, 8): in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index] out_channels *= multiplier out1 = mobileNetV3_block(out1, "bneck{}".format(index), expand_dims, out_channels, kernel_size, stride, ratio=reduction_ratio, activation_fn=activation, se=se, short_cut=(in_channels == out_channels)) print(out1.name, out1.get_shape()) # 7*7 index = 8 in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index] out_channels *= multiplier out2 = mobileNetV3_block(out1, "bneck{}".format(index), expand_dims, out_channels, kernel_size, stride, ratio=reduction_ratio, activation_fn=activation, se=se, short_cut=(in_channels == out_channels)) print(out2.name, out2.get_shape()) for index in range(9, len(layers)): in_channels, out_channels, kernel_size, stride, activation, se, expand_dims = layers[index] out_channels *= multiplier out2 = mobileNetV3_block(out2, "bneck{}".format(index), expand_dims, out_channels, kernel_size, stride, ratio=reduction_ratio, activation_fn=activation, se=se, short_cut=(in_channels == out_channels)) print(out2.name, out2.get_shape()) out3 = slim.convolution2d(out2, 576, [1, 1], stride=1, activation_fn=hard_swish, scope='conv_2') print(out3.name, out3.get_shape()) out3 = slim.avg_pool2d(out3, [out3.get_shape()[1], out3.get_shape()[2]], stride=1, scope='group_pool') print(out3.name, out3.get_shape()) out3 = slim.convolution2d(out3, 1280, [1, 1], stride=1, normalizer_fn=None, activation_fn=hard_swish, scope='conv_3') print(out3.name, out3.get_shape()) s1 = slim.flatten(out1) s2 = slim.flatten(out2)
s3 = slim.flatten(out3) multi_scale = tf.concat([s1, s2, s3], 1) landmarks = slim.fully_connected(multi_scale, num_outputs=136, activation_fn=None, scope='fc') print(landmarks.name, landmarks.get_shape()) return features, landmarks
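`pfld_inference_for_mobileNetV3_small` assumes `hard_swish` and `mobileNetV3_block` helpers that are not shown. The activations are fixed by the MobileNetV3 paper; the squeeze-and-excitation gate below is a hypothetical helper whose ratio and wiring are inferred from the call sites above.

import tensorflow as tf
import tensorflow.contrib.slim as slim

def hard_sigmoid(x, name='hard_sigmoid'):
    # Piecewise-linear sigmoid approximation from the MobileNetV3 paper.
    with tf.name_scope(name):
        return tf.nn.relu6(x + 3.0) / 6.0

def hard_swish(x, name='hard_swish'):
    # h-swish(x) = x * ReLU6(x + 3) / 6
    with tf.name_scope(name):
        return x * tf.nn.relu6(x + 3.0) / 6.0

def squeeze_excite(x, ratio=4, scope='se'):
    # Squeeze-and-excitation gate as used inside MobileNetV3 bottlenecks
    # (hypothetical helper; the author's mobileNetV3_block is not shown).
    channels = int(x.get_shape()[-1])
    with tf.variable_scope(scope):
        squeezed = tf.reduce_mean(x, axis=[1, 2], keepdims=True)
        fc1 = slim.convolution2d(squeezed, channels // ratio, [1, 1],
                                 normalizer_fn=None, activation_fn=tf.nn.relu)
        fc2 = slim.convolution2d(fc1, channels, [1, 1],
                                 normalizer_fn=None, activation_fn=hard_sigmoid)
        return x * fc2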
def main(args): network = importlib.import_module(args.model_def) image_size = (args.image_size, args.image_size) subdir = datetime.strftime(datetime.now(), '%Y%m%d-%H%M%S') log_dir = os.path.join(os.path.expanduser(args.logs_base_dir), subdir) if not os.path.isdir( log_dir): # Create the log directory if it doesn't exist os.makedirs(log_dir) model_dir = os.path.join(os.path.expanduser(args.models_base_dir), subdir) if not os.path.isdir( model_dir): # Create the model directory if it doesn't exist os.makedirs(model_dir) stat_file_name = os.path.join(log_dir, 'stat.h5') # Write arguments to a text file facenet.write_arguments_to_file(args, os.path.join(log_dir, 'arguments.txt')) # Store some git revision info in a text file in the log directory src_path, _ = os.path.split(os.path.realpath(__file__)) facenet.store_revision_info(src_path, log_dir, ' '.join(sys.argv)) np.random.seed(seed=args.seed) random.seed(args.seed) dataset = facenet.get_dataset(args.data_dir) if args.filter_filename: dataset = filter_dataset(dataset, os.path.expanduser(args.filter_filename), args.filter_percentile, args.filter_min_nrof_images_per_class) if args.validation_set_split_ratio > 0.0: train_set, val_set = facenet.split_dataset( dataset, args.validation_set_split_ratio, args.min_nrof_val_images_per_class, 'SPLIT_IMAGES') else: train_set, val_set = dataset, [] nrof_classes = len(train_set) print('Model directory: %s' % model_dir) print('Log directory: %s' % log_dir) pretrained_model = None if args.pretrained_model: pretrained_model = os.path.expanduser(args.pretrained_model) print('Pre-trained model: %s' % pretrained_model) if args.lfw_dir: print('LFW directory: %s' % args.lfw_dir) # Read the file containing the pairs used for testing pairs = lfw.read_pairs(os.path.expanduser(args.lfw_pairs)) # Get the paths for the corresponding images lfw_paths, actual_issame = lfw.get_paths( os.path.expanduser(args.lfw_dir), pairs) with tf.Graph().as_default(): tf.set_random_seed(args.seed) global_step = tf.Variable(0, trainable=False) # Get a list of image paths and their labels image_list, label_list = facenet.get_image_paths_and_labels(train_set) assert len(image_list) > 0, 'The training set should not be empty' val_image_list, val_label_list = facenet.get_image_paths_and_labels( val_set) # Create a queue that produces indices into the image_list and label_list labels = ops.convert_to_tensor(label_list, dtype=tf.int32) range_size = array_ops.shape(labels)[0] index_queue = tf.train.range_input_producer(range_size, num_epochs=None, shuffle=True, seed=None, capacity=32) index_dequeue_op = index_queue.dequeue_many( args.batch_size * args.epoch_size, 'index_dequeue') learning_rate_placeholder = tf.placeholder(tf.float32, name='learning_rate') batch_size_placeholder = tf.placeholder(tf.int32, name='batch_size') phase_train_placeholder = tf.placeholder(tf.bool, name='phase_train') image_paths_placeholder = tf.placeholder(tf.string, shape=(None, 1), name='image_paths') labels_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='labels') control_placeholder = tf.placeholder(tf.int32, shape=(None, 1), name='control') nrof_preprocess_threads = 4 input_queue = data_flow_ops.FIFOQueue( capacity=2000000, dtypes=[tf.string, tf.int32, tf.int32], shapes=[(1, ), (1, ), (1, )], shared_name=None, name=None) enqueue_op = input_queue.enqueue_many( [image_paths_placeholder, labels_placeholder, control_placeholder], name='enqueue_op') image_batch, label_batch = facenet.create_input_pipeline( input_queue, image_size, 
nrof_preprocess_threads, batch_size_placeholder) image_batch = tf.identity(image_batch, 'image_batch') image_batch = tf.identity(image_batch, 'input') label_batch = tf.identity(label_batch, 'label_batch') print('Number of classes in training set: %d' % nrof_classes) print('Number of examples in training set: %d' % len(image_list)) print('Number of classes in validation set: %d' % len(val_set)) print('Number of examples in validation set: %d' % len(val_image_list)) print('Building training graph') # Build the inference graph prelogits, _ = network.inference( image_batch, args.keep_probability, phase_train=phase_train_placeholder, bottleneck_layer_size=args.embedding_size, weight_decay=args.weight_decay) logits = slim.fully_connected( prelogits, len(train_set), activation_fn=None, weights_initializer=slim.initializers.xavier_initializer(), weights_regularizer=slim.l2_regularizer(args.weight_decay), scope='Logits', reuse=False) embeddings = tf.nn.l2_normalize(prelogits, 1, 1e-10, name='embeddings') # Norm for the prelogits eps = 1e-4 prelogits_norm = tf.reduce_mean( tf.norm(tf.abs(prelogits) + eps, ord=args.prelogits_norm_p, axis=1)) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_norm * args.prelogits_norm_loss_factor) # Add center loss prelogits_center_loss, _ = facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes) tf.add_to_collection(tf.GraphKeys.REGULARIZATION_LOSSES, prelogits_center_loss * args.center_loss_factor) learning_rate = tf.train.exponential_decay( learning_rate_placeholder, global_step, args.learning_rate_decay_epochs * args.epoch_size, args.learning_rate_decay_factor, staircase=True) tf.summary.scalar('learning_rate', learning_rate) # Calculate the average cross entropy loss across the batch cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits( labels=label_batch, logits=logits, name='cross_entropy_per_example') cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy') tf.add_to_collection('losses', cross_entropy_mean) correct_prediction = tf.cast( tf.equal(tf.argmax(logits, 1), tf.cast(label_batch, tf.int64)), tf.float32) accuracy = tf.reduce_mean(correct_prediction) # Calculate the total losses regularization_losses = tf.get_collection( tf.GraphKeys.REGULARIZATION_LOSSES) total_loss = tf.add_n([cross_entropy_mean] + regularization_losses, name='total_loss') # Build a Graph that trains the model with one batch of examples and updates the model parameters train_op = facenet.train(total_loss, global_step, args.optimizer, learning_rate, args.moving_average_decay, tf.global_variables(), args.log_histograms) # Create a saver saver = tf.train.Saver(tf.trainable_variables(), max_to_keep=3) # Build the summary operation based on the TF collection of Summaries. summary_op = tf.summary.merge_all() # Start running operations on the Graph. 
gpu_options = tf.GPUOptions( per_process_gpu_memory_fraction=args.gpu_memory_fraction) sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options, log_device_placement=False)) sess.run(tf.global_variables_initializer()) sess.run(tf.local_variables_initializer()) summary_writer = tf.summary.FileWriter(log_dir, sess.graph) coord = tf.train.Coordinator() tf.train.start_queue_runners(coord=coord, sess=sess) with sess.as_default(): if pretrained_model: print('Restoring pretrained model: %s' % pretrained_model) saver.restore(sess, pretrained_model) # Training and validation loop print('Running training') nrof_steps = args.max_nrof_epochs * args.epoch_size nrof_val_samples = int( math.ceil(args.max_nrof_epochs / args.validate_every_n_epochs) ) # Validate every validate_every_n_epochs as well as in the last epoch stat = { 'loss': np.zeros((nrof_steps, ), np.float32), 'center_loss': np.zeros((nrof_steps, ), np.float32), 'reg_loss': np.zeros((nrof_steps, ), np.float32), 'xent_loss': np.zeros((nrof_steps, ), np.float32), 'prelogits_norm': np.zeros((nrof_steps, ), np.float32), 'accuracy': np.zeros((nrof_steps, ), np.float32), 'val_loss': np.zeros((nrof_val_samples, ), np.float32), 'val_xent_loss': np.zeros((nrof_val_samples, ), np.float32), 'val_accuracy': np.zeros((nrof_val_samples, ), np.float32), 'lfw_accuracy': np.zeros((args.max_nrof_epochs, ), np.float32), 'lfw_valrate': np.zeros((args.max_nrof_epochs, ), np.float32), 'learning_rate': np.zeros((args.max_nrof_epochs, ), np.float32), 'time_train': np.zeros((args.max_nrof_epochs, ), np.float32), 'time_validate': np.zeros((args.max_nrof_epochs, ), np.float32), 'time_evaluate': np.zeros((args.max_nrof_epochs, ), np.float32), 'prelogits_hist': np.zeros((args.max_nrof_epochs, 1000), np.float32), } for epoch in range(1, args.max_nrof_epochs + 1): step = sess.run(global_step, feed_dict=None) # Train for one epoch t = time.time() cont = train( args, sess, epoch, image_list, label_list, index_dequeue_op, enqueue_op, image_paths_placeholder, labels_placeholder, learning_rate_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, global_step, total_loss, train_op, summary_op, summary_writer, regularization_losses, args.learning_rate_schedule_file, stat, cross_entropy_mean, accuracy, learning_rate, prelogits, prelogits_center_loss, args.random_rotate, args.random_crop, args.random_flip, prelogits_norm, args.prelogits_hist_max, args.use_fixed_image_standardization) stat['time_train'][epoch - 1] = time.time() - t if not cont: break t = time.time() if len(val_image_list) > 0 and ( (epoch - 1) % args.validate_every_n_epochs == args.validate_every_n_epochs - 1 or epoch == args.max_nrof_epochs): validate(args, sess, epoch, val_image_list, val_label_list, enqueue_op, image_paths_placeholder, labels_placeholder, control_placeholder, phase_train_placeholder, batch_size_placeholder, stat, total_loss, regularization_losses, cross_entropy_mean, accuracy, args.validate_every_n_epochs, args.use_fixed_image_standardization) stat['time_validate'][epoch - 1] = time.time() - t # Save variables and the metagraph if it doesn't exist already save_variables_and_metagraph(sess, saver, summary_writer, model_dir, subdir, epoch) # Evaluate on LFW t = time.time() if args.lfw_dir: evaluate(sess, enqueue_op, image_paths_placeholder, labels_placeholder, phase_train_placeholder, batch_size_placeholder, control_placeholder, embeddings, label_batch, lfw_paths, actual_issame, args.lfw_batch_size, args.lfw_nrof_folds, log_dir, step, summary_writer, stat, epoch, 
args.lfw_distance_metric, args.lfw_subtract_mean, args.lfw_use_flipped_images, args.use_fixed_image_standardization) stat['time_evaluate'][epoch - 1] = time.time() - t print('Saving statistics') with h5py.File(stat_file_name, 'w') as f: for key, value in stat.items(): f.create_dataset(key, data=value) return model_dir
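`main` adds `facenet.center_loss(prelogits, label_batch, args.center_loss_alfa, nrof_classes)` to the regularization losses. For reference, a minimal sketch consistent with that API: a non-trainable per-class center matrix is pulled toward the batch features, and the loss is the squared distance to the pre-update centers.

import tensorflow as tf

def center_loss(features, label, alfa, nrof_classes):
    """Center loss (Wen et al., ECCV 2016), sketched to match the
    facenet-style signature used above."""
    nrof_features = features.get_shape()[1]
    centers = tf.get_variable('centers', [nrof_classes, nrof_features],
                              dtype=tf.float32,
                              initializer=tf.constant_initializer(0),
                              trainable=False)
    label = tf.reshape(label, [-1])
    centers_batch = tf.gather(centers, label)        # center of each sample's class
    diff = (1 - alfa) * (centers_batch - features)   # move centers toward features
    centers = tf.scatter_sub(centers, label, diff)
    with tf.control_dependencies([centers]):
        loss = tf.reduce_mean(tf.square(features - centers_batch))
    return loss, centers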
def UNet_pp(inputs, reg, deep_supervision=True): # UNet++ ''' 1-1---> 1-2 ---> 1-3 ---> 1-4 --->1-5 \ / \ / \ / \ / 2-1 --->2-2 ---> 2-3 --->2-4 \ / \ / \ / 3-1 ---> 3-2 ---> 3-3 \ / \ / 4-1---> 4-2 \ / 5-1 ''' nb_filter = [32, 64, 128, 256, 512] conv1_1 = standard_unit(inputs, stage='stage_11', nb_filter=nb_filter[0]) pool1 = slim.max_pool2d(conv1_1, [2, 2], padding='SAME') conv2_1 = standard_unit(pool1, stage='stage_21', nb_filter=nb_filter[1]) pool2 = slim.max_pool2d(conv2_1, [2, 2], padding='SAME') conv3_1 = standard_unit(pool2, stage='stage_31', nb_filter=nb_filter[2]) pool3 = slim.max_pool2d(conv3_1, [2, 2], padding='SAME') conv4_1 = standard_unit(pool3, stage='stage_41', nb_filter=nb_filter[3]) pool4 = slim.max_pool2d(conv4_1, [2, 2], padding='SAME') conv5_1 = standard_unit(pool4, stage='stage_51', nb_filter=nb_filter[4]) up1_2 = upsample(conv2_1, num_outputs=nb_filter[0]) #up1_2 = slim.conv2d_transpose(conv2_1,num_outputs=nb_filter[0],kernel_size=2,stride=2) conv1_2 = tf.concat([conv1_1, up1_2], 3) #conv1_2 = crop_and_concat(conv1_1,up1_2) #conv1_2 = np.concatenate((conv1_1,up1_2),3) conv1_2 = standard_unit(conv1_2, stage='stage_12', nb_filter=nb_filter[0]) up2_2 = upsample(conv3_1, num_outputs=nb_filter[1]) #up2_2 = slim.conv2d_transpose(conv3_1,num_outputs=nb_filter[1],kernel_size=2,stride=2) conv2_2 = tf.concat([conv2_1, up2_2], 3) conv2_2 = standard_unit(conv2_2, stage='stage_22', nb_filter=nb_filter[1]) up3_2 = upsample(conv4_1, num_outputs=nb_filter[2]) #up3_2 = slim.conv2d_transpose(conv4_1,num_outputs=nb_filter[2],kernel_size=2,stride=2) conv3_2 = tf.concat([conv3_1, up3_2], 3) conv3_2 = standard_unit(conv3_2, stage='stage_32', nb_filter=nb_filter[2]) up4_2 = upsample(conv5_1, num_outputs=nb_filter[3]) #up4_2 = slim.conv2d_transpose(conv5_1,num_outputs=nb_filter[3],kernel_size=2,stride=2) conv4_2 = tf.concat([conv4_1, up4_2], 3) conv4_2 = standard_unit(conv4_2, stage='stage_42', nb_filter=nb_filter[3]) up1_3 = upsample(conv2_2, num_outputs=nb_filter[0]) #up1_3 = slim.conv2d_transpose(conv2_2,num_outputs=nb_filter[0],kernel_size=2,stride=2) conv1_3 = tf.concat([conv1_1, conv1_2, up1_3], 3) conv1_3 = standard_unit(conv1_3, stage='stage_13', nb_filter=nb_filter[0]) up2_3 = upsample(conv3_2, num_outputs=nb_filter[1]) #up2_3 = slim.conv2d_transpose(conv3_2,num_outputs=nb_filter[1],kernel_size=2,stride=2) conv2_3 = tf.concat([conv2_1, conv2_2, up2_3], 3) conv2_3 = standard_unit(conv2_3, stage='stage_23', nb_filter=nb_filter[1]) up3_3 = upsample(conv4_2, num_outputs=nb_filter[2]) #up3_3 = slim.conv2d_transpose(conv4_2,num_outputs=nb_filter[2],kernel_size=2,stride=2) conv3_3 = tf.concat([conv3_1, conv3_2, up3_3], 3) conv3_3 = standard_unit(conv3_3, stage='stage_33', nb_filter=nb_filter[2]) up1_4 = upsample(conv2_3, num_outputs=nb_filter[0]) #up1_4 = slim.conv2d_transpose(conv2_3,num_outputs=nb_filter[0],kernel_size=2,stride=2) conv1_4 = tf.concat([conv1_1, conv1_2, conv1_3, up1_4], 3) conv1_4 = standard_unit(conv1_4, stage='stage_14', nb_filter=nb_filter[0]) up2_4 = upsample(conv3_3, num_outputs=nb_filter[1]) #up2_4 = slim.conv2d_transpose(conv3_3,num_outputs=nb_filter[1],kernel_size=2,stride=2) conv2_4 = tf.concat([conv2_1, conv2_2, conv2_3, up2_4], 3) conv2_4 = standard_unit(conv2_4, stage='stage_24', nb_filter=nb_filter[1]) up1_5 = upsample(conv2_4, num_outputs=nb_filter[0]) #up1_5 = slim.conv2d_transpose(conv2_4,num_outputs=nb_filter[0],kernel_size=2,stride=2) conv1_5 = tf.concat([conv1_1, conv1_2, conv1_3, conv1_4, up1_5], 3) conv1_5 = standard_unit(conv1_5, stage='stage_15',
nb_filter=nb_filter[0]) nestnet_output_1 = slim.conv2d( conv1_2, 1, [1, 1], rate=1, activation_fn=tf.nn.sigmoid, scope='output_1', weights_regularizer=slim.l2_regularizer(scale=0.0001)) nestnet_output_2 = slim.conv2d( conv1_3, 1, [1, 1], rate=1, activation_fn=tf.nn.sigmoid, scope='output_2', weights_regularizer=slim.l2_regularizer(scale=0.0001)) nestnet_output_3 = slim.conv2d( conv1_4, 1, [1, 1], rate=1, activation_fn=tf.nn.sigmoid, scope='output_3', weights_regularizer=slim.l2_regularizer(scale=0.0001)) nestnet_output_4 = slim.conv2d( conv1_5, 1, [1, 1], rate=1, activation_fn=tf.nn.sigmoid, scope='output_4', weights_regularizer=slim.l2_regularizer(scale=0.0001)) if deep_supervision: h_deconv_concat = tf.concat([ nestnet_output_1, nestnet_output_2, nestnet_output_3, nestnet_output_4 ], 3) h_deconv_concat = slim.conv2d(inputs=h_deconv_concat, num_outputs=3, kernel_size=3, activation_fn=None) # assuming slim.conv2d; the original called a bare conv2d helper h_deconv_concat = tf.tanh(h_deconv_concat) return h_deconv_concat else: return nestnet_output_4
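`UNet_pp` assumes `standard_unit` and `upsample` helpers. Based on their call sites and the commented-out `slim.conv2d_transpose` alternatives above, a plausible sketch (the two-conv block and the transposed convolution are assumptions):

import tensorflow as tf
import tensorflow.contrib.slim as slim

def standard_unit(inputs, stage, nb_filter):
    # Two 3x3 convolutions, as in the original UNet++ building block (assumed).
    with tf.variable_scope(stage):
        net = slim.conv2d(inputs, nb_filter, [3, 3], scope='conv1')
        net = slim.conv2d(net, nb_filter, [3, 3], scope='conv2')
    return net

def upsample(inputs, num_outputs):
    # 2x learned upsampling; mirrors the commented-out conv2d_transpose calls.
    return slim.conv2d_transpose(inputs, num_outputs, kernel_size=2, stride=2)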
def build_whole_detection_network(self, input_img_batch, gtboxes_r_batch, gtboxes_h_batch): if self.is_training: # ensure shape is [M, 5] and [M, 6] gtboxes_r_batch = tf.reshape(gtboxes_r_batch, [-1, 6]) gtboxes_h_batch = tf.reshape(gtboxes_h_batch, [-1, 5]) gtboxes_r_batch = tf.cast(gtboxes_r_batch, tf.float32) gtboxes_h_batch = tf.cast(gtboxes_h_batch, tf.float32) img_shape = tf.shape(input_img_batch) # 1. build base network C2_, C4 = self.build_base_network(input_img_batch) C2 = slim.conv2d(C2_, num_outputs=1024, kernel_size=[1, 1], stride=1, scope='build_C2_to_1024') self.feature_pyramid = {'C2': C2, 'C4': C4} # 2. build rpn rpn_all_encode_boxes = {} rpn_all_boxes_scores = {} rpn_all_cls_score = {} anchors = {} with tf.variable_scope('build_rpn', regularizer=slim.l2_regularizer( cfgs.WEIGHT_DECAY)): i = 0 for level in self.level: rpn_conv3x3 = slim.conv2d( self.feature_pyramid[level], 512, [3, 3], trainable=self.is_training, weights_initializer=cfgs.INITIALIZER, activation_fn=tf.nn.relu, scope='rpn_conv/3x3_{}'.format(level)) rpn_cls_score = slim.conv2d( rpn_conv3x3, self.num_anchors_per_location[i] * 2, [1, 1], stride=1, trainable=self.is_training, weights_initializer=cfgs.INITIALIZER, activation_fn=None, scope='rpn_cls_score_{}'.format(level)) rpn_box_pred = slim.conv2d( rpn_conv3x3, self.num_anchors_per_location[i] * 4, [1, 1], stride=1, trainable=self.is_training, weights_initializer=cfgs.BBOX_INITIALIZER, activation_fn=None, scope='rpn_bbox_pred_{}'.format(level)) rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4]) rpn_cls_score = tf.reshape(rpn_cls_score, [-1, 2]) rpn_cls_prob = slim.softmax( rpn_cls_score, scope='rpn_cls_prob_{}'.format(level)) # do the softmax rpn_all_cls_score[level] = rpn_cls_score rpn_all_boxes_scores[level] = rpn_cls_prob # do the softmax rpn_all_encode_boxes[level] = rpn_box_pred i += 1 # 3. generate_anchors i = 0 for level, base_anchor_size, stride in zip(self.level, self.base_anchor_size_list, self.stride): featuremap_height, featuremap_width = tf.shape( self.feature_pyramid[level])[1], tf.shape( self.feature_pyramid[level])[2] featuremap_height = tf.cast(featuremap_height, tf.float32) featuremap_width = tf.cast(featuremap_width, tf.float32) #anchor_scale = tf.constant(self.anchor_scales[i], dtype=tf.float32) #)anchor_ratio = tf.constant(self.anchor_ratios[i], dtype=tf.float32) anchor_scale = self.anchor_scales[i] anchor_ratio = self.anchor_ratios[i] tmp_anchors = anchor_utils.make_anchors( base_anchor_size=base_anchor_size, anchor_scales=anchor_scale, anchor_ratios=anchor_ratio, featuremap_height=featuremap_height, featuremap_width=featuremap_width, stride=stride, name="make_anchors_forRPN_{}".format(level)) tmp_anchors = tf.reshape(tmp_anchors, [-1, 4]) anchors[level] = tmp_anchors i += 1 # with tf.variable_scope('make_anchors'): # anchors = anchor_utils.make_anchors(height=featuremap_height, # width=featuremap_width, # feat_stride=cfgs.ANCHOR_STRIDE[0], # anchor_scales=cfgs.ANCHOR_SCALES, # anchor_ratios=cfgs.ANCHOR_RATIOS, base_size=16 # ) # 4. postprocess rpn proposals. 
# such as: decode, clip, NMS rois = {} roi_scores = {} with tf.variable_scope('postprocess_RPN'): # rpn_cls_prob = tf.reshape(rpn_cls_score, [-1, 2]) # rpn_cls_prob = slim.softmax(rpn_cls_prob, scope='rpn_cls_prob') # rpn_box_pred = tf.reshape(rpn_box_pred, [-1, 4]) for level in self.level: rois_rpn, roi_scores_rpn = postprocess_rpn_proposals( rpn_bbox_pred=rpn_all_encode_boxes[level], rpn_cls_prob=rpn_all_boxes_scores[level], img_shape=img_shape, anchors=anchors[level], is_training=self.is_training) # rois[level] = rois # roi_scores[level] = roi_scores # rois shape [-1, 4] # +++++++++++++++++++++++++++++++++++++add img smry+++++++++++++++++++++++++++++++++++++++++++++++++++++++ rois[level] = rois_rpn roi_scores[level] = roi_scores_rpn if self.is_training: rois_in_img = show_box_in_tensor.draw_boxes_with_categories( img_batch=input_img_batch, boxes=rois_rpn, scores=roi_scores_rpn) tf.summary.image('all_rpn_rois_{}'.format(level), rois_in_img) score_gre_05 = tf.reshape( tf.where(tf.greater_equal(roi_scores_rpn, 0.5)), [-1]) score_gre_05_rois = tf.gather(rois_rpn, score_gre_05) score_gre_05_score = tf.gather(roi_scores_rpn, score_gre_05) score_gre_05_in_img = show_box_in_tensor.draw_boxes_with_categories( img_batch=input_img_batch, boxes=score_gre_05_rois, scores=score_gre_05_score) tf.summary.image('score_greater_05_rois_{}'.format(level), score_gre_05_in_img) # ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ rpn_labels = {} rpn_bbox_targets = {} labels_all = [] labels = {} bbox_targets_h = {} bbox_targets_r = {} bbox_targets_all_h = [] bbox_targets_all_r = [] if self.is_training: for level in self.level: with tf.variable_scope( 'sample_anchors_minibatch_{}'.format(level)): rpn_labels_one, rpn_bbox_targets_one = \ tf.py_func( anchor_target_layer, [gtboxes_h_batch, img_shape, anchors[level]], [tf.float32, tf.float32]) rpn_bbox_targets_one = tf.reshape(rpn_bbox_targets_one, [-1, 4]) rpn_labels_one = tf.to_int32( rpn_labels_one, name="to_int32_{}".format(level)) rpn_labels_one = tf.reshape(rpn_labels_one, [-1]) self.add_anchor_img_smry(input_img_batch, anchors[level], rpn_labels_one) # -----------------------------add to the dict------------------------------------------------------------- rpn_labels[level] = rpn_labels_one rpn_bbox_targets[level] = rpn_bbox_targets_one # --------------------------------------add smry----------------------------------------------------------- rpn_cls_category = tf.argmax(rpn_all_boxes_scores[level], axis=1) kept_rpn = tf.reshape( tf.where(tf.not_equal(rpn_labels_one, -1)), [-1]) rpn_cls_category = tf.gather(rpn_cls_category, kept_rpn) # predicted categories acc = tf.reduce_mean( tf.to_float( tf.equal( rpn_cls_category, tf.to_int64(tf.gather(rpn_labels_one, kept_rpn))))) tf.summary.scalar('ACC/rpn_accuracy_{}'.format(level), acc) with tf.control_dependencies([rpn_labels[level]]): with tf.variable_scope( 'sample_RCNN_minibatch_{}'.format(level)): rois_, labels_, bbox_targets_h_, bbox_targets_r_ = \ tf.py_func(proposal_target_layer, [rois[level], gtboxes_h_batch, gtboxes_r_batch], [tf.float32, tf.float32, tf.float32, tf.float32]) rois_fast = tf.reshape(rois_, [-1, 4]) labels_fast = tf.to_int32(labels_) labels_fast = tf.reshape(labels_fast, [-1]) bbox_targets_h_fast = tf.reshape( bbox_targets_h_, [-1, 4 * (cfgs.CLASS_NUM + 1)]) bbox_targets_r_fast = tf.reshape( bbox_targets_r_, [-1, 5 * (cfgs.CLASS_NUM + 1)]) self.add_roi_batch_img_smry(input_img_batch, rois_fast, labels_fast)
#----------------------new_add---------------------- rois[level] = rois_fast labels[level] = labels_fast bbox_targets_h[level] = bbox_targets_h_fast bbox_targets_r[level] = bbox_targets_r_fast labels_all.append(labels_fast) bbox_targets_all_h.append(bbox_targets_h_fast) bbox_targets_all_r.append(bbox_targets_r_fast) fast_labels = tf.concat(labels_all, axis=0) fast_bbox_targets_h = tf.concat(bbox_targets_all_h, axis=0) fast_bbox_targets_r = tf.concat(bbox_targets_all_r, axis=0) # -------------------------------------------------------------------------------------------------------------# # Fast-RCNN # # -------------------------------------------------------------------------------------------------------------# # 5. build Fast-RCNN # rois = tf.Print(rois, [tf.shape(rois)], 'rois shape', summarize=10) bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r = self.build_fastrcnn( feature_to_cropped=self.feature_pyramid, rois_all=rois, img_shape=img_shape) # feature_to_cropped here holds the pyramid feature maps # bbox_pred shape: [-1, 4*(cls_num+1)]. # cls_score shape: [-1, cls_num+1] cls_prob_h = slim.softmax(cls_score_h, 'cls_prob_h') # the horizontal and rotated boxes are processed by separate branches cls_prob_r = slim.softmax(cls_score_r, 'cls_prob_r') # ----------------------------------------------add smry------------------------------------------------------- if self.is_training: cls_category_h = tf.argmax(cls_prob_h, axis=1) fast_acc_h = tf.reduce_mean( tf.to_float(tf.equal(cls_category_h, tf.to_int64(fast_labels)))) tf.summary.scalar('ACC/fast_acc_h', fast_acc_h) cls_category_r = tf.argmax(cls_prob_r, axis=1) fast_acc_r = tf.reduce_mean( tf.to_float(tf.equal(cls_category_r, tf.to_int64(fast_labels)))) tf.summary.scalar('ACC/fast_acc_r', fast_acc_r) # 6. postprocess_fastrcnn if not self.is_training: rois_all = [] for level in self.level: rois_all.append(rois[level]) rois = tf.concat(rois_all, axis=0) final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h( rois=rois, bbox_ppred=bbox_pred_h, scores=cls_prob_h, img_shape=img_shape) final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r( rois=rois, bbox_ppred=bbox_pred_r, scores=cls_prob_r, img_shape=img_shape) return final_boxes_h, final_scores_h, final_category_h, final_boxes_r, final_scores_r, final_category_r else: ''' when training, we also need to build the loss ''' loss_dict = self.build_loss(rpn_box_pred=rpn_all_encode_boxes, rpn_bbox_targets=rpn_bbox_targets, rpn_cls_score=rpn_all_cls_score, rpn_labels=rpn_labels, bbox_pred_h=bbox_pred_h, bbox_targets_h=fast_bbox_targets_h, cls_score_h=cls_score_h, bbox_pred_r=bbox_pred_r, bbox_targets_r=fast_bbox_targets_r, cls_score_r=cls_score_r, labels=fast_labels) rois_all = [] for level in self.level: rois_all.append(rois[level]) rois = tf.concat(rois_all, axis=0) final_boxes_h, final_scores_h, final_category_h = self.postprocess_fastrcnn_h( rois=rois, bbox_ppred=bbox_pred_h, scores=cls_prob_h, img_shape=img_shape) final_boxes_r, final_scores_r, final_category_r = self.postprocess_fastrcnn_r( rois=rois, bbox_ppred=bbox_pred_r, scores=cls_prob_r, img_shape=img_shape) return final_boxes_h, final_scores_h, final_category_h, \ final_boxes_r, final_scores_r, final_category_r, loss_dict
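Step 4 above ("decode, clip, NMS") happens inside `postprocess_rpn_proposals`, which is not shown. A minimal sketch of that stage for one pyramid level, assuming the usual Faster R-CNN conventions; the box coding and the top-k/NMS thresholds are illustrative, not the project's own.

import tensorflow as tf

def postprocess_rpn_proposals_sketch(rpn_bbox_pred, rpn_cls_prob, img_shape, anchors,
                                     pre_nms_topk=12000, post_nms_topk=2000,
                                     nms_iou=0.7):
    """Decode, clip, and NMS the RPN outputs of one level (sketch)."""
    # 1. decode: apply predicted (tx, ty, tw, th) deltas to the anchors.
    ax1, ay1, ax2, ay2 = tf.unstack(anchors, axis=1)
    aw, ah = ax2 - ax1, ay2 - ay1
    acx, acy = ax1 + 0.5 * aw, ay1 + 0.5 * ah
    tx, ty, tw, th = tf.unstack(rpn_bbox_pred, axis=1)
    cx, cy = tx * aw + acx, ty * ah + acy
    w, h = tf.exp(tw) * aw, tf.exp(th) * ah
    boxes = tf.stack([cx - 0.5 * w, cy - 0.5 * h,
                      cx + 0.5 * w, cy + 0.5 * h], axis=1)
    # 2. clip proposals to the image window ([N, H, W, C] image shape assumed).
    img_h = tf.cast(img_shape[1], tf.float32)
    img_w = tf.cast(img_shape[2], tf.float32)
    x1, y1, x2, y2 = tf.unstack(boxes, axis=1)
    boxes = tf.stack([tf.clip_by_value(x1, 0., img_w - 1.),
                      tf.clip_by_value(y1, 0., img_h - 1.),
                      tf.clip_by_value(x2, 0., img_w - 1.),
                      tf.clip_by_value(y2, 0., img_h - 1.)], axis=1)
    # 3. keep the top-scoring foreground proposals, then NMS
    #    (tf.image.non_max_suppression expects [y1, x1, y2, x2] boxes).
    scores = rpn_cls_prob[:, 1]
    k = tf.minimum(pre_nms_topk, tf.shape(scores)[0])
    scores, top_idx = tf.nn.top_k(scores, k=k)
    boxes = tf.gather(boxes, top_idx)
    yx_boxes = tf.stack([boxes[:, 1], boxes[:, 0], boxes[:, 3], boxes[:, 2]], axis=1)
    keep = tf.image.non_max_suppression(yx_boxes, scores,
                                        max_output_size=post_nms_topk,
                                        iou_threshold=nms_iou)
    return tf.gather(boxes, keep), tf.gather(scores, keep)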
def model(images, text_scale=512, weight_decay=1e-5, is_training=True): """ define the model; we use slim's implementation of resnet """ images = mean_image_subtraction(images) with slim.arg_scope(resnet_v1.resnet_arg_scope(weight_decay=weight_decay)): logits, end_points = resnet_v1.resnet_v1_50(images, is_training=is_training, scope='resnet_v1_50') with tf.variable_scope('feature_fusion', values=[end_points.values]): batch_norm_params = { 'decay': 0.997, 'epsilon': 1e-5, 'scale': True, 'is_training': is_training } with slim.arg_scope( [slim.conv2d], activation_fn=tf.nn.relu, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, weights_regularizer=slim.l2_regularizer(weight_decay)): f = [ end_points['pool5'], end_points['pool4'], end_points['pool3'], end_points['pool2'] ] for i in range(4): print('Shape of f_{} {}'.format(i, f[i].shape)) g = [None, None, None, None] h = [None, None, None, None] num_outputs = [None, 128, 64, 32] for i in range(4): if i == 0: h[i] = f[i] else: c1_1 = slim.conv2d(tf.concat([g[i - 1], f[i]], axis=-1), num_outputs[i], 1) h[i] = slim.conv2d(c1_1, num_outputs[i], 3) if i <= 2: g[i] = unpool(h[i]) else: g[i] = slim.conv2d(h[i], num_outputs[i], 3) print('Shape of h_{} {}, g_{} {}'.format( i, h[i].shape, i, g[i].shape)) # here we use a slightly different way for the regression part: # we first use a sigmoid to limit the regression range, and the same # is done for the angle map F_score = slim.conv2d(g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) # 4 channels of axis-aligned bbox and 1 channel of rotation angle geo_map = slim.conv2d( g[3], 4, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) * text_scale angle_map = (slim.conv2d( g[3], 1, 1, activation_fn=tf.nn.sigmoid, normalizer_fn=None) - 0.5) * np.pi / 2 # angle is between [-45, 45] F_geometry = tf.concat([geo_map, angle_map], axis=-1) return F_score, F_geometry
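`model` relies on `unpool` and `mean_image_subtraction` helpers. Sketches consistent with the EAST reference implementation (2x bilinear upsampling, per-channel ImageNet means); treat the exact mean values as assumptions if a different copy is used.

import tensorflow as tf

def unpool(inputs):
    # 2x bilinear upsampling between fusion stages.
    return tf.image.resize_bilinear(
        inputs, size=[tf.shape(inputs)[1] * 2, tf.shape(inputs)[2] * 2])

def mean_image_subtraction(images, means=(123.68, 116.78, 103.94)):
    # Subtract the per-channel ImageNet means expected by resnet_v1.
    channels = tf.split(axis=3, num_or_size_splits=3, value=images)
    for i in range(3):
        channels[i] -= means[i]
    return tf.concat(axis=3, values=channels)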
def build_fastrcnn(self, feature_to_cropped, rois_all, img_shape): with tf.variable_scope('Fast-RCNN'): # 5. ROI Pooling with tf.variable_scope('rois_pooling'): pooled_features = self.roi_pooling( feature_maps=feature_to_cropped, rois=rois_all, img_shape=img_shape) # 6. run inference on the rois in Fast-RCNN to obtain fc_flatten features if self.base_network_name.startswith('resnet'): fc_flatten = resnet.restnet_head( input=pooled_features, is_training=self.is_training, scope_name=self.base_network_name ) # self.base_network_name #fc_flatten = pooled_features elif self.base_network_name.startswith('MobilenetV2'): fc_flatten = mobilenet_v2.mobilenetv2_head( inputs=pooled_features, is_training=self.is_training) else: raise NotImplementedError('only resnet and mobilenet are supported') # 7. cls and reg in Fast-RCNN with tf.variable_scope('horizen_branch'): with slim.arg_scope([slim.fully_connected], weights_regularizer=slim.l2_regularizer( cfgs.WEIGHT_DECAY)): print('*' * 20, fc_flatten.shape) fc6 = slim.fully_connected(fc_flatten, 2048, scope='fc_1') if self.usedropout: fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=self.is_training, scope='dropout_1') fc7 = slim.fully_connected(fc6, 2048, scope='fc_2') if self.usedropout: fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=self.is_training, scope='dropout_2') cls_score_h = slim.fully_connected( fc7, num_outputs=cfgs.CLASS_NUM + 1, weights_initializer=cfgs.INITIALIZER, activation_fn=None, trainable=self.is_training, scope='cls_fc_h') bbox_pred_h = slim.fully_connected( fc7, num_outputs=(cfgs.CLASS_NUM + 1) * 4, weights_initializer=cfgs.BBOX_INITIALIZER, activation_fn=None, trainable=self.is_training, scope='reg_fc_h') # for convenience, it also produces (cls_num + 1) bboxes cls_score_h = tf.reshape(cls_score_h, [-1, cfgs.CLASS_NUM + 1]) bbox_pred_h = tf.reshape(bbox_pred_h, [-1, 4 * (cfgs.CLASS_NUM + 1)]) with tf.variable_scope('rotation_branch'): with slim.arg_scope([slim.fully_connected], weights_regularizer=slim.l2_regularizer( cfgs.WEIGHT_DECAY)): cls_score_r = slim.fully_connected( fc_flatten, num_outputs=cfgs.CLASS_NUM + 1, weights_initializer=cfgs.INITIALIZER, activation_fn=None, trainable=self.is_training, scope='cls_fc_r') bbox_pred_r = slim.fully_connected( fc_flatten, num_outputs=(cfgs.CLASS_NUM + 1) * 5, weights_initializer=cfgs.BBOX_INITIALIZER, activation_fn=None, trainable=self.is_training, scope='reg_fc_r') # for convenience, it also produces (cls_num + 1) bboxes cls_score_r = tf.reshape(cls_score_r, [-1, cfgs.CLASS_NUM + 1]) bbox_pred_r = tf.reshape(bbox_pred_r, [-1, 5 * (cfgs.CLASS_NUM + 1)]) return bbox_pred_h, cls_score_h, bbox_pred_r, cls_score_r
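`self.roi_pooling` crops each ROI out of the feature maps before the heads above. A common TF1 realization uses `tf.image.crop_and_resize`; a single-level sketch (the pixel-coordinate ROI convention, batch size 1, and the 14x14-to-7x7 sizes are assumptions):

import tensorflow as tf
import tensorflow.contrib.slim as slim

def roi_pooling_sketch(feature_map, rois, img_shape, crop_size=14):
    """Crop-and-resize ROI pooling for one feature map.
    rois: [N, 4] as (xmin, ymin, xmax, ymax) in image pixels (assumed)."""
    img_h = tf.cast(img_shape[1], tf.float32)
    img_w = tf.cast(img_shape[2], tf.float32)
    xmin, ymin, xmax, ymax = tf.unstack(rois, axis=1)
    # crop_and_resize expects normalized (y1, x1, y2, x2) boxes.
    normalized = tf.stack([ymin / img_h, xmin / img_w,
                           ymax / img_h, xmax / img_w], axis=1)
    batch_ids = tf.zeros([tf.shape(rois)[0]], dtype=tf.int32)  # batch size 1 assumed
    crops = tf.image.crop_and_resize(feature_map, normalized, batch_ids,
                                     [crop_size, crop_size])
    # Max-pool the 14x14 crops down to 7x7, as in many Faster R-CNN ports.
    return slim.max_pool2d(crops, [2, 2], stride=2, padding='SAME')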
def mobilenet_v2(input, weight_decay, batch_norm_params): features = {} with tf.variable_scope('Mobilenet'): with slim.arg_scope([slim.convolution2d, slim.separable_conv2d], \ activation_fn=tf.nn.relu6,\ weights_initializer=tf.truncated_normal_initializer(stddev=0.01), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, padding='SAME'): print('Mobilnet input shape({}): {}'.format(input.name, input.get_shape())) # 96*96*3 112*112*3 conv_1 = slim.convolution2d(input, 32, [3, 3], stride=2, scope='conv_1') print(conv_1.name, conv_1.get_shape()) # 48*48*32 56*56*32 conv2_1 = slim.separable_convolution2d(conv_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv2_1/dwise') print(conv2_1.name, conv2_1.get_shape()) conv2_1 = slim.convolution2d(conv2_1, 16, [1, 1], stride=1, activation_fn=None, scope='conv2_1/linear') print(conv2_1.name, conv2_1.get_shape()) features['feature2'] = conv2_1 # 48*48*16 56*56*16 conv3_1 = slim.convolution2d(conv2_1, 96, [1, 1], stride=1, scope='conv3_1/expand') print(conv3_1.name, conv3_1.get_shape()) conv3_1 = slim.separable_convolution2d(conv3_1, num_outputs=None, stride=2, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_1/dwise') print(conv3_1.name, conv3_1.get_shape()) conv3_1 = slim.convolution2d(conv3_1, 24, [1, 1], stride=1, activation_fn=None, scope='conv3_1/linear') print(conv3_1.name, conv3_1.get_shape()) conv3_2 = slim.convolution2d(conv3_1, 144, [1, 1], stride=1, scope='conv3_2/expand') print(conv3_2.name, conv3_2.get_shape()) conv3_2 = slim.separable_convolution2d(conv3_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_2/dwise') print(conv3_2.name, conv3_2.get_shape()) conv3_2 = slim.convolution2d(conv3_2, 24, [1, 1], stride=1, activation_fn=None, scope='conv3_2/linear') print(conv3_2.name, conv3_2.get_shape()) block_3_2 = conv3_1 + conv3_2 print(block_3_2.name, block_3_2.get_shape()) features['feature3'] = block_3_2 features['pfld'] = block_3_2 # 24*24*24 28*28*24 conv4_1 = slim.convolution2d(block_3_2, 144, [1, 1], stride=1, scope='conv4_1/expand') print(conv4_1.name, conv4_1.get_shape()) conv4_1 = slim.separable_convolution2d(conv4_1, num_outputs=None, stride=2, depth_multiplier=1, kernel_size=[3, 3], scope='conv4_1/dwise') print(conv4_1.name, conv4_1.get_shape()) conv4_1 = slim.convolution2d(conv4_1, 32, [1, 1], stride=1, activation_fn=None, scope='conv4_1/linear') print(conv4_1.name, conv4_1.get_shape()) conv4_2 = slim.convolution2d(conv4_1, 192, [1, 1], stride=1, scope='conv4_2/expand') print(conv4_2.name, conv4_2.get_shape()) conv4_2 = slim.separable_convolution2d(conv4_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv4_2/dwise') print(conv4_2.name, conv4_2.get_shape()) conv4_2 = slim.convolution2d(conv4_2, 32, [1, 1], stride=1, activation_fn=None, scope='conv4_2/linear') print(conv4_2.name, conv4_2.get_shape()) block_4_2 = conv4_1 + conv4_2 print(block_4_2.name, block_4_2.get_shape()) conv4_3 = slim.convolution2d(block_4_2, 192, [1, 1], stride=1, scope='conv4_3/expand') print(conv4_3.name, conv4_3.get_shape()) conv4_3 = slim.separable_convolution2d(conv4_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv4_3/dwise') print(conv4_3.name, conv4_3.get_shape()) conv4_3 = slim.convolution2d(conv4_3, 32, [1, 1], stride=1, activation_fn=None, scope='conv4_3/linear') print(conv4_3.name, conv4_3.get_shape()) 
block_4_3 = block_4_2 + conv4_3 print(block_4_3.name, block_4_3.get_shape()) # 12*12*32 14*14*32 features['feature4'] = block_4_3 conv5_1 = slim.convolution2d(block_4_3, 192, [1, 1], stride=1, scope='conv5_1/expand') print(conv5_1.name, conv5_1.get_shape()) conv5_1 = slim.separable_convolution2d(conv5_1, num_outputs=None, stride=2, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_1/dwise') print(conv5_1.name, conv5_1.get_shape()) conv5_1 = slim.convolution2d(conv5_1, 64, [1, 1], stride=1,activation_fn=None, scope='conv5_1/linear') print(conv5_1.name, conv5_1.get_shape()) conv5_2 = slim.convolution2d(conv5_1, 384, [1, 1], stride=1, scope='conv5_2/expand') print(conv5_2.name, conv5_2.get_shape()) conv5_2 = slim.separable_convolution2d(conv5_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_2/dwise') print(conv5_2.name, conv5_2.get_shape()) conv5_2 = slim.convolution2d(conv5_2, 64, [1, 1], stride=1, activation_fn=None, scope='conv5_2/linear') print(conv5_2.name, conv5_2.get_shape()) block_5_2 = conv5_1 + conv5_2 print(block_5_2.name, block_5_2.get_shape()) conv5_3 = slim.convolution2d(block_5_2, 384, [1, 1], stride=1, scope='conv5_3/expand') print(conv5_3.name, conv5_3.get_shape()) conv5_3 = slim.separable_convolution2d(conv5_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_3/dwise') print(conv5_3.name, conv5_3.get_shape()) conv5_3 = slim.convolution2d(conv5_3, 64, [1, 1], stride=1, activation_fn=None, scope='conv5_3/linear') print(conv5_3.name, conv5_3.get_shape()) block_5_3 = block_5_2 + conv5_3 print(block_5_3.name, block_5_3.get_shape()) conv5_4 = slim.convolution2d(block_5_3, 384, [1, 1], stride=1, scope='conv5_4/expand') print(conv5_4.name, conv5_4.get_shape()) conv5_4 = slim.separable_convolution2d(conv5_4, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_4/dwise') print(conv5_4.name, conv5_4.get_shape()) conv5_4 = slim.convolution2d(conv5_4, 64, [1, 1], stride=1, activation_fn=None, scope='conv5_4/linear') print(conv5_4.name, conv5_4.get_shape()) block_5_4 = block_5_3 + conv5_4 print(block_5_4.name, block_5_4.get_shape()) # 6*6*64 7*7*64 conv6_1 = slim.convolution2d(block_5_4, 384, [1, 1], stride=1, scope='conv6_1/expand') print(conv6_1.name, conv6_1.get_shape()) conv6_1 = slim.separable_convolution2d(conv6_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv6_1/dwise') print(conv6_1.name, conv6_1.get_shape()) conv6_1 = slim.convolution2d(conv6_1, 96, [1, 1], stride=1, activation_fn=None, scope='conv6_1/linear') print(conv6_1.name, conv6_1.get_shape()) conv6_2 = slim.convolution2d(conv6_1, 576, [1, 1], stride=1, scope='conv6_2/expand') print(conv6_2.name, conv6_2.get_shape()) conv6_2 = slim.separable_convolution2d(conv6_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv6_2/dwise') print(conv6_2.name, conv6_2.get_shape()) conv6_2 = slim.convolution2d(conv6_2, 96, [1, 1], stride=1, activation_fn=None, scope='conv6_2/linear') print(conv6_2.name, conv6_2.get_shape()) block_6_2 = conv6_1 + conv6_2 print(block_6_2.name, block_6_2.get_shape()) conv6_3 = slim.convolution2d(block_6_2, 576, [1, 1], stride=1, scope='conv6_3/expand') print(conv6_3.name, conv6_3.get_shape()) conv6_3 = slim.separable_convolution2d(conv6_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv6_3/dwise') print(conv6_3.name, conv6_3.get_shape()) conv6_3 = slim.convolution2d(conv6_3, 96, [1, 1], stride=1, activation_fn=None, 
scope='conv6_3/linear') print(conv6_3.name, conv6_3.get_shape()) block_6_3 = block_6_2 + conv6_3 print(block_6_3.name, block_6_3.get_shape()) features['feature5'] = block_6_3 # 6*6*96 7*7*96 conv7_1 = slim.convolution2d(block_6_3, 576, [1, 1], stride=1, scope='conv7_1/expand') print(conv7_1.name, conv7_1.get_shape()) conv7_1 = slim.separable_convolution2d(conv7_1, num_outputs=None, stride=2, depth_multiplier=1, kernel_size=[3, 3], scope='conv7_1/dwise') print(conv7_1.name, conv7_1.get_shape()) conv7_1 = slim.convolution2d(conv7_1, 160, [1, 1], stride=1, activation_fn=None, scope='conv7_1/linear') print(conv7_1.name, conv7_1.get_shape()) conv7_2 = slim.convolution2d(conv7_1, 960, [1, 1], stride=1, scope='conv7_2/expand') print(conv7_2.name, conv7_2.get_shape()) conv7_2 = slim.separable_convolution2d(conv7_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv7_2/dwise') print(conv7_2.name, conv7_2.get_shape()) conv7_2 = slim.convolution2d(conv7_2, 160, [1, 1], stride=1, activation_fn=None, scope='conv7_2/linear') print(conv7_2.name, conv7_2.get_shape()) block_7_2 = conv7_1 + conv7_2 print(block_7_2.name, block_7_2.get_shape()) conv7_3 = slim.convolution2d(block_7_2, 960, [1, 1], stride=1, scope='conv7_3/expand') print(conv7_3.name, conv7_3.get_shape()) conv7_3 = slim.separable_convolution2d(conv7_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv7_3/dwise') print(conv7_3.name, conv7_3.get_shape()) conv7_3 = slim.convolution2d(conv7_3, 160, [1, 1], stride=1, activation_fn=None, scope='conv7_3/linear') print(conv7_3.name, conv7_3.get_shape()) block_7_3 = block_7_2 + conv7_3 print(block_7_3.name, block_7_3.get_shape()) conv7_4 = slim.convolution2d(block_7_3, 960, [1, 1], stride=1, scope='conv7_4/expand') print(conv7_4.name, conv7_4.get_shape()) conv7_4 = slim.separable_convolution2d(conv7_4, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv7_4/dwise') print(conv7_4.name, conv7_4.get_shape()) conv7_4 = slim.convolution2d(conv7_4, 320, [1, 1], stride=1, activation_fn=None, scope='conv7_4/linear') print(conv7_4.name, conv7_4.get_shape()) features['feature6'] = conv7_4 return features
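The hand-unrolled expand (1x1), depthwise (3x3), and linear-projection (1x1) triples above, with a residual add whenever the input and output widths match, are MobileNetV2 inverted residual blocks. A compact helper expressing the same pattern, as a sketch:

import tensorflow as tf
import tensorflow.contrib.slim as slim

def inverted_residual(net, expand_channels, out_channels, stride, scope):
    """expand -> depthwise -> linear projection, with a residual add when the
    stride is 1 and the widths match; same wiring as the blocks above."""
    shortcut = net
    with tf.variable_scope(scope):
        net = slim.convolution2d(net, expand_channels, [1, 1], stride=1,
                                 scope='expand')
        net = slim.separable_convolution2d(net, num_outputs=None, stride=stride,
                                           depth_multiplier=1,
                                           kernel_size=[3, 3], scope='dwise')
        net = slim.convolution2d(net, out_channels, [1, 1], stride=1,
                                 activation_fn=None, scope='linear')
        if stride == 1 and shortcut.get_shape()[-1] == out_channels:
            net = shortcut + net
    return net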
def pfld_inference(input, weight_decay, batch_norm_params): coefficient = 1 with tf.variable_scope('pfld_inference'): features = {} with slim.arg_scope([slim.convolution2d, slim.separable_conv2d], activation_fn=tf.nn.relu6, weights_initializer=tf.truncated_normal_initializer(stddev=0.01), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, padding='SAME'): print('PFLD input shape({}): {}'.format(input.name, input.get_shape())) # 112*112*3 conv1 = slim.convolution2d(input, 64*coefficient, [3, 3], stride=2, scope='conv_1') print(conv1.name, conv1.get_shape()) # 56*56*64 conv2 = slim.separable_convolution2d(conv1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv2/dwise') print(conv2.name, conv2.get_shape()) # 56*56*64 conv3_1 = slim.convolution2d(conv2, 128, [1, 1], stride=2, activation_fn=None, scope='conv3_1/expand') print(conv3_1.name, conv3_1.get_shape()) conv3_1 = slim.separable_convolution2d(conv3_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_1/dwise') print(conv3_1.name, conv3_1.get_shape()) conv3_1 = slim.convolution2d(conv3_1, 64*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv3_1/linear') print(conv3_1.name, conv3_1.get_shape()) conv3_2 = slim.convolution2d(conv3_1, 128, [1, 1], stride=1, activation_fn=None, scope='conv3_2/expand') print(conv3_2.name, conv3_2.get_shape()) conv3_2 = slim.separable_convolution2d(conv3_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_2/dwise') print(conv3_2.name, conv3_2.get_shape()) conv3_2 = slim.convolution2d(conv3_2, 64*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv3_2/linear') print(conv3_2.name, conv3_2.get_shape()) block3_2 = conv3_1 + conv3_2 print(block3_2.name, block3_2.get_shape()) conv3_3 = slim.convolution2d(block3_2, 128, [1, 1], stride=1, activation_fn=None, scope='conv3_3/expand') print(conv3_3.name, conv3_3.get_shape()) conv3_3 = slim.separable_convolution2d(conv3_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_3/dwise') print(conv3_3.name, conv3_3.get_shape()) conv3_3 = slim.convolution2d(conv3_3, 64*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv3_3/linear') print(conv3_3.name, conv3_3.get_shape()) block3_3 = block3_2 + conv3_3 print(block3_3.name, block3_3.get_shape()) conv3_4 = slim.convolution2d(block3_3, 128, [1, 1], stride=1, activation_fn=None, scope='conv3_4/expand') print(conv3_4.name, conv3_4.get_shape()) conv3_4 = slim.separable_convolution2d(conv3_4, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_4/dwise') print(conv3_4.name, conv3_4.get_shape()) conv3_4 = slim.convolution2d(conv3_4, 64*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv3_4/linear') print(conv3_4.name, conv3_4.get_shape()) block3_4 = block3_3 + conv3_4 print(block3_4.name, block3_4.get_shape()) conv3_5 = slim.convolution2d(block3_4, 128, [1, 1], stride=1, activation_fn=None, scope='conv3_5/expand') print(conv3_5.name, conv3_5.get_shape()) conv3_5 = slim.separable_convolution2d(conv3_5, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv3_5/dwise') print(conv3_5.name, conv3_5.get_shape()) conv3_5 = slim.convolution2d(conv3_5, 64*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv3_5/linear') print(conv3_5.name, conv3_5.get_shape()) block3_5 = block3_4 + conv3_5 print(block3_5.name,
block3_5.get_shape()) features['auxiliary_input'] = block3_5 #28*28*64 conv4_1 = slim.convolution2d(block3_5, 128, [1, 1], stride=2, activation_fn=None, scope='conv4_1/expand') print(conv4_1.name, conv4_1.get_shape()) conv4_1 = slim.separable_convolution2d(conv4_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv4_1/dwise') print(conv4_1.name, conv4_1.get_shape()) conv4_1 = slim.convolution2d(conv4_1, 128*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv4_1/linear') print(conv4_1.name, conv4_1.get_shape()) #14*14*128 conv5_1 = slim.convolution2d(conv4_1, 512, [1, 1], stride=1, activation_fn=None, scope='conv5_1/expand') print(conv5_1.name, conv5_1.get_shape()) conv5_1 = slim.separable_convolution2d(conv5_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_1/dwise') print(conv5_1.name, conv5_1.get_shape()) conv5_1 = slim.convolution2d(conv5_1, 128*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_1/linear') print(conv5_1.name, conv5_1.get_shape()) conv5_2 = slim.convolution2d(conv5_1, 512, [1, 1], stride=1, activation_fn=None, scope='conv5_2/expand') print(conv5_2.name, conv5_2.get_shape()) conv5_2 = slim.separable_convolution2d(conv5_2, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_2/dwise') print(conv5_2.name, conv5_2.get_shape()) conv5_2 = slim.convolution2d(conv5_2, 128*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_2/linear') print(conv5_2.name, conv5_2.get_shape()) block5_2 = conv5_1 + conv5_2 print(block5_2.name, block5_2.get_shape()) conv5_3 = slim.convolution2d(block5_2, 512, [1, 1], stride=1, activation_fn=None, scope='conv5_3/expand') print(conv5_3.name, conv5_3.get_shape()) conv5_3 = slim.separable_convolution2d(conv5_3, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_3/dwise') print(conv5_3.name, conv5_3.get_shape()) conv5_3 = slim.convolution2d(conv5_3, 128*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_3/linear') print(conv5_3.name, conv5_3.get_shape()) block5_3 = block5_2 + conv5_3 print(block5_3.name, block5_3.get_shape()) conv5_4 = slim.convolution2d(block5_3, 512, [1, 1], stride=1, activation_fn=None, scope='conv5_4/expand') print(conv5_4.name, conv5_4.get_shape()) conv5_4 = slim.separable_convolution2d(conv5_4, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_4/dwise') print(conv5_4.name, conv5_4.get_shape()) conv5_4 = slim.convolution2d(conv5_4, 128*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_4/linear') print(conv5_4.name, conv5_4.get_shape()) block5_4 = block5_3 + conv5_4 print(block5_4.name, block5_4.get_shape()) conv5_5 = slim.convolution2d(block5_4, 512, [1, 1], stride=1, activation_fn=None, scope='conv5_5/expand') print(conv5_5.name, conv5_5.get_shape()) conv5_5 = slim.separable_convolution2d(conv5_5, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv5_5/dwise') print(conv5_5.name, conv5_5.get_shape()) conv5_5 = slim.convolution2d(conv5_5, 128*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_5/linear') print(conv5_5.name, conv5_5.get_shape()) block5_5 = block5_4 + conv5_5 print(block5_5.name, block5_5.get_shape()) conv5_6 = slim.convolution2d(block5_5, 512, [1, 1], stride=1, activation_fn=None, scope='conv5_6/expand') print(conv5_6.name, conv5_6.get_shape()) conv5_6 = slim.separable_convolution2d(conv5_6, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], 
scope='conv5_6/dwise') print(conv5_6.name, conv5_6.get_shape()) conv5_6 = slim.convolution2d(conv5_6, 128*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv5_6/linear') print(conv5_6.name, conv5_6.get_shape()) block5_6 = block5_5 + conv5_6 print(block5_6.name, block5_6.get_shape()) #14*14*128 conv6_1 = slim.convolution2d(block5_6, 256, [1, 1], stride=1, activation_fn=None, scope='conv6_1/expand') print(conv6_1.name, conv6_1.get_shape()) conv6_1 = slim.separable_convolution2d(conv6_1, num_outputs=None, stride=1, depth_multiplier=1, kernel_size=[3, 3], scope='conv6_1/dwise') print(conv6_1.name, conv6_1.get_shape()) conv6_1 = slim.convolution2d(conv6_1, 16*coefficient, [1, 1], stride=1, activation_fn=None, scope='conv6_1/linear') print(conv6_1.name, conv6_1.get_shape()) #14*14*16 conv7 = slim.convolution2d(conv6_1, 32*coefficient, [3, 3], stride=2, activation_fn=None, scope='conv7') print(conv7.name, conv7.get_shape()) #7*7*32 conv8 = slim.convolution2d(conv7, 128*coefficient, [7, 7], stride=1, activation_fn=None, padding='valid', scope='conv8') print(conv8.name, conv8.get_shape()) # avg_pool1 = slim.avg_pool2d(conv6_1, [conv6_1.get_shape()[1], conv6_1.get_shape()[2]], stride=1) # print(avg_pool1.name, avg_pool1.get_shape()) # # avg_pool2 = slim.avg_pool2d(conv7,[conv7.get_shape()[1],conv7.get_shape()[2]],stride=1) # print(avg_pool2.name,avg_pool2.get_shape()) # # s1 = slim.flatten(avg_pool1) # s2 = slim.flatten(avg_pool2) s1 = slim.flatten(conv6_1) s2 = slim.flatten(conv7) #1*1*128 s3 = slim.flatten(conv8) multi_scale = tf.concat([s1, s2, s3], 1) landmarks = slim.fully_connected(multi_scale, num_outputs=136, activation_fn=None, scope='fc') print(landmarks.name, landmarks.get_shape()) return features, landmarks
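A minimal usage sketch for `pfld_inference`: build the batch-norm params it expects, run a forward pass, and reshape the 136 outputs into 68 (x, y) landmark pairs. The placeholder names and parameter values here are illustrative.

import tensorflow as tf

image = tf.placeholder(tf.float32, [None, 112, 112, 3], name='image')
phase_train = tf.placeholder(tf.bool, name='phase_train')
batch_norm_params = {
    'decay': 0.995,
    'epsilon': 0.001,
    'updates_collections': None,
    'is_training': phase_train,
}
features, landmarks = pfld_inference(image, weight_decay=1e-5,
                                     batch_norm_params=batch_norm_params)
# 136 outputs = 68 facial landmarks as (x, y) pairs.
landmarks_xy = tf.reshape(landmarks, [-1, 68, 2])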
def main(args): network = importlib.import_module(args.model_def) with tf.Graph().as_default(): with tf.Session() as sess: # Load the model metagraph and checkpoint image_batch = tf.placeholder(tf.float32, shape=(None, args.image_size, args.image_size, 3), name='input') # Build the inference graph batch_norm_params = { # Decay for the moving averages 'decay': 0.995, # epsilon to prevent 0s in variance 'epsilon': 0.001, # force in-place updates of mean and variance estimates 'updates_collections': None, # Moving averages ends up in the trainable variables collection 'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES], # Only update statistics during training mode 'is_training': False } prelogits, _ = network.inference(image_batch, args.keep_probability, phase_train=False, weight_decay=args.weight_decay) bottleneck = slim.fully_connected( prelogits, args.embedding_size, activation_fn=None, weights_initializer=tf.truncated_normal_initializer( stddev=0.1), weights_regularizer=slim.l2_regularizer(args.weight_decay), normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, scope='Bottleneck', reuse=False) embeddings = tf.nn.l2_normalize(bottleneck, 1, 1e-10, name='embeddings') print('Model directory: %s' % args.model_dir) # _, ckpt_file = get_model_filenames(os.path.expanduser(args.model_dir)) meta_file, ckpt_file = facenet.get_model_filenames( os.path.expanduser(args.model_dir) ) #clpham:to fix "Key Bottleneck/BatchNorm/beta not found..." print('Checkpoint file: %s' % ckpt_file) model_dir_exp = os.path.expanduser(args.model_dir) # saver = tf.train.Saver() saver = tf.train.import_meta_graph( os.path.join(model_dir_exp, meta_file), clear_devices=True ) #clpham:to fix "Key Bottleneck/BatchNorm/beta not found" tf.get_default_session().run(tf.global_variables_initializer()) tf.get_default_session().run(tf.local_variables_initializer()) saver.restore(tf.get_default_session(), os.path.join(model_dir_exp, ckpt_file)) # Retrieve the protobuf graph definition and fix the batch norm nodes gd = sess.graph.as_graph_def() for node in gd.node: if node.op == 'RefSwitch': node.op = 'Switch' for index in range( len(node.input )): #clpham: was=xrange, to support python3 if 'moving_' in node.input[index]: node.input[index] = node.input[index] + '/read' elif node.op == 'AssignSub': node.op = 'Sub' if 'use_locking' in node.attr: del node.attr['use_locking'] elif node.op == 'AssignAdd': node.op = 'Add' if 'use_locking' in node.attr: del node.attr['use_locking'] # Get the list of important nodes output_node_names = 'embeddings' whitelist_names = [] for node in gd.node: if node.name.startswith( 'InceptionResnetV1') or node.name.startswith( 'embeddings') or node.name.startswith( 'phase_train') or node.name.startswith( 'Bottleneck'): print(node.name) whitelist_names.append(node.name) # Replace all the variables in the graph with constants of the same values output_graph_def = graph_util.convert_variables_to_constants( sess, gd, output_node_names.split(","), variable_names_whitelist=whitelist_names) # Serialize and dump the output graph to the filesystem with tf.gfile.GFile(args.output_file, 'wb') as f: f.write(output_graph_def.SerializeToString()) print("%d ops in the final graph." % len(output_graph_def.node))
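Once `main` has serialized `output_graph_def`, the frozen model can be loaded back without any checkpoint machinery. A standard TF1 loading sketch; the tensor names match the `input` placeholder and `embeddings` op defined above, and the .pb path is illustrative.

import tensorflow as tf

def load_frozen_graph(pb_path):
    with tf.gfile.GFile(pb_path, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())
    graph = tf.Graph()
    with graph.as_default():
        tf.import_graph_def(graph_def, name='')
    return graph

graph = load_frozen_graph('frozen_model.pb')  # illustrative path
with tf.Session(graph=graph) as sess:
    images = graph.get_tensor_by_name('input:0')
    embeddings = graph.get_tensor_by_name('embeddings:0')
    # emb = sess.run(embeddings, feed_dict={images: image_batch})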
def model(): x = tf.placeholder(dtype=tf.float32, shape=[batch_size, 32, 32, 3], name='Input') y = tf.placeholder(dtype=tf.float32, shape=[batch_size], name='True_Y') y = tf.cast(y, tf.int64) keep_prob = tf.placeholder(dtype=tf.float32, shape=(), name='dropout') is_training = tf.placeholder(tf.bool, shape=()) with slim.arg_scope([slim.conv2d, slim.fully_connected], activation_fn=tf.nn.crelu, normalizer_fn=slim.batch_norm, normalizer_params={ 'is_training': is_training, 'decay': 0.95 }): h = slim.conv2d(inputs=x, num_outputs=24, kernel_size=2, weights_regularizer=slim.l2_regularizer(0.0016)) h = slim.conv2d(inputs=h, num_outputs=57, kernel_size=3, weights_regularizer=slim.l2_regularizer(0.0001)) h = slim.conv2d(inputs=h, num_outputs=63, kernel_size=5, weights_regularizer=slim.l2_regularizer(0.0096)) h = slim.conv2d(inputs=h, num_outputs=35, kernel_size=5, weights_regularizer=slim.l2_regularizer(0.0071)) h = slim.conv2d(inputs=h, num_outputs=76, kernel_size=3, weights_regularizer=slim.l2_regularizer(0.0015)) h = slim.max_pool2d(h, kernel_size=2, stride=2) flatten = slim.flatten(h) full = slim.fully_connected(flatten, 512) drop_full = slim.dropout(full, keep_prob) with tf.name_scope('accuracy'): logits = slim.fully_connected(drop_full, 10, activation_fn=None) correct_prediction = tf.equal(tf.argmax(logits, 1), y) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) with tf.name_scope('loss'): loss = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( labels=y, logits=logits)) + tf.add_n( tf.losses.get_regularization_losses()) with tf.name_scope('train'): optimizer = tf.train.AdamOptimizer() step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False) train_op = slim.learning.create_train_op(loss, optimizer, global_step=step) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if update_ops: updates = tf.group(*update_ops) loss = control_flow_ops.with_dependencies([updates], loss) with tf.Session() as sess: sess.run(tf.global_variables_initializer()) train_data, train_label = get_data.get_train_data(True) validate_data, validate_label = get_data.get_test_data(True) epochs = total_epochs for current_epoch in range(epochs): train_loss_list = [] train_accu_list = [] total_length = train_data.shape[0] idx = np.arange(total_length) np.random.shuffle(idx) train_data = train_data[idx] train_label = train_label[idx] total_steps = total_length // batch_size for step in range(total_steps): batch_train_data = train_data[step * batch_size:(step + 1) * batch_size] batch_train_label = train_label[step * batch_size:(step + 1) * batch_size] _, loss_v, accuracy_str = sess.run( [train_op, loss, accuracy], { x: batch_train_data, y: batch_train_label, keep_prob: 0.5, is_training: True }) train_loss_list.append(loss_v) train_accu_list.append(accuracy_str) #test test_length = validate_data.shape[0] test_steps = test_length // batch_size test_loss_list = [] test_accu_list = [] for step in range(test_steps): batch_test_data = validate_data[step * batch_size:(step + 1) * batch_size] batch_test_label = validate_label[step * batch_size:(step + 1) * batch_size] loss_v, accuracy_str = sess.run( [loss, accuracy], { x: batch_test_data, y: batch_test_label, keep_prob: 1.0, is_training: False }) test_loss_list.append(loss_v) test_accu_list.append(accuracy_str) print( '{}, epoch:{}/{}, step:{}/{}, loss:{:.6f}, accu:{:.4f}, test loss:{:.6f}, accu:{:.4f}' .format(datetime.now(), current_epoch, total_epochs, total_steps * current_epoch + step, total_steps * epochs, 
np.mean(train_loss_list), np.mean(train_accu_list), np.mean(test_loss_list), np.mean(test_accu_list)))
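# Side note (a sketch, not from the original): the block above attaches the
# batch-norm moving-average updates to the loss tensor via with_dependencies.
# An equivalent and more common TF1 pattern gates the train op itself:
import tensorflow as tf

def make_train_op(loss, optimizer):
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    with tf.control_dependencies(update_ops):
        # minimize() now runs only after the batch-norm statistics are updated
        return optimizer.minimize(loss)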
def forward(self, inputs, is_training=False, reuse=False): # the input img_size, form: [height, weight] self.img_size = tf.shape(inputs)[1:3] # set batch norm params batch_norm_params = { 'decay': self.batch_norm_decay, 'epsilon': 1e-05, 'scale': True, 'is_training': is_training, 'fused': None, # Use fused batch norm if possible. } with slim.arg_scope([slim.conv2d, slim.batch_norm], reuse=reuse): with slim.arg_scope( [slim.conv2d], normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_params, biases_initializer=None, activation_fn=lambda x: tf.nn.leaky_relu(x, alpha=0.1)): with tf.variable_scope('darknet53_body'): route_1, route_2, route_3 = darknet53_body(inputs) with tf.variable_scope('yolov3_head'): inter1, net = yolo_block(route_3, 512) feature_map_1 = slim.conv2d( net, 3 * (5 + self.class_num), 1, stride=1, normalizer_fn=None, activation_fn=None, weights_regularizer=slim.l2_regularizer(0.001), weights_initializer=tf.contrib.layers. xavier_initializer(), biases_initializer=tf.zeros_initializer()) feature_map_1 = tf.identity(feature_map_1, name='feature_map_1') inter1 = conv2d(inter1, 256, 1) inter1 = upsample_layer(inter1, route_2.get_shape().as_list()) concat1 = tf.concat([inter1, route_2], axis=3) inter2, net = yolo_block(concat1, 256) feature_map_2 = slim.conv2d( net, 3 * (5 + self.class_num), 1, stride=1, normalizer_fn=None, activation_fn=None, weights_regularizer=slim.l2_regularizer(0.001), weights_initializer=tf.contrib.layers. xavier_initializer(), biases_initializer=tf.zeros_initializer()) feature_map_2 = tf.identity(feature_map_2, name='feature_map_2') inter2 = conv2d(inter2, 128, 1) inter2 = upsample_layer(inter2, route_1.get_shape().as_list()) concat2 = tf.concat([inter2, route_1], axis=3) _, feature_map_3 = yolo_block(concat2, 128) feature_map_3 = slim.conv2d( feature_map_3, 3 * (5 + self.class_num), 1, stride=1, normalizer_fn=None, activation_fn=None, weights_regularizer=slim.l2_regularizer(0.001), weights_initializer=tf.contrib.layers. xavier_initializer(), biases_initializer=tf.zeros_initializer()) feature_map_3 = tf.identity(feature_map_3, name='feature_map_3') return feature_map_1, feature_map_2, feature_map_3
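# A hedged sketch (not this repo's postprocessing code) of how the
# 3 * (5 + class_num) output channels of each feature map are conventionally
# unpacked in YOLOv3: 3 anchors per cell, each carrying box offsets,
# objectness, and per-class scores.
import tensorflow as tf

def split_feature_map(feature_map, class_num):
    shape = tf.shape(feature_map)
    # [N, H, W, 3*(5+C)] -> [N, H, W, 3, 5+C]
    fm = tf.reshape(feature_map,
                    [shape[0], shape[1], shape[2], 3, 5 + class_num])
    box_xy, box_wh, objectness, class_logits = tf.split(
        fm, [2, 2, 1, class_num], axis=-1)
    return box_xy, box_wh, objectness, class_logits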
with tf.Session(config=cuda_set(GPU)) as sess: N = tf.placeholder(tf.float32, [None] + tensor_shape['neck'], name='neck') Y = tf.placeholder(tf.float32, [None] + tensor_shape['mid'], name='mid') C = tf.placeholder(tf.int32, [None], name='C') is_train = tf.placeholder(tf.bool, name='is_training') label = tf.squeeze(tf.one_hot(C, c, 1.0, 0.0, 1, tf.float32), name='label') Z = PSAM_Fusion(N, Y) # Z = PSAM_TOP(Y) #Z = slim.dropout(Z, 0.8, is_training=is_train,scope='fc_drop') Z = tf.expand_dims(tf.expand_dims(Z, 1), 1) print(Z) with slim.arg_scope([slim.conv2d], activation_fn=None, weights_initializer=tf.truncated_normal_initializer( 0.0, 0.01), weights_regularizer=slim.l2_regularizer(0.01)): logits_Z = slim.conv2d(Z, c, [1, 1], scope='fc_Z') logits_Z = tf.squeeze(logits_Z, [1, 2]) score = tf.nn.softmax(logits_Z, name='softmax') # pre_cross = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=label,logits=logits_N)) # slim.losses.add_loss(pre_cross*0.1) loss_cross = tf.reduce_mean( tf.nn.softmax_cross_entropy_with_logits(labels=label, logits=logits_Z)) slim.losses.add_loss(loss_cross) # slim.losses.add_loss(loss_tight*0.05) total_loss = slim.losses.get_total_loss() #+ loss_tight * tight_a train_op = tf.train.AdamOptimizer(LEARNING_RATE).minimize(total_loss) correct_prediction = tf.equal(tf.argmax(label, 1), tf.argmax(logits_Z, 1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) output = tf.squeeze(logits_Z, name='output') sess.run(tf.global_variables_initializer())
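# Hedged usage sketch for the graph above (assumed, not from the source):
# `next_batch` is a hypothetical provider yielding neck features, mid-level
# features and integer class labels matching the N / Y / C placeholders.
for step_i in range(1000):
    neck_batch, mid_batch, label_batch = next_batch()
    _, loss_v, acc_v = sess.run(
        [train_op, total_loss, accuracy],
        feed_dict={N: neck_batch, Y: mid_batch, C: label_batch, is_train: True})
    if step_i % 100 == 0:
        print('step {}: loss={:.4f}, acc={:.4f}'.format(step_i, loss_v, acc_v))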
def __ctpn_base(self):
    """Feature extraction layer.

    :return:
        proposal_predicted: shape = [1, h, w, A*4]
        proposal_cls_score: shape = [1, h, w, A*cfg["CLASSES_NUM"]]
        proposal_cls_prob:  shape = [1, h, w, A*cfg["CLASSES_NUM"]]
    """
    stddev = 0.01
    weight_decay = cfg["TRAIN"]["WEIGHT_DECAY"]
    assert cfg["ANCHOR_WIDTH"] == 8 or cfg["ANCHOR_WIDTH"] == 16, \
        'Anchor width must be 8 or 16, not {}.'.format(cfg["ANCHOR_WIDTH"])
    with tf.variable_scope("CTPN_Network"):
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                weights_initializer=tf.truncated_normal_initializer(0.0, stddev=stddev),
                weights_regularizer=slim.l2_regularizer(weight_decay)):
            if cfg["BACKBONE"] == "InceptionNet":
                features, featuremap_scale = inception_base(self.img_input)
            elif cfg["BACKBONE"] == "VggNet":
                features, featuremap_scale = vgg_base(self.img_input)
            else:
                assert 0, "error: backbone {} is not supported!".format(cfg["BACKBONE"])
            print('featuremap_scale is {}, anchor width is {}'.format(
                featuremap_scale, cfg['ANCHOR_WIDTH']))
            assert featuremap_scale == cfg['ANCHOR_WIDTH']
            print("using {} backbone...".format(cfg["BACKBONE"]))
            features = slim.conv2d(features, 512, [3, 3], scope='rpn_conv_3x3')
            if cfg["USE_LSTM"]:
                features = self.__bilstm(features, 512, 128, 512)
            else:
                features = self.__semantic_info_extract_layer(features)
            print('Is LSTM used?', cfg["USE_LSTM"])
            proposal_predicted = self._lstm_fc(features, 512, 10 * 4, scope_name="bbox_pred")
            proposal_cls_score = self._lstm_fc(features, 512, 10 * 2, scope_name="cls_pred")
            # # proposal_predicted shape = [1, h, w, A*4]
            # proposal_predicted = slim.conv2d(features, len(cfg["ANCHOR_HEIGHT"]) * 4, [1, 1], scope='proposal_conv_1x1', activation_fn=None)
            # # proposal_cls_score shape = [1, h, w, A*cfg["CLASSES_NUM"]]
            # proposal_cls_score = slim.conv2d(features, len(cfg["ANCHOR_HEIGHT"]) * cfg["CLASSES_NUM"], [1, 1], scope='cls_conv_1x1', activation_fn=None)
            proposal_cls_score_shape = tf.shape(proposal_cls_score)
            # proposal_cls_score_reshape shape = [h*w*A, cfg["CLASSES_NUM"]]
            proposal_cls_score_reshape = tf.reshape(proposal_cls_score, [
                proposal_cls_score_shape[0], proposal_cls_score_shape[1], -1,
                cfg["CLASSES_NUM"]
            ])
            proposal_cls_score_reshape_shape = tf.shape(proposal_cls_score_reshape)
            proposal_cls_score_reshape = tf.reshape(proposal_cls_score_reshape,
                                                    [-1, cfg["CLASSES_NUM"]])
            # proposal_cls_prob shape = [1, h, w, A*cfg["CLASSES_NUM"]]
            proposal_cls_prob = tf.reshape(
                tf.nn.softmax(proposal_cls_score_reshape), [
                    -1, proposal_cls_score_reshape_shape[1],
                    proposal_cls_score_reshape_shape[2],
                    proposal_cls_score_reshape_shape[3]
                ])
    return proposal_predicted, proposal_cls_score, proposal_cls_prob
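# Hedged numeric sketch of the reshape/softmax above (shapes illustrative):
# with CLASSES_NUM = 2 and A = 10 anchors, a [1, h, w, 20] score map is viewed
# so the softmax runs over each anchor's two class scores, then restored.
import tensorflow as tf

scores = tf.placeholder(tf.float32, [1, 14, 14, 20])
scores_r = tf.reshape(scores, [1, 14, -1, 2])           # [1, 14, 140, 2]
probs = tf.nn.softmax(tf.reshape(scores_r, [-1, 2]))    # per-anchor softmax
probs = tf.reshape(probs, [-1, 14, 140, 2])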
def model(): is_training = tf.placeholder(tf.bool, []) train_images, train_label = data.get_train_data(batch_size) test_images, test_label = data.get_test_data(batch_size) x = tf.cond(is_training, lambda: train_images, lambda: test_images) y_ = tf.cond(is_training, lambda: train_label, lambda: test_label) y_ = tf.cast(y_, tf.int64) with slim.arg_scope([slim.conv2d, slim.fully_connected], activation_fn=tf.nn.crelu, normalizer_fn=slim.batch_norm, weights_regularizer=slim.l2_regularizer(0.005), normalizer_params={ 'is_training': is_training, 'decay': 0.95 }): conv1 = slim.conv2d( x, 48, [9, 9], weights_initializer=tf.truncated_normal_initializer(mean=-0.08, stddev=0.63)) pool1 = slim.max_pool2d(conv1, [4, 4], stride=4, padding='SAME') conv2 = slim.conv2d( pool1, 43, [7, 7], weights_initializer=tf.truncated_normal_initializer(mean=-0.23, stddev=0.22)) pool2 = slim.max_pool2d(conv2, [4, 4], stride=4, padding='SAME') pool3 = slim.avg_pool2d(pool2, [3, 3], stride=3, padding='SAME') flatten = slim.flatten(pool3) logits = slim.fully_connected( flatten, 2, activation_fn=None, weights_initializer=tf.truncated_normal_initializer( mean=0.726, stddev=0.397992), biases_initializer=tf.constant_initializer(0.1, dtype=tf.float32)) correct_prediction = tf.equal(tf.argmax(logits, 1), y_) accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32)) regularization_loss = tf.add_n(slim.losses.get_regularization_losses()) cross_entropy = tf.reduce_mean( tf.nn.sparse_softmax_cross_entropy_with_logits( labels=y_, logits=logits)) + regularization_loss step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0), trainable=False) # lr = tf.train.exponential_decay(0.1, # step, # 550*30, # 0.9, # staircase=True) # # # optimizer = tf.train.GradientDescentOptimizer(lr) optimizer = tf.train.AdamOptimizer(0.001) # lr_summary = tf.summary.scalar('lr', lr) train_step = slim.learning.create_train_op(cross_entropy, optimizer, global_step=step) update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS) if update_ops: updates = tf.group(*update_ops) cross_entropy = control_flow_ops.with_dependencies([updates], cross_entropy) loss_summary = tf.summary.scalar('loss', cross_entropy) accuracy_summary = tf.summary.scalar('accuracy', accuracy) merge_summary = tf.summary.merge([loss_summary, accuracy_summary]) return is_training, train_step, step, accuracy, cross_entropy, merge_summary
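# Usage sketch (assumed, not from the source): model() switches between the
# train and test input pipelines with tf.cond, so one fetch list serves both
# phases and only the boolean feed changes. The queue-runner setup below
# assumes data.get_train_data/get_test_data use TF1 input queues.
is_training, train_step, step, accuracy, cross_entropy, merge_summary = model()
with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    for i in range(1000):
        sess.run(train_step, feed_dict={is_training: True})
        if i % 100 == 0:
            print('eval accuracy:', sess.run(accuracy, feed_dict={is_training: False}))
    coord.request_stop()
    coord.join(threads)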
import tensorflow as tf
import numpy as np
import tensorflow.contrib.slim as slim
import os
from tensorflow.contrib.data import Dataset
from tensorflow.contrib.layers import conv2d

reg = slim.l2_regularizer(scale=0.001)


def crop_and_concat(x1, x2):
    with tf.name_scope("crop_and_concat"):
        x1_shape = tf.shape(x1)
        x2_shape = tf.shape(x2)
        # offsets for the top left corner of the crop
        offsets = [
            0, (x1_shape[1] - x2_shape[1]) // 2,
            (x1_shape[2] - x2_shape[2]) // 2, 0
        ]
        size = [-1, x2_shape[1], x2_shape[2], -1]
        x1_crop = tf.slice(x1, offsets, size)
        return tf.concat([x1_crop, x2], 3)


def standard_unit(inputs, stage, nb_filter, kernel_size=3):
    x = slim.conv2d(inputs, nb_filter, [3, 3], rate=1, activation_fn=None,
                    weights_regularizer=reg)
    x = slim.batch_norm(x)
    # Assumed completion: the original snippet ended after the batch norm;
    # a ReLU and a return are added here so the unit is usable as written.
    x = tf.nn.relu(x)
    return x
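# Quick usage sketch for crop_and_concat (shapes are illustrative): the larger
# encoder map x1 is center-cropped to x2's spatial size and concatenated on
# the channel axis, the standard U-Net skip connection.
x1 = tf.placeholder(tf.float32, [None, 64, 64, 32])  # encoder features
x2 = tf.placeholder(tf.float32, [None, 56, 56, 64])  # upsampled decoder features
skip = crop_and_concat(x1, x2)  # conceptually [None, 56, 56, 96]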
def O_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True): print('O_Net') with slim.arg_scope([slim.conv2d], activation_fn=prelu, weights_initializer=slim.xavier_initializer(), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(0.0005), padding='valid'): print(inputs.get_shape()) net = slim.conv2d(inputs, num_outputs=32, kernel_size=[3, 3], stride=1, scope="conv1") print(net.get_shape()) net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool1", padding='SAME') print(net.get_shape()) net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], stride=1, scope="conv2") print(net.get_shape()) net = slim.max_pool2d(net, kernel_size=[3, 3], stride=2, scope="pool2") print(net.get_shape()) net = slim.conv2d(net, num_outputs=64, kernel_size=[3, 3], stride=1, scope="conv3") print(net.get_shape()) net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope="pool3", padding='SAME') print(net.get_shape()) net = slim.conv2d(net, num_outputs=128, kernel_size=[2, 2], stride=1, scope="conv4") print(net.get_shape()) fc_flatten = slim.flatten(net) print(fc_flatten.get_shape()) fc1 = slim.fully_connected(fc_flatten, num_outputs=256, scope="fc1") print(fc1.get_shape()) #batch*2 cls_prob = slim.fully_connected(fc1, num_outputs=2, scope="cls_fc", activation_fn=tf.nn.softmax) print('cls_fc', cls_prob.get_shape()) #batch*4 bbox_pred = slim.fully_connected(fc1, num_outputs=4, scope="bbox_fc", activation_fn=None) print('bbox_fc', bbox_pred.get_shape()) #batch*10 landmark_pred = slim.fully_connected(fc1, num_outputs=(no_landmarks * 2), scope="landmark_fc", activation_fn=None) print('landmark_fc', landmark_pred.get_shape()) #train if training: cls_loss = cls_ohem(cls_prob, label) bbox_loss = bbox_ohem(bbox_pred, bbox_target, label) accuracy = cal_accuracy(cls_prob, label) landmark_loss = landmark_ohem(landmark_pred, landmark_target, label) L2_loss = tf.add_n(slim.losses.get_regularization_losses()) return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy, landmark_pred else: return cls_prob, bbox_pred, landmark_pred
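# Hedged usage sketch: O_Net consumes 48x48 face crops in the MTCNN cascade;
# the placeholder shape below follows that convention and is not taken from
# this file.
import tensorflow as tf

images = tf.placeholder(tf.float32, [None, 48, 48, 3], name='onet_input')
cls_prob, bbox_pred, landmark_pred = O_Net(images, training=False)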
def build_network(images, num_classes=default.num_classes, training=None): tf.logging.info("Loading CNN Model") if config.stn: tf.logging.info("Start to loading stn network") # locnet with slim.arg_scope( [slim.conv2d], weights_initializer=tf.truncated_normal_initializer( stddev=0.01), weights_regularizer=slim.l2_regularizer(0.0005), biases_initializer=None): with tf.variable_scope('Loc_Net'): n_fc = 6 #B, H, W, C = images.shape # identity transform initial = np.array([[1., 0, 0], [0, 1., 0]]) initial = initial.astype('float32').flatten() # Output Layer Transformation # localization network # 64 x 128 avg_net = slim.avg_pool2d(images, kernel_size=2, stride=2, scope="pool1") # 32 x 64 conv1_1_net = slim.conv2d(avg_net, 32, kernel_size=3, stride=4, scope='conv1_1') # 8 x 16 conv1_2_net = slim.conv2d(images, 32, kernel_size=5, stride=8, scope='conv1_2') loc_concat_net = tf.concat([conv1_1_net, conv1_2_net], 3, name='concat') #loc_net = slim.repeat(images, 2, slim.conv2d, 32, kernel_size=3, stride=1, scope='loc_conv1') #loc_net = slim.max_pool2d(loc_net, kernel_size=2, stride=2, scope='loc_pool1') # 8 x 16 loc_net = slim.conv2d(conv1_2_net, 128, kernel_size=3, stride=1, scope='conv3') loc_net = slim.batch_norm(loc_net, decay=_BATCH_DECAY, is_training=training, scope='bn1') loc_net = slim.conv2d(loc_net, 32, kernel_size=3, stride=1, scope='conv4') loc_net = slim.batch_norm(loc_net, decay=_BATCH_DECAY, is_training=training, scope='bn2') loc_net = slim.max_pool2d(loc_net, kernel_size=5, stride=4, scope='pool3') # 2 x 4 loc_net = slim.conv2d(loc_net, 16, kernel_size=3, stride=1, scope='conv5') loc_net = tf.reduce_mean(input_tensor=loc_net, axis=[1, 2], keep_dims=False, name="se_pool1") loc_net = tf.reshape(loc_net, [loc_net.shape[0], -1]) loc_B, loc_W = loc_net.shape W_fc1 = tf.Variable(tf.zeros([loc_W, n_fc]), name='W_fc1') b_fc1 = tf.Variable(initial_value=initial, name='b_fc1') loc_net = tf.matmul(loc_net, W_fc1) + b_fc1 loc_output = spatial_transformer_network(images, loc_net) images = loc_output tf.logging.info("stn network loaded...") # 1 x 2 if config.rgb: tf.logging.info("Start to loading Init rgb network") # rgbnet with slim.arg_scope( [slim.conv2d], weights_initializer=tf.truncated_normal_initializer( stddev=0.01), weights_regularizer=slim.l2_regularizer(0.0005), biases_initializer=None): with tf.variable_scope('RGB_Net'): # identity transform # 64 x 128 avg_net = slim.avg_pool2d(images, kernel_size=2, stride=2, scope="pool1") # 32 x 64 conv1_1_net = slim.conv2d(avg_net, 32, kernel_size=3, stride=4, scope='conv1_1') # 8 x 16 conv1_2_net = slim.conv2d(images, 32, kernel_size=5, stride=8, scope='conv1_2') rgb_concat_net = tf.concat([conv1_1_net, conv1_2_net], 3, name='concat') #loc_net = slim.repeat(images, 2, slim.conv2d, 32, kernel_size=3, stride=1, scope='loc_conv1') #loc_net = slim.max_pool2d(loc_net, kernel_size=2, stride=2, scope='loc_pool1') # 8 x 16 rgb_output = channel_wise_attention(rgb_concat_net, images, "RGB") images = rgb_output tf.logging.info("stn network loaded...") # 1 x 2 # first apply the cnn feature extraction stage with slim.arg_scope( [slim.conv2d], weights_initializer=tf.truncated_normal_initializer(stddev=0.01), weights_regularizer=slim.l2_regularizer(0.0005), biases_initializer=None): with tf.variable_scope('FEN'): tf.logging.info("Start to loading cnn feature extraction network") net = slim.repeat(images, 2, slim.conv2d, 64, kernel_size=3, stride=1, scope='conv1') net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool1') # 32 x 64 net = slim.repeat(net, 
                              2, slim.conv2d, 128, kernel_size=3, stride=1, scope='conv2')
            C1 = net
            net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool2')
            # 16 x 32
            net = slim.repeat(net, 3, slim.conv2d, 256, kernel_size=3, stride=1, scope='conv3')
            C2 = net
            net = slim.max_pool2d(net, kernel_size=2, stride=2, scope='pool3')
            # 8 x 16
            net = slim.repeat(net, 3, slim.conv2d, 512, kernel_size=3, stride=1, scope='conv4')
            C3 = net
            net = slim.max_pool2d(net, kernel_size=[2, 1], stride=[2, 1], scope='pool4')
            # 4 x 16
            net = slim.repeat(net, 3, slim.conv2d, 512, kernel_size=3, stride=1, scope='conv5')
            C4 = net
            net = slim.max_pool2d(net, kernel_size=[2, 1], stride=[2, 1], scope='pool5')
            # 1 x 16
            C5 = net
            C1 = slim.conv2d(C1, 64, kernel_size=3, scope='C1_conv')
            C1 = slim.batch_norm(C1, decay=_BATCH_DECAY, is_training=training, scope='C1_BN')
            C2 = slim.conv2d(C2, 64, kernel_size=3, scope='C2_conv')
            C2 = slim.batch_norm(C2, decay=_BATCH_DECAY, is_training=training, scope='C2_BN')
            if config.with_CPFE:
                C1_cfe = CFE(C1, 32, 'C3_cfe', training)
                C2_cfe = CFE(C2, 32, 'C4_cfe', training)
                C3_cfe = CFE(C3, 32, 'C5_cfe', training)
                C1_cfe = BilinearDownsampling(C1_cfe, upsampling=(4, 4), name="C5_cfe_up4")
                C2_cfe = BilinearDownsampling(C2_cfe, upsampling=(2, 2), name="C4_cfe_up2")
                C123 = tf.concat([C1_cfe, C2_cfe, C3_cfe], axis=-1, name='C123_aspp_concat')
                C123 = slim.conv2d(C123, 64, kernel_size=1, scope='C123_conv')
                C123 = slim.batch_norm(C123, decay=_BATCH_DECAY, is_training=training, scope='C123_BN')
                # C123 = BilinearUpsampling(C345, upsampling=(4, 4), name="C123_up4")
            if config.with_SA:
                C5 = BilinearUpsampling(C5, upsampling=(2, 1), name="C2_up2")
                C45 = tf.concat([C4, C5], axis=-1, name='C12_concat')
                # C12 = Concatenate(name='C12_concat', axis=-1)([C1, C2])
                C45 = slim.conv2d(C45, 64, kernel_size=3, scope='C12_conv')
                # C12 = Conv2D(64, (3, 3), padding='same', name='C12_conv')(C12)
                C45 = slim.batch_norm(C45, decay=_BATCH_DECAY, is_training=training, scope='C12')
                # C12 = BN(C12, 'C12')
                # C45 = tf.multiply(SA, C45, name="C12_atten_multiply")
                # C12 = Multiply(name='C12_atten_mutiply')([SA, C12])
                C45 = BilinearUpsampling(C45, upsampling=(2, 1), name="C45_up3")
                SA = SpatialAttention(C45, training, name="spatial_attention")
            if config.with_CA:
                C45 = ChannelWiseAttention(C45, name="C345_ChannelWiseAttention_withcpfe")
            C123 = tf.multiply(SA, C123, name="C123_atten_multiply")
            net = tf.concat([C123, C45], axis=-1, name="fuse_concat")
            net = slim.conv2d(net, 256, padding="VALID", kernel_size=[2, 1], stride=[2, 1], scope='conv6')
            net.get_shape()  # debug shape check
            # 2 x 32
            cnn_out = slim.conv2d(net, 512, padding="VALID", kernel_size=[4, 1], stride=1, scope='conv7')
            # 1 x 32
            tf.logging.info("feature network loaded")
    # second apply the map to sequence stage
    shape = cnn_out.get_shape().as_list()
    assert shape[1] == 1
    sequence = tf.squeeze(cnn_out, axis=1)
    # third apply the sequence label stage
    shape = sequence.get_shape().as_list()
    B, W, C = shape
    with tf.variable_scope('Softmax_Layers'):
        # Doing the affine projection
        w = tf.Variable(tf.truncated_normal([C, num_classes], stddev=0.01), name="w")
        b = tf.Variable(tf.truncated_normal([num_classes], stddev=0.01), name="b")
        # NOTE: flatten time steps into the batch so the 2-D matmul is
        # well-defined (the original applied matmul directly to the 3-D
        # tensor), then restore the [B, W, num_classes] layout.
        logits = tf.matmul(tf.reshape(sequence, [-1, C]), w) + b
        logits = tf.reshape(logits, [B, W, num_classes])
        # Swap batch and time axes (time-major output)
        net_out = tf.transpose(logits, (1, 0, 2), name='transpose_time_major')
    return net_out
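# Sketch (assumed, not from this file): the transpose above yields a
# time-major tensor [W, B, num_classes], the layout tf.nn.ctc_loss expects
# with time_major=True. `sparse_labels` and `seq_len` are hypothetical inputs.
ctc = tf.reduce_mean(
    tf.nn.ctc_loss(labels=sparse_labels,
                   inputs=net_out,
                   sequence_length=seq_len,
                   time_major=True))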
def get_map_from_images(imgs, mapper_arch, task_params, freeze_conv, wt_decay, is_training, batch_norm_is_training_op, num_maps, split_maps=True): # Hit image with a resnet. n_views = len(task_params.aux_delta_thetas) + 1 out = utils.Foo() images_reshaped = tf.reshape(imgs, shape=[-1, task_params.img_height, task_params.img_width, task_params.img_channels], name='re_image') x, out.vars_to_restore = get_repr_from_image( images_reshaped, task_params.modalities, task_params.data_augment, mapper_arch.encoder, freeze_conv, wt_decay, is_training) # Reshape into nice things so that these can be accumulated over time steps # for faster backprop. sh_before = x.get_shape().as_list() out.encoder_output = tf.reshape(x, shape=[task_params.batch_size, -1, n_views] + sh_before[1:]) x = tf.reshape(out.encoder_output, shape=[-1] + sh_before[1:]) # Add a layer to reduce dimensions for a fc layer. if mapper_arch.dim_reduce_neurons > 0: ks = 1; neurons = mapper_arch.dim_reduce_neurons; init_var = np.sqrt(2.0/(ks**2)/neurons) batch_norm_param = mapper_arch.batch_norm_param batch_norm_param['is_training'] = batch_norm_is_training_op out.conv_feat = slim.conv2d(x, neurons, kernel_size=ks, stride=1, normalizer_fn=slim.batch_norm, normalizer_params=batch_norm_param, padding='SAME', scope='dim_reduce', weights_regularizer=slim.l2_regularizer(wt_decay), weights_initializer=tf.random_normal_initializer(stddev=init_var)) reshape_conv_feat = slim.flatten(out.conv_feat) sh = reshape_conv_feat.get_shape().as_list() out.reshape_conv_feat = tf.reshape(reshape_conv_feat, shape=[-1, sh[1]*n_views]) with tf.variable_scope('fc'): # Fully connected layers to compute the representation in top-view space. fc_batch_norm_param = {'center': True, 'scale': True, 'activation_fn':tf.nn.relu, 'is_training': batch_norm_is_training_op} f = out.reshape_conv_feat out_neurons = (mapper_arch.fc_out_size**2)*mapper_arch.fc_out_neurons neurons = mapper_arch.fc_neurons + [out_neurons] f, _ = tf_utils.fc_network(f, neurons=neurons, wt_decay=wt_decay, name='fc', offset=0, batch_norm_param=fc_batch_norm_param, is_training=is_training, dropout_ratio=mapper_arch.fc_dropout) f = tf.reshape(f, shape=[-1, mapper_arch.fc_out_size, mapper_arch.fc_out_size, mapper_arch.fc_out_neurons], name='re_fc') # Use pool5 to predict the free space map via deconv layers. with tf.variable_scope('deconv'): x, outs = deconv(f, batch_norm_is_training_op, wt_decay=wt_decay, neurons=mapper_arch.deconv_neurons, strides=mapper_arch.deconv_strides, layers_per_block=mapper_arch.deconv_layers_per_block, kernel_size=mapper_arch.deconv_kernel_size, conv_fn=slim.conv2d_transpose, offset=0, name='deconv') # Reshape x the right way. sh = x.get_shape().as_list() x = tf.reshape(x, shape=[task_params.batch_size, -1] + sh[1:]) out.deconv_output = x # Separate out the map and the confidence predictions, pass the confidence # through a sigmoid. if split_maps: with tf.name_scope('split'): out_all = tf.split(value=x, axis=4, num_or_size_splits=2*num_maps) out.fss_logits = out_all[:num_maps] out.confs_logits = out_all[num_maps:] with tf.name_scope('sigmoid'): out.confs_probs = [tf.nn.sigmoid(x) for x in out.confs_logits] return out
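# Hedged sketch of the split step above (shapes illustrative): with
# num_maps = 2 the deconv output's last axis holds the per-map free-space
# logits followed by the per-map confidence logits; tf.split on axis=4
# separates them, and the confidences go through a sigmoid.
import tensorflow as tf

x = tf.placeholder(tf.float32, [4, 3, 32, 32, 4])  # [batch, steps, H, W, 2*num_maps]
parts = tf.split(x, num_or_size_splits=4, axis=4)
fss_logits, confs_logits = parts[:2], parts[2:]
confs_probs = [tf.nn.sigmoid(c) for c in confs_logits]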
# -*- coding: utf-8 -*-
'''
Basic usage of tensorflow slim.
'''
import tensorflow as tf
import tensorflow.contrib.slim as slim

# Create a weight variable named "weights", initialize it from a truncated
# normal distribution, regularize it with l2_loss, and place it on the CPU.
weights_var = slim.variable(
    'weights',
    shape=[10, 10, 3, 3],
    initializer=tf.truncated_normal_initializer(stddev=0.1),
    regularizer=slim.l2_regularizer(0.05),
    device='/CPU:0')

# model_variable defines a variable that represents a model parameter;
# non-model variables are those needed during training or evaluation but not
# during inference (e.g. the global step).
weights_model_var = slim.model_variable(
    'weights',
    shape=[10, 10, 3, 3],
    initializer=tf.truncated_normal_initializer(stddev=0.1),
    regularizer=slim.l2_regularizer(0.05),
    device='/CPU:0')
model_variables = slim.get_model_variables()

# Define and retrieve a regular variable.
my_var = slim.variable("my_var",
                       shape=[20, 1],
                       initializer=tf.zeros_initializer())
regular_variables_and_model_variables = slim.get_variables()

# slim.model_variable adds the variable to the tf.GraphKeys.MODEL_VARIABLES
# collection; custom layers or variables can also be added to the appropriate
# collection by hand.
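# Sketch of the manual registration mentioned above, following the documented
# TF-Slim usage pattern: a variable created outside slim can be added to the
# MODEL_VARIABLES collection by hand.
my_model_variable = tf.Variable(tf.zeros([5]), name="custom_weights")
slim.add_model_variable(my_model_variable)  # now visible to slim.get_model_variables()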
def cnn_network(self, incoming, num_classes=1501, reuse=None, l2_normalize=True, create_summaries=False, weight_decay=1e-8): nonlinearity = tf.nn.elu conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3) conv_bias_init = tf.zeros_initializer() conv_regularizer = slim.l2_regularizer(self.weight_decay) fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3) fc_bias_init = tf.zeros_initializer() fc_regularizer = slim.l2_regularizer(self.weight_decay) def batch_norm_fn(x): return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn") network = incoming network = slim.conv2d(network, 32, [3, 3], stride=1, activation_fn=nonlinearity, padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1", weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, weights_regularizer=conv_regularizer) if create_summaries: tf.summary.histogram(network.name + "/activations", network) tf.summary.image("conv1_1/weights", tf.transpose( slim.get_variables("conv1_1/weights:0")[0], [3, 0, 1, 2]), max_outputs=128) network = slim.conv2d(network, 32, [3, 3], stride=1, activation_fn=nonlinearity, padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2", weights_initializer=conv_weight_init, biases_initializer=conv_bias_init, weights_regularizer=conv_regularizer) if create_summaries: tf.summary.histogram(network.name + "/activations", network) network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1") network = self.cnn_residual_block( network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init, conv_regularizer, increase_dim=False, is_first=True, summarize_activations=create_summaries) network = self.cnn_residual_block( network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init, conv_regularizer, increase_dim=False, summarize_activations=create_summaries) network = self.cnn_residual_block( network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init, conv_regularizer, increase_dim=True, summarize_activations=create_summaries) network = self.cnn_residual_block( network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init, conv_regularizer, increase_dim=False, summarize_activations=create_summaries) network = self.cnn_residual_block( network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init, conv_regularizer, increase_dim=True, summarize_activations=create_summaries) network = self.cnn_residual_block( network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init, conv_regularizer, increase_dim=False, summarize_activations=create_summaries) feature_dim = network.get_shape().as_list()[-1] # print("feature dimensionality: ", feature_dim) network = slim.flatten(network) network = slim.dropout(network, keep_prob=0.6) network = slim.fully_connected(network, feature_dim, activation_fn=nonlinearity, normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer, scope="fc1", weights_initializer=fc_weight_init, biases_initializer=fc_bias_init) features = network if l2_normalize: # Features in rows, normalize axis 1. 
features = slim.batch_norm(features, scope="ball", reuse=reuse) feature_norm = tf.sqrt( tf.constant(1e-8, tf.float32) + tf.reduce_sum(tf.square(features), [1], keep_dims=True)) features = features / feature_norm with slim.variable_scope.variable_scope("ball", reuse=reuse): weights = slim.model_variable( "mean_vectors", (feature_dim, num_classes), initializer=tf.truncated_normal_initializer(stddev=1e-3), regularizer=None) scale = slim.model_variable("scale", (num_classes, ), tf.float32, tf.constant_initializer( 0., tf.float32), regularizer=None) if create_summaries: tf.summary.histogram("scale", scale) # scale = slim.model_variable( # "scale", (), tf.float32, # initializer=tf.constant_initializer(0., tf.float32), # regularizer=slim.l2_regularizer(1e-2)) # if create_summaries: # tf.scalar_summary("scale", scale) scale = tf.nn.softplus(scale) # Each mean vector in columns, normalize axis 0. weight_norm = tf.sqrt( tf.constant(1e-8, tf.float32) + tf.reduce_sum(tf.square(weights), [0], keep_dims=True)) logits = scale * tf.matmul(features, weights / weight_norm) else: logits = slim.fully_connected(features, num_classes, activation_fn=None, normalizer_fn=None, weights_regularizer=fc_regularizer, scope="softmax", weights_initializer=fc_weight_init, biases_initializer=fc_bias_init) return features
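# Observation (not a change to the source): the "ball" branch above builds a
# cosine-softmax classifier, yet only `features` is returned and `logits` is
# discarded; a training head would typically also consume the logits. A
# self-contained sketch of the same logit construction, with assumed sizes
# (128-d features, 10 classes):
import tensorflow as tf

feat = tf.placeholder(tf.float32, [None, 128])  # assumed L2-normalized features
means = tf.Variable(tf.truncated_normal([128, 10], stddev=1e-3))
scale = tf.nn.softplus(tf.Variable(tf.zeros([10])))
# normalize each class mean vector (columns), mirroring the code above
means_norm = means / tf.sqrt(
    1e-8 + tf.reduce_sum(tf.square(means), [0], keep_dims=True))
logits = scale * tf.matmul(feat, means_norm)  # scaled cosine similarity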
def fusion(self, vid_emb_state, cap_emb_state, iii, reuse=False): states = [] for i in range(int(self.config.video_steps)): vid_sample = tf.tile(tf.expand_dims(vid_emb_state[:, i, :], 1), [1, self.config.caption_length, 1]) sum_repr = tf.multiply(vid_sample, cap_emb_state) states.append(sum_repr) # V x B x C x 256 cnn_repr = tf.stack(states) # B x V x C x 256 cnn_repr = tf.transpose(cnn_repr, [1, 0, 2, 3]) with slim.arg_scope( [slim.fully_connected], weights_regularizer=slim.l2_regularizer(0.0005), #activation_fn=tf.nn.tanh, normalizer_fn=self.bn_fn, normalizer_params=self.bn_params): h1 = slim.fully_connected(cnn_repr, 512, scope='rel_h1', activation_fn=tf.nn.tanh, reuse=reuse) input_gate1 = slim.fully_connected(h1, 1, scope='rel_halp', activation_fn=tf.nn.sigmoid, reuse=reuse) h2 = slim.fully_connected(cnn_repr, 512, scope='rel_h2', activation_fn=tf.nn.tanh, reuse=reuse) h3 = slim.fully_connected(h2, 512, scope='rel_h3', activation_fn=tf.nn.tanh, reuse=reuse) output1 = tf.multiply(h3, input_gate1) output1 = tf.multiply( tf.multiply( output1, tf.expand_dims( tf.expand_dims(tf.expand_dims(self.video_mask[iii], 0), 2), 3)), tf.expand_dims(tf.expand_dims(self.caption_mask, 1), 3)) # Conv with slim.arg_scope( [slim.conv2d], activation_fn=tf.nn.tanh, weights_initializer=self.initializer, weights_regularizer=slim.l2_regularizer(0.0005), #normalizer_fn=self.bn_fn, #normalizer_params=self.bn_params, reuse=reuse): # Conv1 conv1 = slim.conv2d(output1, 256, [3, 3], padding="Valid", scope='conv1', activation_fn=tf.nn.leaky_relu) # added ratio = 4 average_conv1 = slim.avg_pool2d(conv1, [conv1.shape[1], conv1.shape[2]]) fc1_conv1 = slim.fully_connected(average_conv1, int(256 / ratio), activation_fn=tf.nn.leaky_relu, scope='conv1_fc1', reuse=tf.AUTO_REUSE) fc2_conv1 = slim.fully_connected(fc1_conv1, 256, activation_fn=tf.nn.sigmoid, scope='conv1_fc2', reuse=tf.AUTO_REUSE) fc2_conv1 = tf.reshape(fc2_conv1, [-1, 1, 1, 256]) conv1 = conv1 * fc2_conv1 convalp1 = slim.conv2d(output1, 1, [3, 3], padding="Valid", scope='conv1alp', activation_fn=tf.nn.sigmoid) input_gate2 = convalp1 output2 = tf.multiply(conv1, input_gate2) output2 = tf.multiply( tf.multiply( output2, tf.expand_dims( tf.expand_dims( tf.expand_dims(self.video_mask_list[0][iii], 0), 2), 3)), tf.expand_dims(tf.expand_dims(self.caption_mask_list[0], 1), 3)) #Conv2 conv2 = slim.conv2d(output2, 256, [3, 3], padding="Valid", scope='conv2', activation_fn=tf.nn.leaky_relu) # added ratio = 4 average_conv2 = slim.avg_pool2d(conv2, [conv2.shape[1], conv2.shape[2]]) fc1_conv2 = slim.fully_connected(average_conv2, int(256 / ratio), activation_fn=tf.nn.leaky_relu, scope='conv2_fc1', reuse=tf.AUTO_REUSE) fc2_conv2 = slim.fully_connected(fc1_conv2, 256, activation_fn=tf.nn.sigmoid, scope='conv2_fc2', reuse=tf.AUTO_REUSE) fc2_conv2 = tf.reshape(fc2_conv2, [-1, 1, 1, 256]) conv2 = conv2 * fc2_conv2 convalp2 = slim.conv2d(output2, 1, [3, 3], padding="Valid", scope='conv2alp', activation_fn=tf.nn.sigmoid) input_gate3 = convalp2 output3 = tf.multiply(conv2, input_gate3) output3 = tf.multiply( tf.multiply( output3, tf.expand_dims( tf.expand_dims( tf.expand_dims(self.video_mask_list[1][iii], 0), 2), 3)), tf.expand_dims(tf.expand_dims(self.caption_mask_list[1], 1), 3)) #Conv3 conv3 = slim.conv2d(output3, 256, [3, 3], [2, 2], padding="Valid", scope='conv3', activation_fn=tf.nn.leaky_relu) # added ratio = 4 average_conv3 = slim.avg_pool2d(conv3, [conv3.shape[1], conv3.shape[2]]) fc1_conv3 = slim.fully_connected(average_conv3, int(256 / ratio), 
activation_fn=tf.nn.leaky_relu, scope='conv3_fc1', reuse=tf.AUTO_REUSE) fc2_conv3 = slim.fully_connected(fc1_conv3, 256, activation_fn=tf.nn.sigmoid, scope='conv3_fc2', reuse=tf.AUTO_REUSE) fc2_conv3 = tf.reshape(fc2_conv3, [-1, 1, 1, 256]) conv3 = conv3 * fc2_conv3 convalp3 = slim.conv2d(output3, 1, [3, 3], [2, 2], padding="Valid", scope='conv3alp', activation_fn=tf.nn.sigmoid) input_gate4 = convalp3 output4 = tf.multiply(conv3, input_gate4) output4 = tf.multiply( tf.multiply( output4, tf.expand_dims( tf.expand_dims( tf.expand_dims(self.video_mask_list[2][iii], 0), 2), 3)), tf.expand_dims(tf.expand_dims(self.caption_mask_list[2], 1), 3)) valid = tf.multiply( tf.reduce_sum(self.video_mask_list[2][iii], axis=0), tf.reduce_sum(self.caption_mask_list[2], axis=1)) sum_state = tf.div(tf.reduce_sum(output4, [1, 2]), tf.expand_dims(valid, axis=1)) return sum_state
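# The conv blocks above repeat a squeeze-and-excitation style channel gate;
# a hedged, generic sketch of that pattern (names are illustrative, not from
# the source):
import tensorflow as tf
import tensorflow.contrib.slim as slim

def se_gate(x, ratio=4):
    channels = int(x.shape[-1])
    pooled = slim.avg_pool2d(x, [x.shape[1], x.shape[2]])  # global average pool
    squeezed = slim.fully_connected(pooled, channels // ratio,
                                    activation_fn=tf.nn.leaky_relu)
    excited = slim.fully_connected(squeezed, channels,
                                   activation_fn=tf.nn.sigmoid)
    # rescale each channel of x by its learned gate
    return x * tf.reshape(excited, [-1, 1, 1, channels])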
def pfld_inference_for_shuffleNetV2(input, weight_decay, shuffle_group=2):
    # [(out_channel, repeat_times), (out_channel, repeat_times), ...]
    # # model_scale = 0.5
    # channel_sizes = [(48, 4), (96, 8), (192, 4), (1024, 1)]

    # model_scale = 1.0
    channel_sizes = [(116, 4), (232, 8), (464, 4), (1024, 1)]

    # # model_scale = 1.5
    # channel_sizes = [(176, 4), (352, 8), (704, 4), (1024, 1)]

    # # model_scale = 2.0
    # channel_sizes = [(244, 4), (488, 8), (976, 4), (2048, 1)]
    with tf.variable_scope('pfld_inference'):
        features = {}
        with slim.arg_scope([slim.convolution2d, slim.separable_conv2d],
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                            biases_initializer=tf.zeros_initializer(),
                            weights_regularizer=slim.l2_regularizer(weight_decay),
                            padding='SAME'):
            print('PFLD input shape({}): {}'.format(input.name, input.get_shape()))
            # 112*112*3 => 56*56*24
            with tf.variable_scope('conv_1'):
                conv1 = conv_bn_relu(input, 24, [3, 3], stride=2)
                print(conv1.name, conv1.get_shape())

            # stage 2: 56*56*24 => 28*28*C
            with tf.variable_scope('shuffle_block_1'):
                out_channel, repeate_times = channel_sizes[0]
                shuffle_block_1 = shufflenet_v2_block(conv1, out_channel, stride=2,
                                                      shuffle_group=shuffle_group)
                print(shuffle_block_1.name, shuffle_block_1.get_shape())
                for i in range(repeate_times - 1):
                    shuffle_block_1 = shufflenet_v2_block(shuffle_block_1, out_channel,
                                                          stride=1, shuffle_group=shuffle_group)
                    print(shuffle_block_1.name, shuffle_block_1.get_shape())
                features['auxiliary_input'] = shuffle_block_1

            # stage 3: 28*28*C => 14*14*C
            with tf.variable_scope('shuffle_block_2'):
                out_channel, repeate_times = channel_sizes[1]
                shuffle_block_2 = shufflenet_v2_block(shuffle_block_1, out_channel, stride=2,
                                                      shuffle_group=shuffle_group)
                print(shuffle_block_2.name, shuffle_block_2.get_shape())
                for i in range(repeate_times - 1):
                    shuffle_block_2 = shufflenet_v2_block(shuffle_block_2, out_channel,
                                                          stride=1, shuffle_group=shuffle_group)
                    print(shuffle_block_2.name, shuffle_block_2.get_shape())

            # stage 4: 14*14*C => 7*7*C
            with tf.variable_scope('shuffle_block_3'):
                out_channel, repeate_times = channel_sizes[2]
                shuffle_block_3 = shufflenet_v2_block(shuffle_block_2, out_channel, stride=2,
                                                      shuffle_group=shuffle_group)
                print(shuffle_block_3.name, shuffle_block_3.get_shape())
                for i in range(repeate_times - 1):
                    shuffle_block_3 = shufflenet_v2_block(shuffle_block_3, out_channel,
                                                          stride=1, shuffle_group=shuffle_group)
                    print(shuffle_block_3.name, shuffle_block_3.get_shape())

            # 7*7*C => 1*1*C
            with tf.variable_scope('end_conv'):
                with slim.arg_scope([slim.convolution2d], padding='valid'):
                    out_channel = channel_sizes[-1][0]
                    end_conv = conv_bn_relu(shuffle_block_3, out_channel, [3, 3], stride=1)
                    print(end_conv.name, end_conv.get_shape())
                    end_conv = conv_bn_relu(end_conv, out_channel, [3, 3], stride=1)
                    print(end_conv.name, end_conv.get_shape())
                    end_conv = conv_bn_relu(end_conv, out_channel, [3, 3], stride=1)
                    print(end_conv.name, end_conv.get_shape())

            group_pool1 = slim.avg_pool2d(
                shuffle_block_2,
                [shuffle_block_2.get_shape()[1], shuffle_block_2.get_shape()[2]],
                stride=1)
            print(group_pool1.name, group_pool1.get_shape())
            group_pool2 = slim.avg_pool2d(
                shuffle_block_3,
                [shuffle_block_3.get_shape()[1], shuffle_block_3.get_shape()[2]],
                stride=1)
            print(group_pool2.name, group_pool2.get_shape())
            group_pool3 = slim.avg_pool2d(
                end_conv,
                [end_conv.get_shape()[1], end_conv.get_shape()[2]],
                stride=1)
            print(group_pool3.name, group_pool3.get_shape())

            s1 = slim.flatten(group_pool1)
            s2 = slim.flatten(group_pool2)
            s3 = slim.flatten(group_pool3)
            multi_scale = tf.concat([s1, s2, s3], 1)
            landmarks = slim.fully_connected(multi_scale, num_outputs=196,
                                             activation_fn=None, scope='fc')
            print(landmarks.name, landmarks.get_shape())
            return features, landmarks
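# Hedged usage sketch for the PFLD head above: a 112x112 RGB placeholder
# matches the sizes in the comments, and the 196 outputs are conventionally
# read as 98 (x, y) landmark pairs (an assumption about the dataset layout).
import tensorflow as tf

image = tf.placeholder(tf.float32, [None, 112, 112, 3], name='image')
features, landmarks = pfld_inference_for_shuffleNetV2(image, weight_decay=1e-5)
landmarks_xy = tf.reshape(landmarks, [-1, 98, 2])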
def P_Net(inputs, label=None, bbox_target=None, landmark_target=None, training=True): #define common param print('P_Net') with slim.arg_scope([slim.conv2d], activation_fn=prelu, weights_initializer=slim.xavier_initializer(), biases_initializer=tf.zeros_initializer(), weights_regularizer=slim.l2_regularizer(0.0005), padding='valid'): print(inputs.get_shape()) net = slim.conv2d(inputs, 10, 3, stride=1, scope='conv1') _activation_summary(net) print(net.get_shape()) net = slim.max_pool2d(net, kernel_size=[2, 2], stride=2, scope='pool1', padding='SAME') _activation_summary(net) print(net.get_shape()) net = slim.conv2d(net, num_outputs=16, kernel_size=[3, 3], stride=1, scope='conv2') _activation_summary(net) print(net.get_shape()) # net = slim.conv2d(net, num_outputs=32, kernel_size=[3, 3], stride=1, scope='conv3') _activation_summary(net) print(net.get_shape()) #batch*H*W*2 conv4_1 = slim.conv2d(net, num_outputs=2, kernel_size=[1, 1], stride=1, scope='conv4_1', activation_fn=tf.nn.softmax) _activation_summary(conv4_1) #conv4_1 = slim.conv2d(net,num_outputs=1,kernel_size=[1,1],stride=1,scope='conv4_1',activation_fn=tf.nn.sigmoid) print(conv4_1.get_shape()) #batch*H*W*4 bbox_pred = slim.conv2d(net, num_outputs=4, kernel_size=[1, 1], stride=1, scope='conv4_2', activation_fn=None) _activation_summary(bbox_pred) print(bbox_pred.get_shape()) #batch*H*W*10 landmark_pred = slim.conv2d(net, num_outputs=(no_landmarks * 2), kernel_size=[1, 1], stride=1, scope='conv4_3', activation_fn=None) _activation_summary(landmark_pred) print(landmark_pred.get_shape()) # add projectors for visualization #cls_prob_original = conv4_1 #bbox_pred_original = bbox_pred if training: #batch*2 # calculate classification loss cls_prob = tf.squeeze(conv4_1, [1, 2], name='cls_prob') cls_loss = cls_ohem(cls_prob, label) #batch # cal bounding box error, squared sum error bbox_pred = tf.squeeze(bbox_pred, [1, 2], name='bbox_pred') bbox_loss = bbox_ohem(bbox_pred, bbox_target, label) #batch*10 landmark_pred = tf.squeeze(landmark_pred, [1, 2], name="landmark_pred") landmark_loss = landmark_ohem(landmark_pred, landmark_target, label) accuracy = cal_accuracy(cls_prob, label) L2_loss = tf.add_n(slim.losses.get_regularization_losses()) return cls_loss, bbox_loss, landmark_loss, L2_loss, accuracy, landmark_pred #test else: #when test,batch_size = 1 cls_pro_test = tf.squeeze(conv4_1, axis=0) print('class prob shape', cls_pro_test.get_shape()) bbox_pred_test = tf.squeeze(bbox_pred, axis=0) print('bbox shape', bbox_pred_test.get_shape()) landmark_pred_test = tf.squeeze(landmark_pred, axis=0) print('landmark pred shape', landmark_pred_test.get_shape()) return cls_pro_test, bbox_pred_test, landmark_pred_test
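# Hedged sketch: at test time P_Net is typically run fully convolutionally
# over an image pyramid, so height and width stay dynamic; batch size 1
# matches the squeeze(axis=0) in the test branch above.
import tensorflow as tf

image = tf.placeholder(tf.float32, [1, None, None, 3], name='pnet_input')
cls_pro_test, bbox_pred_test, landmark_pred_test = P_Net(image, training=False)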