def conv_net_kelz(inputs):
  """Builds the ConvNet from Kelz 2016."""
  # He-style init averaged over fan-in/fan-out, as in the original paper code.
  init = tf.contrib.layers.variance_scaling_initializer(
      factor=2.0, mode='FAN_AVG', uniform=True)
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      weights_initializer=init):
    net = slim.conv2d(inputs, 32, [3, 3],
                      normalizer_fn=slim.batch_norm, scope='conv1')
    net = slim.conv2d(net, 32, [3, 3],
                      normalizer_fn=slim.batch_norm, scope='conv2')
    # Pool/dropout only along the frequency axis.
    net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool2')
    net = slim.dropout(net, 0.25, scope='dropout2')
    net = slim.conv2d(net, 64, [3, 3],
                      normalizer_fn=slim.batch_norm, scope='conv3')
    net = slim.max_pool2d(net, [1, 2], stride=[1, 2], scope='pool3')
    net = slim.dropout(net, 0.25, scope='dropout3')

    # Flatten while preserving batch and time dimensions.
    dims = tf.shape(net)
    flat_size = net.shape[2].value * net.shape[3].value
    net = tf.reshape(net, (dims[0], dims[1], flat_size), 'flatten4')

    net = slim.fully_connected(net, 512, scope='fc5')
    net = slim.dropout(net, 0.5, scope='dropout5')
    return net
def build_graph(top_k):
  """Builds the batch-normalized conv classifier graph.

  Returns a dict of graph endpoints (placeholders, train op, metrics,
  summaries, and top-k predictions).
  """
  keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
  images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
  labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
  is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')

  with tf.device('/gpu:0'):
    # Batch-normalize every conv/FC layer, keyed off the is_training flag.
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'is_training': is_training}):
      net = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv3_1')
      net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool1')
      net = slim.conv2d(net, 128, [3, 3], padding='SAME', scope='conv3_2')
      net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool2')
      net = slim.conv2d(net, 256, [3, 3], padding='SAME', scope='conv3_3')
      net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool3')
      net = slim.conv2d(net, 512, [3, 3], padding='SAME', scope='conv3_4')
      net = slim.conv2d(net, 512, [3, 3], padding='SAME', scope='conv3_5')
      net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool4')
      net = slim.flatten(net)
      fc1 = slim.fully_connected(slim.dropout(net, keep_prob), 1024,
                                 activation_fn=tf.nn.relu, scope='fc1')
      logits = slim.fully_connected(slim.dropout(fc1, keep_prob),
                                    FLAGS.charset_size, activation_fn=None,
                                    scope='fc2')

    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

    # Ensure batch-norm moving-average updates run before the loss is consumed.
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    if update_ops:
      loss = control_flow_ops.with_dependencies([tf.group(*update_ops)], loss)

    global_step = tf.get_variable("step", [],
                                  initializer=tf.constant_initializer(0.0),
                                  trainable=False)
    optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
    train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step)
    probabilities = tf.nn.softmax(logits)

    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)
    merged_summary_op = tf.summary.merge_all()
    predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
    accuracy_in_top_k = tf.reduce_mean(
        tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))

  return {'images': images,
          'labels': labels,
          'keep_prob': keep_prob,
          'top_k': top_k,
          'global_step': global_step,
          'train_op': train_op,
          'loss': loss,
          'is_training': is_training,
          'accuracy': accuracy,
          'accuracy_top_k': accuracy_in_top_k,
          'merged_summary_op': merged_summary_op,
          'predicted_distribution': probabilities,
          'predicted_index_top_k': predicted_index_top_k,
          'predicted_val_top_k': predicted_val_top_k}
def construct_embedding(self):
  """Builds a conv -> spatial softmax -> FC adaptation network."""
  is_training = self._is_training
  normalizer_params = {'is_training': is_training}
  # One shared L2 regularizer for all weights and biases.
  l2_reg = slim.regularizers.l2_regularizer(self._l2_reg_weight)
  with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
    self._adaptation_scope = vs.name
    # Same settings for conv and FC layers, so use a single arg_scope.
    with slim.arg_scope(
        [slim.layers.conv2d, slim.layers.fully_connected],
        activation_fn=tf.nn.relu,
        normalizer_fn=slim.batch_norm,
        normalizer_params=normalizer_params,
        weights_regularizer=l2_reg,
        biases_regularizer=l2_reg):
      # Input to the embedder is pre-trained inception output.
      net = self._pretrained_output

      # Optionally add more conv layers, each followed by dropout.
      for num_filters in self._additional_conv_sizes:
        net = slim.layers.conv2d(
            net, num_filters, kernel_size=[3, 3], stride=[1, 1])
        net = slim.dropout(net, keep_prob=self._conv_hidden_keep_prob,
                           is_training=is_training)

      # Spatial soft arg-max of the last conv layer: a form of spatial
      # attention over the activations. See http://arxiv.org/abs/1509.06113.
      net = tf.contrib.layers.spatial_softmax(net)
      self.spatial_features = net

      # Fully connected head.
      net = slim.layers.flatten(net)
      for fc_hidden_size in self._fc_hidden_sizes:
        net = slim.layers.fully_connected(net, fc_hidden_size)
        if self._fc_hidden_keep_prob < 1.0:
          net = slim.dropout(net, keep_prob=self._fc_hidden_keep_prob,
                             is_training=is_training)

      # Project to the embedding; optionally L2-normalize it.
      net = slim.layers.fully_connected(
          net, self._embedding_size, activation_fn=None)
      if self._embedding_l2:
        net = tf.nn.l2_normalize(net, dim=1)
      return net
def inference(inputs, is_training=True):
  """Builds a small residual classifier for 28x28 single-channel inputs.

  Args:
    inputs: Flattened image batch; reshaped internally to [-1, 28, 28, 1].
    is_training: Python bool or bool tensor controlling dropout. Defaults to
      True, which preserves the original behavior (dropout previously ran
      unconditionally, even at eval time).

  Returns:
    Unnormalized class logits of shape [batch, 10].
  """
  x = tf.reshape(inputs, [-1, 28, 28, 1])
  # slim.conv2d applies ReLU by default, so the former extra tf.nn.relu
  # wrapper was a no-op (relu(relu(x)) == relu(x)) and is removed.
  conv_1 = slim.conv2d(x, 32, [3, 3])        # 28 x 28 x 32
  pool_1 = slim.max_pool2d(conv_1, [2, 2])   # 14 x 14 x 32
  block_1 = res_identity(pool_1, 32, [3, 3], 'layer_2')
  block_2 = res_change(block_1, 64, [3, 3], 'layer_3')
  block_3 = res_identity(block_2, 64, [3, 3], 'layer_4')
  block_4 = res_change(block_3, 32, [3, 3], 'layer_5')
  net_flatten = slim.flatten(block_4, scope='flatten')
  # Bug fix: dropout is now tied to is_training; previously it was always
  # active, corrupting eval/inference predictions.
  fc_1 = slim.fully_connected(
      slim.dropout(net_flatten, 0.8, is_training=is_training), 200,
      activation_fn=tf.nn.tanh, scope='fc_1')
  output = slim.fully_connected(
      slim.dropout(fc_1, 0.8, is_training=is_training), 10,
      activation_fn=None, scope='output_layer')
  return output
def _build_network(self, sess, is_training=True):
  """Builds the VGG-16 Faster R-CNN network (backbone, RPN, RoI head).

  Args:
    sess: TF session (unused here; kept for the caller's interface).
    is_training: If True, conv3-conv5 are trainable and dropout is added
      after fc6/fc7. conv1/conv2 are always frozen.

  Returns:
    (rois, cls_prob, bbox_pred) from the region-proposal and
    region-classification heads.
  """
  with tf.variable_scope('vgg_16', 'vgg_16'):
    # select initializers
    if cfg.TRAIN.TRUNCATED:
      initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
      initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
      initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    # VGG-16 backbone; first two stages are frozen (trainable=False).
    net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                      trainable=False, scope='conv1')
    net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
    net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                      trainable=False, scope='conv2')
    net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
    net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                      trainable=is_training, scope='conv3')
    net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
    net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                      trainable=is_training, scope='conv4')
    net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
    # Note: no pool5 — the conv5 feature map feeds RoI pooling directly.
    net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                      trainable=is_training, scope='conv5')
    self._act_summaries.append(net)
    self._layers['head'] = net
    # build the anchors for the image
    self._anchor_component()
    # region proposal network
    rois = self._region_proposal(net, is_training, initializer)
    # region of interest pooling
    if cfg.POOLING_MODE == 'crop':
      pool5 = self._crop_pool_layer(net, rois, "pool5")
    else:
      raise NotImplementedError
    # Two-FC head; dropout layers only exist in the training graph.
    pool5_flat = slim.flatten(pool5, scope='flatten')
    fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
    if is_training:
      fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True, scope='dropout6')
    fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
    if is_training:
      fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True, scope='dropout7')
    # region classification
    cls_prob, bbox_pred = self._region_classification(fc7, is_training,
                                                      initializer, initializer_bbox)
    self._score_summaries.update(self._predictions)
  return rois, cls_prob, bbox_pred
def _head_to_tail(self, pool5, is_training, reuse=False):
  """Runs the VGG fc6/fc7 head on pooled RoI features; returns fc7."""
  with tf.variable_scope(self._scope, self._scope, reuse=reuse):
    net = slim.flatten(pool5, scope='flatten')
    net = slim.fully_connected(net, 4096, scope='fc6')
    # Dropout layers exist only in the training graph.
    if is_training:
      net = slim.dropout(net, keep_prob=0.5, is_training=True,
                         scope='dropout6')
    net = slim.fully_connected(net, 4096, scope='fc7')
    if is_training:
      net = slim.dropout(net, keep_prob=0.5, is_training=True,
                         scope='dropout7')
    return net
def build_arch_baseline(input, is_train: bool, num_classes: int):
  """Builds the baseline CNN: two conv/pool stages, FC, dropout, FC logits.

  Args:
    input: Input image batch tensor.
    is_train: Whether variables are trainable and dropout is active.
    num_classes: Number of output classes.

  Returns:
    Unnormalized logits of shape [batch, num_classes].
  """
  bias_initializer = tf.truncated_normal_initializer(
      mean=0.0, stddev=0.01)  # tf.constant_initializer(0.0)
  # The paper did not mention any regularization; a common l2 regularizer
  # on weights is added here.
  weights_regularizer = tf.contrib.layers.l2_regularizer(5e-04)
  tf.logging.info('input shape: {}'.format(input.get_shape()))
  with slim.arg_scope([slim.conv2d, slim.fully_connected], trainable=is_train,
                      biases_initializer=bias_initializer,
                      weights_regularizer=weights_regularizer):
    with tf.variable_scope('relu_conv1') as scope:
      output = slim.conv2d(input, num_outputs=32, kernel_size=[5, 5],
                           stride=1, padding='SAME', scope=scope,
                           activation_fn=tf.nn.relu)
    output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer1')
    tf.logging.info('output shape: {}'.format(output.get_shape()))

    with tf.variable_scope('relu_conv2') as scope:
      output = slim.conv2d(output, num_outputs=64, kernel_size=[5, 5],
                           stride=1, padding='SAME', scope=scope,
                           activation_fn=tf.nn.relu)
    output = slim.max_pool2d(output, [2, 2], scope='max_2d_layer2')
    tf.logging.info('output shape: {}'.format(output.get_shape()))

    output = slim.flatten(output)
    output = slim.fully_connected(output, 1024, scope='relu_fc3',
                                  activation_fn=tf.nn.relu)
    tf.logging.info('output shape: {}'.format(output.get_shape()))
    # Bug fix: dropout previously ran unconditionally (slim.dropout defaults
    # is_training=True), so it was active at eval/inference time as well.
    output = slim.dropout(output, 0.5, is_training=is_train, scope='dp')
    output = slim.fully_connected(output, num_classes, scope='final_layer',
                                  activation_fn=None)
    tf.logging.info('output shape: {}'.format(output.get_shape()))
    return output
def LResnet50E_IR(images, keep_probability, phase_train=True, bottleneck_layer_size=512, weight_decay=0.0, reuse=None):
    '''
    conv name
    conv[conv_layer]_[block_index]_[block_layer_index]
    for resnet50 n_units=[3,4,14,3], consider one unit is dim_reduction_layer
    repeat n_units=[2,3,13,2]
    '''
    with tf.variable_scope('Conv1'):
        net = slim.conv2d(images, 64, scope='Conv1_pre')
        net = slim.batch_norm(net, scope='Conv1_bn')
    # Each stage: one stride-2 reduction block followed by N identity blocks.
    stages = [('Conv2', 64, 2),
              ('Conv3', 128, 3),
              ('Conv4', 256, 13),
              ('Conv5', 512, 2)]
    for stage_scope, depth, num_blocks in stages:
        with tf.variable_scope(stage_scope):
            net = resface_block(net, depth, stride=2, dim_match=False,
                                scope=stage_scope + '_pre')
            net = slim.repeat(net, num_blocks, resface_block, depth, 1, True,
                              scope=stage_scope + '_main')
    with tf.variable_scope('Logits'):
        net = slim.batch_norm(net, activation_fn=None, scope='bn1')
        net = slim.dropout(net, keep_probability, is_training=phase_train,
                           scope='Dropout')
        net = slim.flatten(net)
        net = slim.fully_connected(
            net, bottleneck_layer_size,
            biases_initializer=tf.contrib.layers.xavier_initializer(),
            scope='fc1')
        net = slim.batch_norm(net, activation_fn=None, scope='Bottleneck')
    return net, ''
def metric_net(img, scope, df_dim=64, reuse=False, train=True):
    """Metric/discriminator net: four lrelu-conv + max-pool stages, two FCs.

    Args:
        img: Input image batch.
        scope: Name prefix; variables live under `scope + '_discriminator'`.
        df_dim: Base filter count.
        reuse: Whether to reuse existing variables.
        train: Training-mode flag (batch norm statistics and dropout).

    Returns:
        A [batch, df_dim] feature tensor.
    """
    bn = functools.partial(slim.batch_norm, scale=True, is_training=train,
                           decay=0.9, epsilon=1e-5, updates_collections=None)
    with tf.variable_scope(scope + '_discriminator', reuse=reuse):
        h0 = lrelu(conv(img, df_dim, 4, 2, scope='h0_conv'))  # h0 is (128 x 128 x df_dim)
        pool1 = Mpool(h0, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
        h1 = lrelu(conv(pool1, df_dim * 2, 4, 2, scope='h1_conv'))  # h1 is (32 x 32 x df_dim*2)
        pool2 = Mpool(h1, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
        h2 = lrelu(conv(pool2, df_dim * 4, 4, 2, scope='h2_conv'))  # h2 is (8 x 8 x df_dim*4)
        pool3 = Mpool(h2, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
        # Comment fix: this conv emits df_dim*8 channels, not df_dim*4.
        h3 = lrelu(conv(pool3, df_dim * 8, 4, 2, scope='h3_conv'))  # h3 is (2 x 2 x df_dim*8)
        pool4 = Mpool(h3, [1, 2, 2, 1], [1, 2, 2, 1], padding='VALID')
        shape = pool4.get_shape()
        flatten_shape = shape[1].value * shape[2].value * shape[3].value
        h3_reshape = tf.reshape(pool4, [-1, flatten_shape], name='h3_reshape')
        fc1 = lrelu(FC(h3_reshape, df_dim * 2, scope='fc1'))
        # Bug fix: dropout is now tied to the `train` flag; it previously ran
        # unconditionally, including at inference time.
        dropout_fc1 = slim.dropout(fc1, 0.5, is_training=train,
                                   scope='dropout_fc1')
        net = FC(dropout_fc1, df_dim, scope='fc2')
        return net
def clone_fn(batch_queue):
    """Allows data parallelism by creating multiple clones of network_fn."""
    (images, b_input_mask, b_labels_input,
     b_box_delta_input, b_box_input) = batch_queue.dequeue()
    anchors = tf.convert_to_tensor(config.ANCHOR_SHAPE, dtype=tf.float32)

    end_points = network_fn(images)
    end_points["viz_images"] = images

    # Attach the detection head to an intermediate MobileNet feature map.
    features = end_points['MobileNet/conv_ds_14/depthwise_conv']
    features = slim.dropout(features, keep_prob=0.5, is_training=True)
    num_output = config.NUM_ANCHORS * (config.NUM_CLASSES + 1 + 4)
    predict = slim.conv2d(
        features, num_output, kernel_size=(3, 3), stride=1, padding='SAME',
        activation_fn=None,
        weights_initializer=tf.truncated_normal_initializer(stddev=0.0001),
        scope="MobileNet/conv_predict")

    with tf.name_scope("Interpre_prediction") as scope:
        (pred_box_delta, pred_class_probs, pred_conf, ious,
         det_probs, det_boxes, det_class) = interpre_prediction(
             predict, b_input_mask, anchors, b_box_input)
        end_points["viz_det_probs"] = det_probs
        end_points["viz_det_boxes"] = det_boxes
        end_points["viz_det_class"] = det_class

    with tf.name_scope("Losses") as scope:
        losses(b_input_mask, b_labels_input, ious, b_box_delta_input,
               pred_class_probs, pred_conf, pred_box_delta)

    return end_points
def create_inner_block(
        incoming, scope, nonlinearity=tf.nn.elu,
        weights_initializer=tf.truncated_normal_initializer(1e-3),
        bias_initializer=tf.zeros_initializer(), regularizer=None,
        increase_dim=False, summarize_activations=True):
    """Two 3x3 convs forming the inner part of a residual block.

    When increase_dim is set, the channel count doubles and the first conv
    uses stride 2. The first conv is batch-normalized and activated; the
    second is linear (the residual add/activation happens in the caller).
    """
    n = incoming.get_shape().as_list()[-1]
    stride = 2 if increase_dim else 1
    if increase_dim:
        n *= 2

    shared = dict(
        padding="SAME",
        weights_initializer=weights_initializer,
        biases_initializer=bias_initializer,
        weights_regularizer=regularizer)

    incoming = slim.conv2d(
        incoming, n, [3, 3], stride, activation_fn=nonlinearity,
        normalizer_fn=_batch_norm_fn, scope=scope + "/1", **shared)
    if summarize_activations:
        tf.summary.histogram(incoming.name + "/activations", incoming)

    incoming = slim.dropout(incoming, keep_prob=0.6)

    incoming = slim.conv2d(
        incoming, n, [3, 3], 1, activation_fn=None,
        normalizer_fn=None, scope=scope + "/2", **shared)
    return incoming
def construct_embedding(self):
  """Builds an embedding function on top of images.

  Method to be overridden by implementations.

  Returns:
    embeddings: A 2-d float32 `Tensor` of shape [batch_size, embedding_size]
      holding the embedded images.
  """
  with tf.variable_scope('tcn_net', reuse=self._reuse) as vs:
    self._adaptation_scope = vs.name
    net = self._pretrained_output

    # Define some adaptation blocks on top of the pre-trained resnet output.
    adaptation_blocks = []
    # Config string looks like "<depth>_<units>-<depth>_<units>-..." —
    # each dash-separated item becomes one resnet block spec.
    adaptation_block_params = [map(
        int, i.split('_')) for i in self._config.adaptation_blocks.split('-')]
    for i, (depth, num_units) in enumerate(adaptation_block_params):
      block = resnet_v2.resnet_v2_block(
          'adaptation_block_%d' % i, base_depth=depth, num_units=num_units,
          stride=1)
      adaptation_blocks.append(block)

    # Stack them on top of the resent output.
    net = resnet_utils.stack_blocks_dense(
        net, adaptation_blocks, output_stride=None)

    # Average pool the output (keep_dims so a 1x1 conv can follow).
    net = tf.reduce_mean(net, [1, 2], name='adaptation_pool', keep_dims=True)

    if self._config.emb_connection == 'fc':
      # Use fully connected layer to project to embedding layer.
      fc_hidden_sizes = self._config.fc_hidden_sizes
      if fc_hidden_sizes == 'None':
        fc_hidden_sizes = []
      else:
        # NOTE(review): map() is a one-shot iterator on Python 3; it is only
        # consumed once in the loop below, so this works, but a list would
        # be safer if this is ever iterated twice.
        fc_hidden_sizes = map(int, fc_hidden_sizes.split('_'))
      fc_hidden_keep_prob = self._config.dropout.keep_fc
      net = tf.squeeze(net)
      for fc_hidden_size in fc_hidden_sizes:
        net = slim.layers.fully_connected(net, fc_hidden_size)
        if fc_hidden_keep_prob < 1.0:
          net = slim.dropout(net, keep_prob=fc_hidden_keep_prob,
                             is_training=self._is_training)
      # Connect last FC layer to embedding.
      embedding = slim.layers.fully_connected(net, self._embedding_size,
                                              activation_fn=None)
    else:
      # Use 1x1 conv layer to project to embedding layer.
      embedding = slim.conv2d(
          net, self._embedding_size, [1, 1], activation_fn=None,
          normalizer_fn=None, scope='embedding')
      embedding = tf.squeeze(embedding)

    # Optionally L2 normalize the embedding.
    if self._embedding_l2:
      embedding = tf.nn.l2_normalize(embedding, dim=1)
    return embedding
def resface36(images, keep_probability, phase_train=True, bottleneck_layer_size=512, weight_decay=0.0, reuse=None):
    '''
    conv name
    conv[conv_layer]_[block_index]_[block_layer_index]
    '''
    # (stage scope, filters, residual blocks, repeat scope). The repeat-scope
    # names are kept exactly as in the original checkpoint layout — note the
    # last stage uses 'Conv4', not 'Conv_4'.
    stages = [('Conv1', 64, 2, 'Conv_1'),
              ('Conv2', 128, 4, 'Conv_2'),
              ('Conv3', 256, 8, 'Conv_3'),
              ('Conv4', 512, 1, 'Conv4')]
    net = images
    for stage_scope, depth, num_blocks, repeat_scope in stages:
        with tf.variable_scope(stage_scope):
            net = resface_pre(net, depth, scope=stage_scope + '_pre')
            net = slim.repeat(net, num_blocks, resface_block, depth,
                              scope=repeat_scope)
    with tf.variable_scope('Logits'):
        net = slim.flatten(net)
        net = slim.dropout(net, keep_probability, is_training=phase_train,
                           scope='Dropout')
        net = slim.fully_connected(net, bottleneck_layer_size,
                                   activation_fn=None, scope='Bottleneck',
                                   reuse=False)
    return net, ''
def build_single_inceptionv1(train_tfdata, is_train, dropout_keep_prob):
    """InceptionV1 forward pass: returns (logits, pooled Mixed_5c features)."""
    with slim.arg_scope(inception.inception_v1_arg_scope()):
        identity, end_points = inception.inception_v1(
            train_tfdata, dropout_keep_prob=dropout_keep_prob,
            is_training=is_train)
        # Pool the last mixed layer down to 1x1, then drop the spatial dims.
        pooled = slim.avg_pool2d(end_points['Mixed_5c'], [7, 7], stride=1,
                                 scope='MaxPool_0a_7x7')
        pooled = slim.dropout(pooled, dropout_keep_prob, scope='Dropout_0b')
        feature = tf.squeeze(pooled, [1, 2])
    return identity, feature
def __init__(self, is_training):
    """Builds an autoregressive multi-label classifier over 100 labels."""
    self.input_image = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 3], name='input_image')
    self.input_label = tf.placeholder(dtype=tf.float32, shape=[None, 100], name='input_label')
    self.input_nlcd = tf.placeholder(dtype=tf.float32, shape=[None, 15], name='input_nlcd')
    self.keep_prob = tf.placeholder(tf.float32)

    weights_regularizer = slim.l2_regularizer(FLAGS.weight_decay)

    # Flatten the raw image and append the NLCD features.
    flatten_hist = tf.reshape(self.input_image, [-1, 3 * 64 * 64])
    flatten_hist = tf.concat([flatten_hist, self.input_nlcd], 1)

    # Shared feature trunk.
    x = slim.fully_connected(flatten_hist, 512, weights_regularizer=weights_regularizer, scope='decoder/fc_1')
    x = slim.fully_connected(x, 1024, weights_regularizer=weights_regularizer, scope='decoder/fc_2')
    flatten_hist = slim.fully_connected(x, 512, weights_regularizer=weights_regularizer, scope='decoder/fc_3')

    # Predict the 100 labels one at a time; each step is conditioned on the
    # shared features plus the sigmoid outputs of all previous steps.
    all_logits = []
    all_output = []
    for i in range(100):
        if all_output:
            previous = tf.concat(all_output, 1)
            current_input_x = tf.concat([flatten_hist, previous], 1)
        else:
            current_input_x = flatten_hist
        h = slim.fully_connected(current_input_x, 256, weights_regularizer=weights_regularizer)
        h = slim.fully_connected(h, 100, weights_regularizer=weights_regularizer)
        h = slim.dropout(h, keep_prob=self.keep_prob, is_training=is_training)
        all_logits.append(slim.fully_connected(h, 1, activation_fn=None,
                                               weights_regularizer=weights_regularizer))
        all_output.append(tf.sigmoid(all_logits[i]))

    final_logits = tf.concat(all_logits, 1)
    self.output = tf.sigmoid(final_logits)

    # Cross-entropy summed over labels, averaged over the batch.
    self.ce_loss = tf.reduce_mean(tf.reduce_sum(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=self.input_label, logits=final_logits), 1))
    slim.losses.add_loss(self.ce_loss)
    tf.summary.scalar('ce_loss', self.ce_loss)

    # L2 regularization loss.
    self.l2_loss = tf.add_n(slim.losses.get_regularization_losses())
    tf.summary.scalar('l2_loss', self.l2_loss)

    # Total loss.
    self.total_loss = slim.losses.get_total_loss()
    tf.summary.scalar('total_loss', self.total_loss)
def build_graph(top_k):
    """Builds the 3-conv-stage character recognition graph.

    Returns a dict of graph endpoints keyed by name.
    """
    # with tf.device('/cpu:0'):
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')

    # Three conv / max-pool stages.
    net = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv1')
    net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME')
    net = slim.conv2d(net, 128, [3, 3], padding='SAME', scope='conv2')
    net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME')
    net = slim.conv2d(net, 256, [3, 3], padding='SAME', scope='conv3')
    net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME')

    net = slim.flatten(net)
    fc1 = slim.fully_connected(slim.dropout(net, keep_prob), 1024,
                               activation_fn=tf.nn.tanh, scope='fc1')
    logits = slim.fully_connected(slim.dropout(fc1, keep_prob),
                                  FLAGS.charset_size, activation_fn=None,
                                  scope='fc2')

    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

    # Adam with an exponentially decayed learning rate.
    global_step = tf.get_variable("step", [],
                                  initializer=tf.constant_initializer(0.0),
                                  trainable=False)
    rate = tf.train.exponential_decay(2e-4, global_step, decay_steps=2000,
                                      decay_rate=0.97, staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=rate).minimize(
        loss, global_step=global_step)

    probabilities = tf.nn.softmax(logits)
    tf.summary.scalar('loss', loss)
    tf.summary.scalar('accuracy', accuracy)
    merged_summary_op = tf.summary.merge_all()
    predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
    accuracy_in_top_k = tf.reduce_mean(
        tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))

    return {'images': images,
            'labels': labels,
            'keep_prob': keep_prob,
            'top_k': top_k,
            'global_step': global_step,
            'train_op': train_op,
            'loss': loss,
            'accuracy': accuracy,
            'accuracy_top_k': accuracy_in_top_k,
            'merged_summary_op': merged_summary_op,
            'predicted_distribution': probabilities,
            'predicted_index_top_k': predicted_index_top_k,
            'predicted_val_top_k': predicted_val_top_k}
def inference(self):
    """AlexNet-style classifier over self.x; returns class logits."""
    net = tf.reshape(self.x, shape=[-1, self.input_shape[0],
                                    self.input_shape[1], self.input_shape[2]])
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(1e-6)):
        net = slim.conv2d(net, 96, [11, 11], 4, padding='VALID', scope='conv1')
        net = slim.max_pool2d(net, [3, 3], 2, scope='pool1')
        net = slim.conv2d(net, 256, [5, 5], 1, scope='conv2')
        net = slim.max_pool2d(net, [3, 3], 2, scope='pool2')
        net = slim.conv2d(net, 384, [3, 3], 1, scope='conv3')
        net = slim.conv2d(net, 384, [3, 3], 1, scope='conv4')
        net = slim.conv2d(net, 256, [3, 3], 1, scope='conv5')
        net = slim.max_pool2d(net, [3, 3], 2, scope='pool5')
        net = slim.flatten(net)
        # NOTE: the FC layers here are linear (activation_fn=None), matching
        # the original implementation exactly.
        net = slim.fully_connected(net, 4096, activation_fn=None, scope='fc1')
        net = slim.dropout(net, 0.5, is_training=self.is_training, scope='do1')
        net = slim.fully_connected(net, 4096, activation_fn=None, scope='fc2')
        net = slim.dropout(net, 0.5, is_training=self.is_training, scope='do2')
        net = slim.fully_connected(net, self.nclasses, activation_fn=None,
                                   scope='fc3')
        return net
def inference(self):
    """VGG-16 classifier over self.x; returns class logits."""
    net = tf.reshape(self.x, shape=[-1, self.input_shape[0],
                                    self.input_shape[1], self.input_shape[2]])
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        # The five VGG conv stages: (repeats, filters) per stage.
        for idx, (reps, depth) in enumerate(
                [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)], start=1):
            net = slim.repeat(net, reps, slim.conv2d, depth, [3, 3],
                              scope='conv%d' % idx)
            net = slim.max_pool2d(net, [2, 2], scope='pool%d' % idx)
        net = slim.flatten(net, scope='flatten5')
        net = slim.fully_connected(net, 4096, scope='fc6')
        net = slim.dropout(net, 0.5, is_training=self.is_training, scope='do6')
        net = slim.fully_connected(net, 4096, scope='fc7')
        net = slim.dropout(net, 0.5, is_training=self.is_training, scope='do7')
        net = slim.fully_connected(net, self.nclasses, activation_fn=None,
                                   scope='fcX8')
        return net
def conv_net(inputs, hparams):
  """Builds the ConvNet from Kelz 2016."""
  init = tf.contrib.layers.variance_scaling_initializer(
      factor=2.0, mode='FAN_AVG', uniform=True)
  with slim.arg_scope([slim.conv2d, slim.fully_connected],
                      activation_fn=tf.nn.relu,
                      weights_initializer=init):
    net = inputs
    # One conv stage per hparams entry: conv, optional freq-pool, optional
    # dropout. Scope indices follow the stage number.
    layer_specs = zip(hparams.temporal_sizes, hparams.freq_sizes,
                      hparams.num_filters, hparams.pool_sizes,
                      hparams.dropout_keep_amts)
    for i, (temporal_size, freq_size, filters, pool_size,
            keep_amt) in enumerate(layer_specs):
      net = slim.conv2d(net, filters, [temporal_size, freq_size],
                        scope='conv' + str(i),
                        normalizer_fn=slim.batch_norm)
      if pool_size > 1:
        net = slim.max_pool2d(net, [1, pool_size], stride=[1, pool_size],
                              scope='pool' + str(i))
      if keep_amt < 1:
        net = slim.dropout(net, keep_amt, scope='dropout' + str(i))

    # Flatten while preserving batch and time dimensions.
    dims = tf.shape(net)
    net = tf.reshape(
        net, (dims[0], dims[1], net.shape[2].value * net.shape[3].value),
        'flatten_end')

    net = slim.fully_connected(net, hparams.fc_size, scope='fc_end')
    net = slim.dropout(net, hparams.fc_dropout_keep_amt, scope='dropout_end')
    return net
def Encoder_fc3_dropout(x,
                        num_output=85,
                        is_training=True,
                        reuse=False,
                        name="3D_module"):
    """3D inference module. 3 MLP layers (last is the output).

    Dropout is applied after the first two layers.

    Input:
    - x: N x [|img_feat|, |3D_param|]
    - reuse: bool

    Outputs:
    - 3D params: N x num_output
      if orthogonal: either 85: (3 + 24*3 + 10) or 109 (3 + 24*4 + 10)
      for factored axis-angle representation
      if perspective: 86: (f, tx, ty, tz) + 24*3 + 10, or 110 for factored
      axis-angle.
    - variables: tf variables
    """
    if reuse:
        print('Reuse is on!')
    with tf.variable_scope(name, reuse=reuse) as scope:
        hidden = slim.fully_connected(x, 1024, scope='fc1')
        hidden = slim.dropout(hidden, 0.5, is_training=is_training,
                              scope='dropout1')
        hidden = slim.fully_connected(hidden, 1024, scope='fc2')
        hidden = slim.dropout(hidden, 0.5, is_training=is_training,
                              scope='dropout2')
        # Small-magnitude initializer so regressed params start near zero.
        small_xavier = variance_scaling_initializer(
            factor=.01, mode='FAN_AVG', uniform=True)
        params = slim.fully_connected(
            hidden, num_output, activation_fn=None,
            weights_initializer=small_xavier, scope='fc3')
        variables = tf.contrib.framework.get_variables(scope)
        return params, variables
def construct_net(self, is_trained=True):
    """LeNet-5 style digit classifier; returns 10-way logits."""
    with slim.arg_scope([slim.conv2d], padding='VALID',
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.conv2d(self.input_images, 6, [5, 5], 1, padding='SAME',
                          scope='conv1')
        net = slim.max_pool2d(net, [2, 2], scope='pool2')
        net = slim.conv2d(net, 16, [5, 5], 1, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], scope='pool4')
        net = slim.conv2d(net, 120, [5, 5], 1, scope='conv5')
        net = slim.flatten(net, scope='flat6')
        net = slim.fully_connected(net, 84, scope='fc7')
        net = slim.dropout(net, self.dropout, is_training=is_trained,
                           scope='dropout8')
        digits = slim.fully_connected(net, 10, scope='fc9')
    return digits
def inference(self):
    """LeNet-style classifier for MNIST-scaled inputs; returns logits."""
    net = tf.reshape(self.x, shape=[-1, self.input_shape[0],
                                    self.input_shape[1], self.input_shape[2]])
    # scale (divide by MNIST std)
    net = net * 0.0125
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        weights_initializer=tf.contrib.layers.xavier_initializer(),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = slim.conv2d(net, 20, [5, 5], padding='VALID', scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool1')
        net = slim.conv2d(net, 50, [5, 5], padding='VALID', scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='VALID', scope='pool2')
        net = slim.flatten(net)
        net = slim.fully_connected(net, 500, scope='fc1')
        net = slim.dropout(net, 0.5, is_training=self.is_training, scope='do1')
        net = slim.fully_connected(net, self.nclasses, activation_fn=None,
                                   scope='fc2')
        return net
def __init__(self, is_training):
    """Builds a conditional decoder P(X|z) with a standard-normal z sample."""
    z_dim = FLAGS.z_dim
    batch_size = FLAGS.batch_size
    self.input_image = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 3], name='input_image')
    self.input_nlcd = tf.placeholder(dtype=tf.float32, shape=[None, 15], name='input_nlcd')
    self.input_label = tf.placeholder(dtype=tf.float32, shape=[None, 100], name='input_label')
    self.keep_prob = tf.placeholder(tf.float32)

    weights_regularizer = slim.l2_regularizer(FLAGS.weight_decay)

    # Raw flattened pixels serve directly as the image feature for both the
    # encoder and decoder sides (no learned image encoder here).
    flatten_hist = tf.reshape(self.input_image, [-1, 3 * 64 * 64])
    self.image_feature_encoder = flatten_hist
    self.image_feature_decoder = flatten_hist

    ############## Q(z|X) / Sample_z ###############
    # z is sampled from a standard normal; no learned posterior is used.
    eps = tf.random_normal(shape=[batch_size, z_dim])
    self.sample_z = eps

    ############## P(X|z) ###############
    x = tf.concat([self.input_nlcd, self.image_feature_decoder, self.sample_z], 1)
    x = slim.fully_connected(x, 512, weights_regularizer=weights_regularizer, scope='decoder/fc_1')
    x = slim.fully_connected(x, 1024, weights_regularizer=weights_regularizer, scope='decoder/fc_2')
    x = slim.fully_connected(x, 512, weights_regularizer=weights_regularizer, scope='decoder/fc_3')
    x = slim.dropout(x, keep_prob=self.keep_prob, is_training=is_training)
    self.logits = slim.fully_connected(x, 100, activation_fn=None,
                                       weights_regularizer=weights_regularizer,
                                       scope='decoder/logits')
    self.output = tf.sigmoid(self.logits, name='decoder/output')
def AddDropout(self, prev_layer, index):
    """Adds a dropout layer.

    Args:
      prev_layer: Input tensor.
      index: Position in model_str to start parsing

    Returns:
      Output tensor, end index in model_str.
    """
    # Spec syntax: "Do" optionally followed by "{name}".
    match = re.compile(r'(Do)({\w+})?').match(self.model_str, index)
    if match is None:
        return None, None
    name = self._GetLayerName(match.group(0), index, match.group(2))
    layer = slim.dropout(prev_layer, 0.5, is_training=self.is_training,
                         scope=name)
    return layer, match.end()
def inception_v3(inputs, num_classes=1000, is_training=True,
                 dropout_keep_prob=0.8,
                 prediction_fn=slim.softmax,
                 spatial_squeeze=True,
                 reuse=None,
                 scope='InceptionV3'):
    """Builds the InceptionV3 classification head on top of inception_v3_base.

    Args:
      inputs: Input image tensor.
      num_classes: Number of output classes.
      is_training: Gates batch norm and dropout.
      dropout_keep_prob: Keep probability for the pre-logits dropout.
      prediction_fn: Function producing predictions from logits.
      spatial_squeeze: If True, squeeze the [1, 1] spatial dims of the
        logits/aux logits.
      reuse: Variable-scope reuse flag.
      scope: Name for the outer variable scope.

    Returns:
      (logits, end_points): final logits and a dict of named activations,
      including 'AuxLogits', 'PreLogits', 'Logits' and 'Predictions'.
    """
    with tf.variable_scope(scope,'InceptionV3',[inputs,num_classes],reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm,slim.dropout], is_training=is_training):
            net,end_points=inception_v3_base(inputs,scope=scope)
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # Auxiliary classifier branching off the 'mixed_6e' activation.
                aux_logits = end_points['mixed_6e']
                with tf.variable_scope('AuxLogits'):
                    aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3,
                                                 padding='VALID', scope='avgpool_1a_5x5')
                    aux_logits = slim.conv2d(aux_logits, 128, [1, 1], scope='conv2d_1b_1x1')
                    aux_logits = slim.conv2d(
                        aux_logits, 768, [5, 5],
                        weights_initializer=trunc_normal(0.01),
                        padding='VALID', scope='conv2d_2a_5x5'
                    )
                    # Linear 1x1 projection to class scores (no BN/activation).
                    aux_logits = slim.conv2d(
                        aux_logits, num_classes, [1, 1], activation_fn=None,
                        normalizer_fn=None, weights_initializer=trunc_normal(0.001),
                        scope='conv2d_2b_1x1'
                    )
                    if spatial_squeeze:
                        aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze')
                    end_points['AuxLogits'] = aux_logits
                # Main classification head.
                with tf.variable_scope('Logits'):
                    net = slim.avg_pool2d(net, [8, 8], padding='VALID',
                                          scope='avgpool_1a_8x8')
                    net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='dropout_1b')
                    end_points['PreLogits'] = net
                    logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None,
                                         normalizer_fn=None, scope='conv2d_1c_1x1')
                    if spatial_squeeze:
                        logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')
                end_points['Logits'] = logits
                end_points['Predictions'] = prediction_fn(logits, scope='Predictions')
    return logits, end_points
def __init__(self,is_training):
    """Builds a plain MLP over 15-dim NLCD features for 100-way multi-label output.

    Args:
      is_training: Python bool gating dropout.
    """
    # NOTE(review): this placeholder carries the NLCD vector but is named
    # 'input_image' — looks like a copy-paste slip; left unchanged because
    # the tensor name may be referenced by feeding/serving code.
    self.input_nlcd = tf.placeholder(dtype=tf.float32,shape=[None,15],name='input_image')
    self.input_label = tf.placeholder(dtype=tf.float32,shape=[None,100],name='input_label')
    self.keep_prob = tf.placeholder(tf.float32)
    weights_regularizer=slim.l2_regularizer(FLAGS.weight_decay)
    # Trunk: 15 -> 256 -> 256 -> 100 (slim's default ReLU activations).
    x = slim.fully_connected(self.input_nlcd, 256,weights_regularizer=weights_regularizer,scope='fc/fc_1')
    x = slim.fully_connected(x, 256,weights_regularizer=weights_regularizer, scope='fc/fc_2')
    x = slim.fully_connected(x, 100,weights_regularizer=weights_regularizer, scope='fc/fc_3')
    x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)
    # Linear, bias-free output layer; sigmoid yields per-class probabilities.
    x = slim.fully_connected(inputs=x, num_outputs=100, activation_fn=None, biases_initializer=None, weights_regularizer=weights_regularizer,scope='fc/fc_4')
    self.output = tf.sigmoid(x)
def conv_net(x,is_training):
    """LeNet-style classifier: 2x (conv5 + maxpool), FC-100, dropout, logits.

    Args:
      x: Flattened image batch; reshaped internally to [-1, 28, 28, 1].
      is_training: Bool (or bool tensor) gating batch norm and dropout.

    Returns:
      Unnormalized logits of shape [batch, num_classes].
    """
    # "updates_collections": None makes batch norm update its moving
    # averages in place; without it accuracy stays at ~0.10 (chance).
    bn_params = {
        "is_training": is_training,
        "decay": 0.9,
        "updates_collections": None,
    }
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            activation_fn=tf.nn.relu,
            weights_initializer=tf.truncated_normal_initializer(mean=0.0, stddev=0.01),
            weights_regularizer=slim.l2_regularizer(0.0005),
            normalizer_fn=slim.batch_norm,
            normalizer_params=bn_params):
        with tf.variable_scope("ConvNet", reuse=tf.AUTO_REUSE):
            images = tf.reshape(x, [-1, 28, 28, 1])
            features = slim.conv2d(images, 6, [5, 5], scope="conv_1")
            features = slim.max_pool2d(features, [2, 2], scope="pool_1")
            features = slim.conv2d(features, 12, [5, 5], scope="conv_2")
            features = slim.max_pool2d(features, [2, 2], scope="pool_2")
            features = slim.flatten(features, scope="flatten")
            features = slim.fully_connected(features, 100, scope="fc")
            features = slim.dropout(features, is_training=is_training)
            # Final layer is linear: no activation, no batch norm.
            logits = slim.fully_connected(
                features, num_classes, scope="prob",
                activation_fn=None, normalizer_fn=None)
    return logits
def fast_rcnn_net(self):
    """Fast R-CNN head: two shared FC layers, then class and box branches.

    Returns:
      Tuple (encode_boxes, scores): box regression output of shape
      [N, num_classes * 5] and classification scores of shape
      [N, num_classes + 1].
    """
    with tf.variable_scope('fast_rcnn_net'):
        regularizer = slim.l2_regularizer(self.weight_decay)
        with slim.arg_scope([slim.fully_connected],
                            weights_regularizer=regularizer):
            roi_vectors = slim.flatten(self.fast_rcnn_all_level_rois)
            hidden = slim.fully_connected(roi_vectors, 1024, scope='fc_1')
            if self.use_dropout:
                hidden = slim.dropout(hidden, keep_prob=0.5,
                                      is_training=self.is_training,
                                      scope='dropout')
            hidden = slim.fully_connected(hidden, 1024, scope='fc_2')
            # Two sibling linear heads off the shared trunk.
            scores = slim.fully_connected(hidden, self.num_classes + 1,
                                          activation_fn=None,
                                          scope='classifier')
            encode_boxes = slim.fully_connected(hidden, self.num_classes * 5,
                                                activation_fn=None,
                                                scope='regressor')
            return encode_boxes, scores
def inference_network(x):
    """Inference network to parameterize the variational model.

    Takes data as input and outputs the variational parameters:
    loc, scale = neural_network(x).
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.elu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'scale': True}):
        hidden = tf.reshape(x, [M, 28, 28, 1])
        # Three 5x5 conv layers, then dropout, flatten, and one linear FC.
        for depth, extra in ((32, {'stride': 2}),
                             (64, {'stride': 2}),
                             (128, {'padding': 'VALID'})):
            hidden = slim.conv2d(hidden, depth, 5, **extra)
        hidden = slim.dropout(hidden, 0.9)
        hidden = slim.flatten(hidden)
        params = slim.fully_connected(hidden, d * 2, activation_fn=None)
        # First half parameterizes the location, second half the scale
        # (made positive through softplus).
        loc = params[:, :d]
        scale = tf.nn.softplus(params[:, d:])
    return loc, scale
def inference_network(x, xwidth=28, xheight=28, zdim=2):
    """Inference network to parameterize the variational model.

    Takes data as input and outputs the variational parameters:
    mu, sigma = neural_network(x).  Note: sigma is returned as the raw
    second half of the FC output — the softplus is deliberately disabled.
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.elu,
                        normalizer_fn=slim.batch_norm,
                        normalizer_params={'scale': True}):
        hidden = tf.reshape(x, [N_MINIBATCH, 28, 28, 1])
        hidden = slim.conv2d(hidden, 32, 5, stride=2)
        hidden = slim.conv2d(hidden, 64, 5, stride=2)
        hidden = slim.conv2d(hidden, 128, 5, padding='VALID')
        hidden = slim.dropout(hidden, 0.9)
        hidden = slim.flatten(hidden)
        params = slim.fully_connected(hidden, zdim * 2, activation_fn=None)
        mu, sigma = params[:, :zdim], params[:, zdim:]
    return mu, sigma
def __init__(self,is_training):
    """Builds a conditional VAE over 100 binary labels.

    Encoder Q(z | nlcd, image_hist, label) and a learned conditional prior
    P(z | nlcd, image_hist) are both diagonal Gaussians produced by MLPs;
    the decoder maps (nlcd, image_hist, z) to per-label logits.  Total
    loss = reconstruction (sigmoid cross-entropy) + KL(Q || prior) + L2.

    Args:
      is_training: Python bool gating dropout.
    """
    z_dim = FLAGS.z_dim
    # Graph inputs; the "image" here is a 3x128 per-channel histogram.
    self.input_image = tf.placeholder(dtype=tf.float32,shape=[None,3,128],name='input_image')
    self.input_nlcd = tf.placeholder(dtype=tf.float32,shape=[None,15],name='input_nlcd')
    self.input_label = tf.placeholder(dtype=tf.float32,shape=[None,100],name='input_label')
    self.keep_prob = tf.placeholder(tf.float32)
    weights_regularizer=slim.l2_regularizer(FLAGS.weight_decay)
    ############## image feature ########
    # (disabled) A conv-stack feature extractor (encoder/conv1..conv6 with
    # batch norm + max pools, followed by FC layers) used to live here; the
    # model currently uses the raw flattened histogram instead.
    flatten_hist = tf.reshape(self.input_image,[-1,3*128])
    # x = slim.fully_connected(flatten_hist, 256,weights_regularizer=weights_regularizer,scope='encoder/hist/fc_1')
    # x = slim.fully_connected(x, 256,weights_regularizer=weights_regularizer, scope='encoder/hist/fc_2')
    # x = slim.fully_connected(x, 100,weights_regularizer=weights_regularizer, scope='encoder/hist/fc_3')
    # self.image_feature_encoder = x
    self.image_feature_encoder = flatten_hist
    #self.image_feature_encoder = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)
    ############## Q(z|X) ###############
    # Recognition network sees the label (only available at train time).
    input_x = tf.concat([self.input_nlcd,self.image_feature_encoder,self.input_label],1)
    #input_x = tf.concat([self.input_nlcd,self.input_label],1)
    #input_x = slim.dropout(input_x,keep_prob=self.keep_prob,is_training=is_training)
    x = slim.fully_connected(input_x, 512,weights_regularizer=weights_regularizer,scope='encoder/fc_1')
    x = slim.fully_connected(x, 1024,weights_regularizer=weights_regularizer, scope='encoder/fc_2')
    # NOTE(review): widths 499/399 below look hand-tuned (possibly sized
    # for a residual add that is now commented out) — confirm intent.
    x = slim.fully_connected(x, 499,weights_regularizer=weights_regularizer, scope='encoder/fc_3')
    #x = x+input_x
    #dropout
    #x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)
    self.z_miu = slim.fully_connected(x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer,scope='encoder/z_miu')
    z_logvar = slim.fully_connected(x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer,scope='encoder/z_logvar')
    ############## Sample_z ###############
    # eps = tf.random_normal(shape=tf.shape(z_miu))
    # sample_z = z_miu + tf.exp(z_logvar / 2) * eps
    # Conditional prior network: Gaussian parameters from (nlcd, feature).
    condition = tf.concat([self.input_nlcd,self.image_feature_encoder],1)
    x = slim.fully_connected(condition, 512,weights_regularizer=weights_regularizer,scope='condition/fc_1')
    x = slim.fully_connected(x, 1024,weights_regularizer=weights_regularizer, scope='condition/fc_2')
    x = slim.fully_connected(x, 399,weights_regularizer=weights_regularizer, scope='condition/fc_3')
    #x = x+condition
    self.condition_miu = slim.fully_connected(x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer,scope='condition/z_miu')
    condition_logvar = slim.fully_connected(x, z_dim, activation_fn=None, weights_regularizer=weights_regularizer,scope='condition/z_logvar')
    ############## Sample_z ###############
    # Reparameterization trick on the recognition distribution.
    eps = tf.random_normal(shape=tf.shape(self.z_miu))
    self.sample_z = self.z_miu + tf.exp(z_logvar / 2) * eps
    ############## P(X|z) ###############
    flatten_hist = tf.reshape(self.input_image,[-1,3*128])
    self.image_feature_decoder = flatten_hist
    input_x = tf.concat([self.input_nlcd,self.image_feature_decoder,self.sample_z],1)
    #x = tf.concat([self.input_nlcd,sample_z],1)
    x = slim.fully_connected(input_x, 512,weights_regularizer=weights_regularizer,scope='decoder/fc_1')
    x = slim.fully_connected(x, 1024,weights_regularizer=weights_regularizer, scope='decoder/fc_2')
    x = slim.fully_connected(x, 499,weights_regularizer=weights_regularizer, scope='decoder/fc_3')
    #x = x+input_x
    #dropout
    x = slim.dropout(x,keep_prob=self.keep_prob,is_training=is_training)
    # 100-way multi-label logits and sigmoid probabilities.
    self.logits = slim.fully_connected(x, 100, activation_fn=None, weights_regularizer=weights_regularizer,scope='decoder/logits')
    self.output = tf.sigmoid(self.logits,name='decoder/output')
    # E[log P(X|z)]
    self.recon_loss = tf.reduce_mean(tf.reduce_sum(tf.nn.sigmoid_cross_entropy_with_logits(logits=self.logits, labels=self.input_label), 1))
    tf.summary.scalar('recon_loss',self.recon_loss)
    # D_KL(Q(z|X) || P(z|X)); calculate in closed form as both dist. are Gaussian
    #self.kl_loss = tf.reduce_mean(0.5 * tf.reduce_sum(tf.exp(z_logvar) + z_miu**2 - 1. - z_logvar, 1))
    self.kl_loss = tf.reduce_mean(gaussian_kld(self.z_miu,z_logvar,self.condition_miu,condition_logvar))
    tf.summary.scalar('kl_loss',self.kl_loss)
    # VAE loss
    self.vae_loss = self.recon_loss + self.kl_loss
    slim.losses.add_loss(self.vae_loss)
    tf.summary.scalar('vae_loss',self.vae_loss)
    # l2 loss
    self.l2_loss = tf.add_n(slim.losses.get_regularization_losses())
    tf.summary.scalar('l2_loss',self.l2_loss)
    #total loss (VAE loss + regularization, via the slim losses collection)
    self.total_loss = slim.losses.get_total_loss()
    tf.summary.scalar('total_loss',self.total_loss)
    # (disabled) plain sigmoid cross-entropy training path without the VAE.
def build_graph(top_k):
    """Builds the character-recognition training graph.

    Network: conv2d->max_pool2d (x3), conv2d->conv2d->max_pool2d, then
    fully_connected->fully_connected, with batch norm as the default
    normalizer for every conv/FC layer.

    Args:
      top_k: Number of top predictions (and probabilities) to expose.

    Returns:
      Dict of graph endpoints: input placeholders, train op, loss,
      accuracy metrics, summaries, and top-k prediction tensors.
    """
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')
    with tf.device('/cpu:0'):
        # Default batch norm for slim.conv2d and slim.fully_connected.
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training}):
            conv3_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv3_1')
            max_pool_1 = slim.max_pool2d(conv3_1, [2, 2], [2, 2], padding='SAME', scope='pool1')
            conv3_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv3_2')
            max_pool_2 = slim.max_pool2d(conv3_2, [2, 2], [2, 2], padding='SAME', scope='pool2')
            conv3_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3_3')
            max_pool_3 = slim.max_pool2d(conv3_3, [2, 2], [2, 2], padding='SAME', scope='pool3')
            conv3_4 = slim.conv2d(max_pool_3, 512, [3, 3], padding='SAME', scope='conv3_4')
            conv3_5 = slim.conv2d(conv3_4, 512, [3, 3], padding='SAME', scope='conv3_5')
            max_pool_4 = slim.max_pool2d(conv3_5, [2, 2], [2, 2], padding='VALID', scope='pool4')
            # Flatten while preserving the batch dimension.
            flatten = slim.flatten(max_pool_4)
            # Dropout zeroes activations with prob (1 - keep_prob) and
            # rescales the survivors so the expected sum is unchanged.
            fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob=keep_prob), 1024,
                                       activation_fn=tf.nn.relu, scope='fc1')
            # Final layer outputs one score per character class.
            logits = slim.fully_connected(slim.dropout(fc1, keep_prob), FLAGS.charset_size,
                                          activation_fn=None, scope='fc2')
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
        # tf.argmax replaces the deprecated tf.arg_max alias (identical
        # semantics), matching the other build_graph variant in this file.
        accuracy_top_1 = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))
        # Run batch-norm moving-average updates before computing the loss.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            loss = control_flow_ops.with_dependencies([updates], loss)
        global_step = tf.get_variable("step", [],
                                      initializer=tf.constant_initializer(0.0),
                                      trainable=False)
        optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
        # Creates the op that computes and applies gradients, returning loss.
        train_operation = slim.learning.create_train_op(
            loss, optimizer=optimizer, global_step=global_step)
        probabilities = tf.nn.softmax(logits)
        # Loss/accuracy curves for TensorBoard.
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy_top_1', accuracy_top_1)
        merged_summary_op = tf.summary.merge_all()
        # Top-k predicted classes with their probabilities.
        predicted_prob_top_k, predicted_index_top_k = tf.nn.top_k(
            probabilities, k=top_k)
        accuracy_in_top_k = tf.reduce_mean(
            tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))
    return {
        'images': images,
        'labels': labels,
        'keep_prob': keep_prob,
        'top_k': top_k,
        'global_step': global_step,
        'train_operation': train_operation,
        'loss': loss,
        'is_training': is_training,
        'accuracy_top_1': accuracy_top_1,
        'accuracy_top_k': accuracy_in_top_k,
        'merged_summary_op': merged_summary_op,
        'predicted_distribution': probabilities,
        'predicted_index_top_k': predicted_index_top_k,
        'predicted_prob_top_k': predicted_prob_top_k
    }
def build_network(self, sess, is_training=True):
    """Builds the VGG-16 Faster R-CNN graph: backbone, RPN, and RCNN head.

    Args:
      sess: tf.Session; not used in this method body.
      is_training: If True, builds the training branches (anchor/proposal
        targets, dropout); otherwise the inference-only proposal path
        selected by cfg.TEST.MODE.

    Returns:
      (rois, cls_prob, bbox_pred) from the final detection head.
    """
    with tf.variable_scope('vgg_16', 'vgg_16'):
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(
                mean=0.0, stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
        # VGG-16 backbone. conv1/conv2 are frozen (trainable=False);
        # conv3..conv5 follow is_training.
        net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                          trainable=False, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          trainable=False, scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          trainable=is_training, scope='conv3')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv4')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
        net = slim.repeat(net, 3, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv5')
        self._act_summaries.append(net)
        self._layers['head'] = net
        # build the anchors for the image
        self._anchor_component()  # generate self._anchors, []
        # rpn: 3x3 conv trunk, then 1x1 heads for objectness and box deltas.
        rpn = slim.conv2d(net, 512, [3, 3], trainable=is_training,
                          weights_initializer=initializer, scope="rpn_conv/3x3")
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_cls_score')
        # change it so that the score has 2 as its channel size
        rpn_cls_score_reshape = self._reshape_layer(
            rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                   "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                           self._num_anchors * 2, "rpn_cls_prob")
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_bbox_pred')
        if is_training:
            rois, roi_scores = self._proposal_layer(
                rpn_cls_prob, rpn_bbox_pred, "rois")  # ind, x1, y1, x2, y2, score
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor")
            # Try to have a determinestic order for the computing graph, for reproducibility
            with tf.control_dependencies([rpn_labels]):
                rois, _ = self._proposal_target_layer(
                    rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois")
            else:
                raise NotImplementedError
        # rcnn: pooled ROI features through two FC layers to the two heads.
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(net, rois, "pool5")
        else:
            raise NotImplementedError
        pool5_flat = slim.flatten(pool5, scope='flatten')
        fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')  # 4096
        if is_training:
            fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True, scope='dropout6')  # 0.5
        fc7 = slim.fully_connected(fc6, 4096, scope='fc7')  # 4096
        if is_training:
            fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True, scope='dropout7')  # 0.5
        cls_score = slim.fully_connected(fc7, self._num_classes,
                                         weights_initializer=initializer,
                                         trainable=is_training,
                                         activation_fn=None, scope='cls_score')
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(
            fc7, self._num_classes * 4, weights_initializer=initializer_bbox,
            trainable=is_training, activation_fn=None, scope='bbox_pred')
        # Publish every intermediate prediction for losses/summaries.
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois
        self._score_summaries.update(self._predictions)
        return rois, cls_prob, bbox_pred
def get_model(model_in, dropout_keeprate_node, train_config, scope):
    """Builds a LeNet-style model (c1-s2-c3-s4-c5-f6-out) under `scope`.

    Layer hyper-parameters come from the module-level `model_config` /
    `model_chout_num` dicts; initializers, regularizers, normalizer and
    trainability come from `train_config`.

    Args:
      model_in: Input tensor.
      dropout_keeprate_node: Tensor/placeholder holding the dropout keep prob.
      train_config: Config object (normalizer_fn, activation_fn,
        initializers, is_trainable, batch-norm settings).
      scope: Variable-scope name for the whole model.

    Returns:
      Logit tensor reshaped to [-1, model_chout_num['out']].
    """
    net = model_in
    with tf.variable_scope(name_or_scope=scope, values=[model_in]):
        # batch norm arg_scope
        with slim.arg_scope([train_config.normalizer_fn],
                            decay=train_config.batch_norm_decay,
                            fused=train_config.batch_norm_fused,
                            is_training=train_config.is_trainable,
                            activation_fn=train_config.activation_fn):
            # With a normalizer present, the activation is applied by the
            # normalizer (see arg_scope above), so the conv stays linear.
            if train_config.normalizer_fn == None:
                conv_activation_fn = train_config.activation_fn
            else:
                conv_activation_fn = None
            # max_pool arg_scope
            with slim.arg_scope([slim.max_pool2d],
                                stride=model_config['maxpool_stride'],
                                kernel_size=model_config['maxpool_ksize'],
                                padding='VALID'):
                # convolutional layer arg_scope
                with slim.arg_scope(
                        [slim.conv2d],
                        kernel_size=model_config['conv_ksize'],
                        stride=model_config['conv_stride'],
                        weights_initializer=train_config.weights_initializer,
                        weights_regularizer=train_config.weights_regularizer,
                        biases_initializer=train_config.biases_initializer,
                        trainable=train_config.is_trainable,
                        activation_fn=conv_activation_fn,
                        normalizer_fn=train_config.normalizer_fn):
                    net = slim.conv2d(inputs=net, num_outputs=model_chout_num['c1'],
                                      padding='SAME', scope='c1_conv')
                    net = slim.max_pool2d(inputs=net, scope='s2_pool')
                    net = slim.conv2d(inputs=net, num_outputs=model_chout_num['c3'],
                                      padding='VALID', scope='c3_conv')
                    net = slim.max_pool2d(inputs=net, scope='s4_pool')
                    net = slim.conv2d(inputs=net, num_outputs=model_chout_num['c5'],
                                      padding='VALID', scope='c5_conv')
                # output layer by fully-connected layer
                with slim.arg_scope([slim.fully_connected],
                                    trainable=train_config.is_trainable):
                    with slim.arg_scope([slim.dropout],
                                        keep_prob=dropout_keeprate_node,
                                        is_training=train_config.is_trainable):
                        net = slim.fully_connected(inputs=net,
                                                   num_outputs=model_chout_num['f6'],
                                                   activation_fn=train_config.activation_fn,
                                                   scope='f6_fc')
                        net = slim.dropout(inputs=net, scope='f6_dropout')
                        net = slim.fully_connected(inputs=net,
                                                   num_outputs=model_chout_num['out'],
                                                   activation_fn=None,
                                                   scope='out_fc')
                        # NOTE(review): dropout is applied to the logits
                        # themselves here — unusual; confirm it is intended.
                        out_logit = slim.dropout(inputs=net, scope='out_dropout')
                        out_logit = tf.reshape(out_logit,
                                               shape=[-1, model_chout_num['out']])
    return out_logit
def _build_network(images, num_outputs, alpha, keep_prob=1.0, is_training=True, scope='yolo'):
    """Builds the YOLO detection network (24 conv layers + 3 FC layers).

    Args:
      images: Input image batch (NHWC).
      num_outputs: Length of the final prediction vector per image.
      alpha: Slope parameter passed to leaky_relu for all conv/FC layers.
      keep_prob: Dropout keep probability for the head.
      is_training: Whether dropout is active.
      scope: Variable scope containing all layers.

    Returns:
      Tensor of shape [batch, num_outputs].
    """
    with tf.variable_scope(scope):
        with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            activation_fn=leaky_relu(alpha),
            weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
            weights_regularizer=slim.l2_regularizer((0.0005)),
            variables_collections='Variables'):
            # Explicit zero-pad of 3 on H/W before the stride-2 7x7 VALID conv.
            net = tf.pad(images, np.array([[0, 0], [3, 3], [3, 3], [0, 0]]),
                         name='pad_1')
            # conv_2 and conv_4 are frozen (trainable=False).
            net = slim.conv2d(net, 64, 7, 2, padding='VALID', scope='conv_2',
                              trainable=False)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_3')
            net = slim.conv2d(net, 192, 3, scope='conv_4', trainable=False)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_5')
            # Alternating 1x1 bottleneck / 3x3 expansion blocks.
            net = slim.conv2d(net, 128, 1, scope='conv_6')
            net = slim.conv2d(net, 256, 3, scope='conv_7')
            net = slim.conv2d(net, 256, 1, scope='conv_8')
            net = slim.conv2d(net, 512, 3, scope='conv_9')
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_10')
            net = slim.conv2d(net, 256, 1, scope='conv_11')
            net = slim.conv2d(net, 512, 3, scope='conv_12')
            net = slim.conv2d(net, 256, 1, scope='conv_13')
            net = slim.conv2d(net, 512, 3, scope='conv_14')
            net = slim.conv2d(net, 256, 1, scope='conv_15')
            net = slim.conv2d(net, 512, 3, scope='conv_16')
            net = slim.conv2d(net, 256, 1, scope='conv_17')
            net = slim.conv2d(net, 512, 3, scope='conv_18')
            net = slim.conv2d(net, 512, 1, scope='conv_19')
            # tf.summary.histogram('conv19', net)
            net = slim.conv2d(net, 1024, 3, scope='conv_20')
            # tf.summary.histogram('conv20', net)
            net = slim.max_pool2d(net, 2, padding='SAME', scope='pool_21')
            net = slim.conv2d(net, 512, 1, scope='conv_22')
            net = slim.conv2d(net, 1024, 3, scope='conv_23')
            net = slim.conv2d(net, 512, 1, scope='conv_24')
            net = slim.conv2d(net, 1024, 3, scope='conv_25')
            net = slim.conv2d(net, 1024, 3, scope='conv_26')
            # tf.summary.histogram('conv26', net)
            # Pad by 1 before the final stride-2 VALID conv.
            net = tf.pad(net, np.array([[0, 0], [1, 1], [1, 1], [0, 0]]),
                         name='pad_27')
            net = slim.conv2d(net, 1024, 3, 2, padding='VALID', scope='conv_28')
            net = slim.conv2d(net, 1024, 3, scope='conv_29')
            net = slim.conv2d(net, 1024, 3, scope='conv_30')
            # NHWC -> NCHW before flattening into the FC head.
            net = tf.transpose(net, [0, 3, 1, 2], name='trans_31')
            net = slim.flatten(net, scope='flat_32')
            net = slim.fully_connected(net, 512, scope='fc_33')
            net = slim.fully_connected(net, 4096, scope='fc_34')
            net = slim.dropout(net, keep_prob=keep_prob,
                               is_training=is_training, scope='dropout_35')
            net = slim.fully_connected(net, num_outputs,
                                       activation_fn=None, scope='fc_36')
            # net ~ batch * 7 * 7 * 30
    return net
def STsingle(inputs, outputs, loss_weight, labels):
    """Builds a joint spatial (action) / temporal (flow) network on an image pair.

    A shared VGG-style conv trunk feeds (a) an FC classification branch
    over 101 actions and (b) a FlowNet-style expanding decoder that
    predicts optical flow at five scales with an unsupervised
    interpolation loss per scale.

    NOTE(review): `tf.concat(dim, values)` and the positional
    `sparse_softmax_cross_entropy_with_logits(fc8, labels)` calls below use
    the pre-1.0 TensorFlow argument order — this file targets an old
    release; do not "modernize" these calls in isolation.

    Args:
      inputs: First image batch (BGR).
      outputs: Second image batch (BGR).
      loss_weight: Indexable per-scale loss weights (5 entries used).
      labels: Integer action labels.

    Returns:
      (losses, flows_all, predictions): per-scale flow losses plus the
      action loss; scaled flow predictions at 5 resolutions; and
      [flow preview, predicted action index].
    """
    # Mean subtraction (BGR) for flying chairs
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32, name="img_global_mean")
    # tf.tile(mean, [4,192,256,1])
    inputs = inputs - mean
    outputs = outputs - mean
    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    inputs = tf.truediv(inputs, 255.0)
    outputs = tf.truediv(outputs, 255.0)
    # Add local response normalization (ACROSS_CHANNELS) for computing photometric loss
    inputs_norm = tf.nn.local_response_normalization(inputs, depth_radius=4, beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(outputs, depth_radius=4, beta=0.7)
    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose, slim.fully_connected],
                        activation_fn=tf.nn.elu):
        '''
        Shared conv layers
        '''
        # Both frames are stacked on the channel axis as the trunk input.
        conv1_1 = slim.conv2d(tf.concat(3, [inputs, outputs]), 64, [3, 3], scope='conv1_1')
        # conv1_1 = slim.conv2d(inputs, 64, [3, 3], scope='conv1_1')
        conv1_2 = slim.conv2d(conv1_1, 64, [3, 3], scope='conv1_2')
        pool1 = slim.max_pool2d(conv1_2, [2, 2], scope='pool1')
        conv2_1 = slim.conv2d(pool1, 128, [3, 3], scope='conv2_1')
        conv2_2 = slim.conv2d(conv2_1, 128, [3, 3], scope='conv2_2')
        pool2 = slim.max_pool2d(conv2_2, [2, 2], scope='pool2')
        conv3_1 = slim.conv2d(pool2, 256, [3, 3], scope='conv3_1')
        conv3_2 = slim.conv2d(conv3_1, 256, [3, 3], scope='conv3_2')
        conv3_3 = slim.conv2d(conv3_2, 256, [3, 3], scope='conv3_3')
        pool3 = slim.max_pool2d(conv3_3, [2, 2], scope='pool3')
        conv4_1 = slim.conv2d(pool3, 512, [3, 3], scope='conv4_1')
        conv4_2 = slim.conv2d(conv4_1, 512, [3, 3], scope='conv4_2')
        conv4_3 = slim.conv2d(conv4_2, 512, [3, 3], scope='conv4_3')
        pool4 = slim.max_pool2d(conv4_3, [2, 2], scope='pool4')
        conv5_1 = slim.conv2d(pool4, 512, [3, 3], scope='conv5_1')
        conv5_2 = slim.conv2d(conv5_1, 512, [3, 3], scope='conv5_2')
        conv5_3 = slim.conv2d(conv5_2, 512, [3, 3], scope='conv5_3')
        pool5 = slim.max_pool2d(conv5_3, [2, 2], scope='pool5')
        # print pool5.get_shape()
        '''
        Spatial branch
        '''
        flatten5 = slim.flatten(pool5, scope='flatten5')
        fc6 = slim.fully_connected(flatten5, 4096, scope='fc6')
        dropout6 = slim.dropout(fc6, 0.9, scope='dropout6')
        fc7 = slim.fully_connected(dropout6, 4096, scope='fc7')
        dropout7 = slim.dropout(fc7, 0.9, scope='dropout7')
        fc8 = slim.fully_connected(dropout7, 101, activation_fn=None, scope='fc8')
        prob = tf.nn.softmax(fc8)
        actionPredictions = tf.argmax(prob, 1)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(fc8, labels)
        actionLoss = tf.reduce_mean(cross_entropy)
        '''
        Temporal branch
        '''
        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001
        alpha_c = 0.3
        alpha_s = 0.3
        lambda_smooth = 0.8
        # 3x3x2x2 finite-difference kernel used by the smoothness term.
        FlowDeltaWeights = tf.constant([0,0,0,0,1,-1,0,0,0,0,0,0,0,1,0,0,-1,0], dtype=tf.float32, shape=[3,3,2,2], name="FlowDeltaWeights")
        scale = 2  # for deconvolution
        # Expanding part: predict flow at 1/32 resolution, then repeatedly
        # upsample, concat with the matching encoder feature, and re-predict.
        pr5 = slim.conv2d(pool5, 2, [3, 3], activation_fn=None, scope='pr5')
        h5 = pr5.get_shape()[1].value
        w5 = pr5.get_shape()[2].value
        pr5_input = tf.image.resize_bilinear(inputs_norm, [h5, w5])
        pr5_output = tf.image.resize_bilinear(outputs_norm, [h5, w5])
        flow_scale_5 = 0.625  # (*20/32)
        loss5, _ = loss_interp(pr5, pr5_input, pr5_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_5, FlowDeltaWeights)
        upconv4 = slim.conv2d_transpose(pool5, 256, [2*scale, 2*scale], stride=scale, scope='upconv4')
        pr5to4 = slim.conv2d_transpose(pr5, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr5to4')
        concat4 = tf.concat(3, [pool4, upconv4, pr5to4])
        pr4 = slim.conv2d(concat4, 2, [3, 3], activation_fn=None, scope='pr4')
        h4 = pr4.get_shape()[1].value
        w4 = pr4.get_shape()[2].value
        pr4_input = tf.image.resize_bilinear(inputs_norm, [h4, w4])
        pr4_output = tf.image.resize_bilinear(outputs_norm, [h4, w4])
        flow_scale_4 = 1.25  # (*20/16)
        loss4, _ = loss_interp(pr4, pr4_input, pr4_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_4, FlowDeltaWeights)
        upconv3 = slim.conv2d_transpose(concat4, 128, [2*scale, 2*scale], stride=scale, scope='upconv3')
        pr4to3 = slim.conv2d_transpose(pr4, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr4to3')
        concat3 = tf.concat(3, [pool3, upconv3, pr4to3])
        pr3 = slim.conv2d(concat3, 2, [3, 3], activation_fn=None, scope='pr3')
        h3 = pr3.get_shape()[1].value
        w3 = pr3.get_shape()[2].value
        pr3_input = tf.image.resize_bilinear(inputs_norm, [h3, w3])
        pr3_output = tf.image.resize_bilinear(outputs_norm, [h3, w3])
        flow_scale_3 = 2.5  # (*20/8)
        loss3, _ = loss_interp(pr3, pr3_input, pr3_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_3, FlowDeltaWeights)
        upconv2 = slim.conv2d_transpose(concat3, 64, [2*scale, 2*scale], stride=scale, scope='upconv2')
        pr3to2 = slim.conv2d_transpose(pr3, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr3to2')
        concat2 = tf.concat(3, [pool2, upconv2, pr3to2])
        pr2 = slim.conv2d(concat2, 2, [3, 3], activation_fn=None, scope='pr2')
        h2 = pr2.get_shape()[1].value
        w2 = pr2.get_shape()[2].value
        pr2_input = tf.image.resize_bilinear(inputs_norm, [h2, w2])
        pr2_output = tf.image.resize_bilinear(outputs_norm, [h2, w2])
        flow_scale_2 = 5.0  # (*20/4)
        loss2, _ = loss_interp(pr2, pr2_input, pr2_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_2, FlowDeltaWeights)
        upconv1 = slim.conv2d_transpose(concat2, 32, [2*scale, 2*scale], stride=scale, scope='upconv1')
        pr2to1 = slim.conv2d_transpose(pr2, 2, [2*scale, 2*scale], stride=scale, activation_fn=None, scope='up_pr2to1')
        concat1 = tf.concat(3, [pool1, upconv1, pr2to1])
        pr1 = slim.conv2d(concat1, 2, [3, 3], activation_fn=None, scope='pr1')
        h1 = pr1.get_shape()[1].value
        w1 = pr1.get_shape()[2].value
        pr1_input = tf.image.resize_bilinear(inputs_norm, [h1, w1])
        pr1_output = tf.image.resize_bilinear(outputs_norm, [h1, w1])
        flow_scale_1 = 10.0  # (*20/2)
        loss1, prev1 = loss_interp(pr1, pr1_input, pr1_output, epsilon, alpha_c, alpha_s, lambda_smooth, flow_scale_1, FlowDeltaWeights)
        # Adding intermediate losses
        # NOTE(review): actionLoss reuses loss_weight[0] (same weight as
        # loss1) — confirm a separate weight was not intended.
        all_loss = loss_weight[0]*loss1["total"] + loss_weight[1]*loss2["total"] + loss_weight[2]*loss3["total"] + \
                   loss_weight[3]*loss4["total"] + loss_weight[4]*loss5["total"] + loss_weight[0]*actionLoss
        slim.losses.add_loss(all_loss)
        losses = [loss1, loss2, loss3, loss4, loss5, actionLoss]
        # Flows are rescaled back to pixel units per scale.
        flows_all = [pr1*flow_scale_1, pr2*flow_scale_2, pr3*flow_scale_3, pr4*flow_scale_4, pr5*flow_scale_5]
        predictions = [prev1, actionPredictions]
        return losses, flows_all, predictions
def main():
    """Rebuild the handwriting-classification graph, restore a checkpoint,
    and export a frozen inference graph (`frozen_model.pb`).

    Command-line args:
      --checkpoint: dir or .ckpt file to restore variables from.
      --out-path:   directory that receives model.pb / frozen_model.pb.
    """
    parser = ArgumentParser()
    parser.add_argument('--checkpoint', type=str, dest='checkpoint',
                        help='dir or .ckpt file to load checkpoint from',
                        metavar='CHECKPOINT', required=True)
    parser.add_argument('--out-path', type=str, dest='out_path',
                        help='model output directory',
                        metavar='MODEL_OUT', required=True)
    opts = parser.parse_args()

    if not os.path.exists(opts.out_path):
        # makedirs (not mkdir) so nested output paths like a/b/c also work.
        os.makedirs(opts.out_path)

    tf.reset_default_graph()

    ###############################################################################
    # Inference graph. Must match the training graph exactly so that
    # saver.restore() finds every variable (see build_graph above).
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='input')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    conv_1 = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv1')
    max_pool_1 = slim.max_pool2d(conv_1, [2, 2], [2, 2], padding='SAME')
    conv_2 = slim.conv2d(max_pool_1, 128, [3, 3], padding='SAME', scope='conv2')
    max_pool_2 = slim.max_pool2d(conv_2, [2, 2], [2, 2], padding='SAME')
    conv_3 = slim.conv2d(max_pool_2, 256, [3, 3], padding='SAME', scope='conv3')
    max_pool_3 = slim.max_pool2d(conv_3, [2, 2], [2, 2], padding='SAME')
    flatten = slim.flatten(max_pool_3)
    fc1 = slim.fully_connected(slim.dropout(flatten, keep_prob), 1024,
                               activation_fn=tf.nn.tanh, scope='fc1')
    logits = slim.fully_connected(slim.dropout(fc1, keep_prob), 3755,
                                  activation_fn=None, scope='fc2')

    loss = tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))

    # The training ops below are not used for inference, but creating the
    # optimizer recreates its slot variables so the checkpoint restores cleanly.
    global_step = tf.get_variable("step", [], initializer=tf.constant_initializer(0.0),
                                  trainable=False)
    rate = tf.train.exponential_decay(2e-4, global_step, decay_steps=2000,
                                      decay_rate=0.97, staircase=True)
    train_op = tf.train.AdamOptimizer(learning_rate=rate).minimize(
        loss, global_step=global_step)

    probabilities = tf.nn.softmax(logits)
    predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=3)
    # Both outputs are cast to float32 so they can be concatenated into a
    # single 'output' tensor (indices become floats by design here).
    predicted_val_top_k = tf.cast(predicted_val_top_k, tf.float32, 'predicted_val_top_k')
    predicted_index_top_k = tf.cast(predicted_index_top_k, tf.float32, 'predicted_index_top_k')
    output = tf.concat([predicted_val_top_k, predicted_index_top_k], -1, name='output')
    accuracy_in_top_k = tf.reduce_mean(
        tf.cast(tf.nn.in_top_k(probabilities, labels, 3), tf.float32))
    #####################################################################################

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, opts.checkpoint)
        # Warm-up run on a dummy image: validates that the restored graph
        # actually executes end-to-end before we export it.
        _ = sess.run(
            [predicted_val_top_k, predicted_index_top_k],
            feed_dict={
                images: np.zeros([1, 64, 64, 1]),
                keep_prob: 1.0
            })
        # Save graph definition, then bake the variables into it.
        tf.train.write_graph(sess.graph_def, opts.out_path, 'model.pb')
        freeze_graph.freeze_graph(opts.out_path + '/model.pb', '', False,
                                  opts.checkpoint, 'output', 'save/restore_all',
                                  'save/Const:0', opts.out_path + '/frozen_model.pb',
                                  False, "")
    print("done")
def inference(inputs, num_classes=2, dropout_keep_prob=0.8, is_training=True,
              spatial_squeeze=True, scope='vgg_a', fc_conv_padding='VALID',
              global_pool=False):
    """Small VGG-style classifier with SE (squeeze-and-excitation) blocks.

    Args:
      inputs: 4-D input tensor; comments below assume 224x224x3 — TODO confirm.
      num_classes: size of the final logits layer.
      dropout_keep_prob: keep probability for the pre-logits dropout.
      is_training: wired into batch-norm and dropout.
      spatial_squeeze, scope, fc_conv_padding, global_pool: accepted for
        signature compatibility; not used by this implementation.

    Returns:
      (net, end_points): final logits and a dict of intermediate activations.
    """
    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(stddev=0.1),
                        # NOTE(review): original comment claimed a smaller value
                        # means a heavier penalty — a smaller l2 coefficient in
                        # fact weakens the regularization.
                        weights_regularizer=slim.l2_regularizer(0.00001),
                        normalizer_fn=tf.layers.batch_normalization,
                        normalizer_params={'training': is_training, 'momentum': 0.95}):
        end_points = {}
        # layer1: 224x224x3 -> 56x56x16
        with tf.variable_scope('layer1'):
            net = slim.repeat(inputs, 2, slim.conv2d, 16, [3, 3], scope='conv')
            net = SE_block(net, 4)
            net = slim.max_pool2d(net, kernel_size=[4, 4], stride=4, scope='pool')
            end_points['layer1'] = net
        # layer2: 56x56x16 -> 14x14x32
        with tf.variable_scope('layer2'):
            net = slim.repeat(net, 2, slim.conv2d, 32, [3, 3], scope='conv')
            net = slim.max_pool2d(net, kernel_size=[4, 4], stride=4, scope='pool')
            net = SE_block(net, 4)
            end_points['layer2'] = net
        # layer3: 14x14x32 -> 3x3x64
        with tf.variable_scope('layer3'):
            net = slim.repeat(net, 2, slim.conv2d, 64, [3, 3], scope='conv')
            net = SE_block(net, 4)
            net = slim.max_pool2d(net, kernel_size=[4, 4], stride=4, scope='pool')
            end_points['layer3'] = net
        # layer4: 3x3 pooling with stride 1 (no further downsampling)
        with tf.variable_scope('layer4'):
            net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3], scope='conv')
            net = SE_block(net, 4)
            net = slim.max_pool2d(net, kernel_size=[3, 3], stride=1, scope='pool')
            # FIX: was end_points["layer3"], which overwrote the layer3 entry.
            end_points['layer4'] = net
        with tf.variable_scope('layer5'):
            net = slim.flatten(net, scope='flattern')
        # dropout + final fully connected classifier
        with tf.variable_scope('layer7'):
            net = slim.dropout(net, dropout_keep_prob,
                               is_training=is_training, scope='dropout')
            net = slim.fully_connected(net, num_classes, activation_fn=None,
                                       scope='fc8')
        return net, end_points
def model_fn(self, is_training=True, *args, **kwargs):
    """Build an FCN-8s semantic-segmentation graph on a VGG-16 backbone.

    Returns the softmax-cross-entropy loss when ``is_training`` is True,
    otherwise the per-pixel softmax logits. Input/label placeholders are
    created here and sized from ``ctx.params`` (project-level config).
    """
    batch_image = tf.placeholder(tf.float32, (ctx.params.batch_size, 512, 512, 3),
                                 name='image')
    batch_label = None
    if is_training:
        batch_label = tf.placeholder(tf.int32, (ctx.params.batch_size, 512, 512),
                                     name='label')

    # preprocess: center each channel around zero by subtracting 128.
    # (split/subtract/concat is equivalent to batch_image - 128.0)
    rgb_channels = tf.split(batch_image, 3, 3)
    rgb_channels[0] = rgb_channels[0] - 128.0
    rgb_channels[1] = rgb_channels[1] - 128.0
    rgb_channels[2] = rgb_channels[2] - 128.0
    batch_image = tf.concat(rgb_channels, -1)

    # VGG-16 layer plan; each name drives one op in the loop below.
    layers = (
        'conv1_1', 'relu1_1', 'conv1_2', 'relu1_2', 'pool1',
        'conv2_1', 'relu2_1', 'conv2_2', 'relu2_2', 'pool2',
        'conv3_1', 'relu3_1', 'conv3_2', 'relu3_2', 'conv3_3',
        'relu3_3', 'pool3',
        'conv4_1', 'relu4_1', 'conv4_2', 'relu4_2', 'conv4_3',
        'relu4_3', 'pool4',
        'conv5_1', 'relu5_1', 'conv5_2', 'relu5_2', 'conv5_3',
        'relu5_3', 'pool5',
    )
    net = batch_image
    net_collection = {}  # name -> activation, for the FCN skip connections
    with slim.arg_scope(
            [slim.conv2d],
            weights_regularizer=slim.l2_regularizer(0.0001),
            normalizer_fn=None,
            activation_fn=None,
            weights_initializer=slim.variance_scaling_initializer()):
        for i, name in enumerate(layers):
            kind = name[:4]
            if kind == 'conv':
                # channels double each block: 64, 128, 256, 512, 512
                block_i = int(name[4])
                output_channels = block_i * 64 if block_i < 5 else 512
                net = slim.conv2d(net, output_channels, [3, 3], stride=[1, 1],
                                  padding='SAME')
                net_collection[name] = net
            elif kind == 'relu':
                net = tf.nn.relu(net)
                net_collection[name] = net
            elif kind == 'pool':
                # NOTE(review): canonical VGG uses max pooling; this uses
                # average pooling — confirm this is intentional.
                net = slim.avg_pool2d(net, 2, stride=2, padding='SAME')
                net_collection[name] = net

    pool5_output = net_collection['pool5']
    # fully conv replacements for VGG's fc6/fc7
    conv6 = slim.conv2d(pool5_output, 4096, [7, 7], stride=[1, 1], padding='SAME')
    relu6 = tf.nn.relu(conv6)
    relu6 = slim.dropout(relu6, 0.5)
    conv7 = slim.conv2d(relu6, 4096, [1, 1], stride=[1, 1], padding='SAME')
    relu7 = tf.nn.relu(conv7)
    # NOTE(review): slim.dropout defaults to is_training=True, so these
    # dropouts stay active at inference — confirm intended.
    relu7 = slim.dropout(relu7)

    # FCN32S: score at stride 32, upsample x2
    score_32 = slim.conv2d(relu7, ctx.params.class_num, [1, 1], stride=[1, 1],
                           padding='SAME')
    score_32_up = slim.convolution2d_transpose(score_32, ctx.params.class_num,
                                               [4, 4], [2, 2])

    # FCN16S: fuse with pool4 skip, upsample x2
    pool4_output = slim.conv2d(net_collection['pool4'], ctx.params.class_num,
                               [1, 1], stride=[1, 1], padding='SAME')
    score_16 = score_32_up + pool4_output
    score_16_up = slim.convolution2d_transpose(score_16, ctx.params.class_num,
                                               [4, 4], [2, 2])

    # FCN8S: fuse with pool3 skip, upsample x2 (final stride-8 prediction)
    pool3_output = slim.conv2d(net_collection['pool3'], ctx.params.class_num,
                               [1, 1], stride=[1, 1], padding='SAME')
    score_8 = score_16_up + pool3_output
    score_8_up = slim.convolution2d_transpose(score_8, ctx.params.class_num,
                                              [4, 4], [2, 2])

    if is_training:
        # Labels are one-hot encoded, then resized to the 128x128 output of
        # the stride-8 head (512 / 8 * 2 upsample = 128).
        one_hot_batch_label = tf.one_hot(batch_label, ctx.params.class_num)
        one_hot_batch_label = tf.image.resize_bilinear(
            one_hot_batch_label, [128, 128])
        # cross entropy
        fcn8_loss = tf.losses.softmax_cross_entropy(
            one_hot_batch_label, score_8_up)
        return fcn8_loss
    else:
        logits = tf.nn.softmax(score_8_up)
        return logits
def build_heads(pyramid, ih, iw, num_classes, base_anchors, is_training=False, gt_boxes=None):
    """Build the 3-way outputs, i.e., class, box and mask in the pyramid.

    Algo
    ----
    For each layer:
      1. Build anchor layer
      2. Process the results of anchor layer, decode the output into rois
      3. Sample rois
      4. Build roi layer
      5. Process the results of roi layer, decode the output into boxes
      6. Build the mask layer
      7. Build losses

    Args:
      pyramid: dict of FPN feature maps keyed 'P2'..'P5'.
      ih, iw: input image height/width.
      num_classes: number of object classes (incl. background).
      base_anchors: anchors per spatial position.
      is_training: training vs inference wiring.
      gt_boxes: ground-truth boxes, used when sampling RPN outputs.

    Returns:
      outputs dict with 'rpn', 'roi', 'refined', 'final_boxes', 'mask', etc.
    """
    outputs = {}
    # arg_scope = _extra_conv_arg_scope(activation_fn=None)
    arg_scope = _extra_conv_arg_scope_with_bn(activation_fn=None)
    my_sigmoid = None  # box heads are linear (no activation)
    with slim.arg_scope(arg_scope):
        with tf.variable_scope('pyramid'):
            ## RPN head on every pyramid level P5..P2
            outputs['rpn'] = {}
            for i in range(5, 1, -1):
                p = 'P%d' % i
                stride = 2 ** i
                shape = tf.shape(pyramid[p])
                height, width = shape[1], shape[2]
                rpn = slim.conv2d(pyramid[p], 256, [3, 3], stride=1,
                                  activation_fn=tf.nn.relu, scope='%s/rpn' % p)
                box = slim.conv2d(rpn, base_anchors * 4, [1, 1], stride=1,
                                  scope='%s/rpn/box' % p,
                                  weights_initializer=tf.truncated_normal_initializer(stddev=0.001),
                                  activation_fn=my_sigmoid)
                cls = slim.conv2d(rpn, base_anchors * 2, [1, 1], stride=1,
                                  scope='%s/rpn/cls' % p,
                                  weights_initializer=tf.truncated_normal_initializer(stddev=0.01))
                anchor_scales = [2 ** (i - 2), 2 ** (i - 1), 2 ** i]
                print("anchor_scales = ", anchor_scales)
                all_anchors = gen_all_anchors(height, width, stride, anchor_scales)
                outputs['rpn'][p] = {'box': box, 'cls': cls, 'anchor': all_anchors}

            ## gather all rois across levels into flat tensors
            rpn_boxes = [tf.reshape(outputs['rpn']['P%d' % lvl]['box'], [-1, 4])
                         for lvl in range(5, 1, -1)]
            rpn_clses = [tf.reshape(outputs['rpn']['P%d' % lvl]['cls'], [-1, 1])
                         for lvl in range(5, 1, -1)]
            rpn_anchors = [tf.reshape(outputs['rpn']['P%d' % lvl]['anchor'], [-1, 4])
                           for lvl in range(5, 1, -1)]
            rpn_boxes = tf.concat(values=rpn_boxes, axis=0)
            rpn_clses = tf.concat(values=rpn_clses, axis=0)
            rpn_anchors = tf.concat(values=rpn_anchors, axis=0)
            outputs['rpn']['box'] = rpn_boxes
            outputs['rpn']['cls'] = rpn_clses
            outputs['rpn']['anchor'] = rpn_anchors

            rpn_probs = tf.nn.softmax(tf.reshape(rpn_clses, [-1, 2]))
            rois, roi_clses, scores = anchor_decoder(rpn_boxes, rpn_probs,
                                                     rpn_anchors, ih, iw)
            rois, scores, batch_inds, mask_rois, mask_scores, mask_batch_inds = \
                sample_rpn_outputs_with_gt(rois, rpn_probs[:, 1], gt_boxes,
                                           is_training=is_training)
            outputs['roi'] = {'box': rois, 'score': scores}

            ## cropping regions: assign each roi to a pyramid level, ROIAlign it
            [assigned_rois, assigned_batch_inds, assigned_layer_inds] = \
                assign_boxes(rois, [rois, batch_inds], [2, 3, 4, 5])
            outputs['assigned_rois'] = assigned_rois
            outputs['assigned_layer_inds'] = assigned_layer_inds
            cropped_rois = []
            ordered_rois = []
            pyramid_feature = []
            for i in range(5, 1, -1):
                print(i)
                p = 'P%d' % i
                splitted_rois = assigned_rois[i - 2]
                batch_inds = assigned_batch_inds[i - 2]
                cropped, boxes_in_crop = ROIAlign(pyramid[p], splitted_rois,
                                                  batch_inds, stride=2 ** i,
                                                  pooled_height=14, pooled_width=14)
                cropped_rois.append(cropped)
                ordered_rois.append(splitted_rois)
                pyramid_feature.append(tf.transpose(pyramid[p], [0, 3, 1, 2]))
            cropped_rois = tf.concat(values=cropped_rois, axis=0)
            ordered_rois = tf.concat(values=ordered_rois, axis=0)
            outputs['ordered_rois'] = ordered_rois
            outputs['pyramid_feature'] = pyramid_feature
            outputs['roi']['cropped_rois'] = cropped_rois
            tf.add_to_collection('__CROPPED__', cropped_rois)

            ## refine head: pool 14x14 crops to 7x7, two FC layers, cls + box
            cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2,
                                              padding='SAME')
            refine = slim.flatten(cropped_regions)
            refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
            refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
            refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
            refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
            cls2 = slim.fully_connected(refine, num_classes, activation_fn=None,
                                        weights_initializer=tf.truncated_normal_initializer(stddev=0.05))
            box = slim.fully_connected(refine, num_classes * 4,
                                       activation_fn=my_sigmoid,
                                       weights_initializer=tf.truncated_normal_initializer(stddev=0.05))
            outputs['refined'] = {'box': box, 'cls': cls2}

            ## decode refine net outputs
            cls2_prob = tf.nn.softmax(cls2)
            final_boxes, classes, scores = \
                roi_decoder(box, cls2_prob, ordered_rois, ih, iw)
            outputs['final_boxes'] = {'box': final_boxes, 'cls': classes,
                                      'prob': cls2_prob}

            ## for testing, maskrcnn takes refined boxes as inputs
            if not is_training:
                rois = final_boxes
                # FIX: cropped_rois / ordered_rois were already converted to
                # Tensors by tf.concat above; appending to them raised
                # AttributeError. Collect into fresh lists instead.
                cropped_rois = []
                ordered_rois = []
                # NOTE(review): this loop still reuses the assigned_rois from
                # the pre-refinement assignment (the assign_boxes call for
                # final_boxes is commented out upstream) — confirm intended.
                for i in range(5, 1, -1):
                    p = 'P%d' % i
                    splitted_rois = assigned_rois[i - 2]
                    batch_inds = assigned_batch_inds[i - 2]
                    cropped, _ = ROIAlign(pyramid[p], splitted_rois, batch_inds,
                                          stride=2 ** i,
                                          pooled_height=14, pooled_width=14)
                    cropped_rois.append(cropped)
                    ordered_rois.append(splitted_rois)
                cropped_rois = tf.concat(values=cropped_rois, axis=0)
                ordered_rois = tf.concat(values=ordered_rois, axis=0)

            ## mask head: 4 convs, upsample to 28x28, per-class mask logits
            m = cropped_rois
            for _ in range(4):
                m = slim.conv2d(m, 256, [3, 3], stride=1, padding='SAME',
                                activation_fn=tf.nn.relu)
            # to 28 x 28
            m = slim.conv2d_transpose(m, 256, 2, stride=2, padding='VALID',
                                      activation_fn=tf.nn.relu)
            tf.add_to_collection('__TRANSPOSED__', m)
            m = slim.conv2d(m, num_classes, [1, 1], stride=1, padding='VALID',
                            activation_fn=None)

            # add a mask, given the predicted boxes and classes
            outputs['mask'] = {'mask': m, 'cls': classes, 'score': scores}

    return outputs
def inception_resnet_v2(inputs, is_training=True,
                        dropout_keep_prob=0.8,
                        reuse=None,
                        scope='InceptionResnetV2'):
    """Creates the Inception Resnet V2 model (up to the flattened pre-logits).

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the flattened, dropout-regularized pre-logit activations
        (no classification layer is built here).
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net, 32, 3, padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 80, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net, 192, 3, padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 192
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_5a_3x3')
                end_points['MaxPool_5a_3x3'] = net

                # 35 x 35 x 320
                with tf.variable_scope('Mixed_5b'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 48, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 64, 5,
                                                    scope='Conv2d_0b_5x5')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2_0 = slim.conv2d(net, 64, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3,
                                                    scope='Conv2d_0c_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.avg_pool2d(net, 3, stride=1,
                                                     padding='SAME',
                                                     scope='AvgPool_0a_3x3')
                        tower_pool_1 = slim.conv2d(tower_pool, 64, 1,
                                                   scope='Conv2d_0b_1x1')
                    # FIX: TF >= 1.0 signature is tf.concat(values, axis);
                    # the old tf.concat(3, values) order crashes. The rest of
                    # this file already uses the new order.
                    net = tf.concat([tower_conv, tower_conv1_1,
                                     tower_conv2_2, tower_pool_1], 3)
                end_points['Mixed_5b'] = net
                net = slim.repeat(net, 10, block35, scale=0.17)

                # 17 x 17 x 1024
                with tf.variable_scope('Mixed_6a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 384, 3, stride=2,
                                                 padding='VALID',
                                                 scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1_0 = slim.conv2d(net, 256, 1,
                                                    scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3)
                end_points['Mixed_6a'] = net
                net = slim.repeat(net, 20, block17, scale=0.10)

                with tf.variable_scope('Mixed_7a'):
                    with tf.variable_scope('Branch_0'):
                        tower_conv = slim.conv2d(net, 256, 1,
                                                 scope='Conv2d_0a_1x1')
                        tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2,
                                                   padding='VALID',
                                                   scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_1'):
                        tower_conv1 = slim.conv2d(net, 256, 1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_2'):
                        tower_conv2 = slim.conv2d(net, 256, 1,
                                                  scope='Conv2d_0a_1x1')
                        tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3,
                                                    scope='Conv2d_0b_3x3')
                        tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3,
                                                    stride=2, padding='VALID',
                                                    scope='Conv2d_1a_3x3')
                    with tf.variable_scope('Branch_3'):
                        tower_pool = slim.max_pool2d(net, 3, stride=2,
                                                     padding='VALID',
                                                     scope='MaxPool_1a_3x3')
                    net = tf.concat([tower_conv_1, tower_conv1_1,
                                     tower_conv2_2, tower_pool], 3)
                end_points['Mixed_7a'] = net

                net = slim.repeat(net, 9, block8, scale=0.20)
                net = block8(net, activation_fn=None)

                net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1')
                end_points['Conv2d_7b_1x1'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # pylint: disable=no-member
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training, scope='Dropout')
                    end_points['PreLogitsFlatten'] = net

    return net, end_points
def vgg(inputs, is_train_phase, drop_prob=1.0):
    """VGG16-style CIFAR-10 classifier: five conv stages (each conv followed
    by batch norm, with dropout between convs inside a stage and max-pooling
    at the stage end), then a 512-unit FC layer and 10-way logits.

    Args:
      inputs: 4-D image batch.
      is_train_phase: bool tensor/flag wired into every batch-norm layer.
      drop_prob: accepted for signature compatibility (dropout rates are
        hard-coded per stage below).

    Returns:
      Unnormalized 10-class logits.
    """
    def _bn(x):
        # All batch norms share the same configuration.
        return tf.contrib.layers.batch_norm(x, center=True, scale=True,
                                            is_training=is_train_phase)

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.relu,
                        weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
                        weights_regularizer=slim.l2_regularizer(0.0005)):
        net = inputs
        # (stage width, dropout keep-probs after every conv except the last one)
        stages = [
            (64, [0.3]),
            (128, [0.4]),
            (256, [0.4, 0.4]),
            (512, [0.4, 0.4]),
            (512, [0.4, 0.4]),
        ]
        for width, keeps in stages:
            for keep in keeps:
                net = slim.conv2d(net, width, [3, 3], stride=[1, 1], padding="SAME")
                net = _bn(net)
                net = slim.dropout(net, keep)
            # Final conv of the stage has no dropout; pooling halves the grid.
            net = slim.conv2d(net, width, [3, 3], stride=[1, 1], padding="SAME")
            net = _bn(net)
            net = slim.max_pool2d(net, [2, 2], stride=[2, 2], padding="SAME")

        # Classifier head.
        net = slim.dropout(net, 0.5)
        net = slim.fully_connected(slim.flatten(net), 512)
        net = _bn(net)
        net = slim.dropout(net, 0.5)
        net = slim.fully_connected(net, 10, activation_fn=None)
    return net
def inception_resnet_v1(inputs, is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: width of the final linear embedding layer.
      reuse: whether or not the network and its variables should be reused. To be
        able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the bottleneck embedding (linear, no activation).
      cam_conv: the Mixed_6b activation, kept for class-activation-map use.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        # is_training is routed to every batch-norm and dropout layer below.
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # Stem. Shape comments assume a 299x299 input — TODO confirm.
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net, 32, 3, padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 80, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net, 192, 3, padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 256
                net = slim.conv2d(net, 256, 3, stride=2, padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net

                # 5 x Inception-resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)
                end_points['Mixed_5a'] = net

                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 192, 192, 256, 384)
                end_points['Mixed_6a'] = net

                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)
                end_points['Mixed_6b'] = net
                # Saved here (before Reduction-B) and returned alongside the
                # embedding, e.g. for computing class activation maps.
                cam_conv = net

                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                end_points['Mixed_7a'] = net

                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 5, block8, scale=0.20)
                end_points['Mixed_8a'] = net

                net = block8(net, activation_fn=None)
                end_points['Mixed_8b'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    #pylint: disable=no-member
                    # Global pooling over whatever spatial extent remains.
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net
                # NOTE(review): reuse=False is hard-coded here instead of
                # forwarding the function's `reuse` argument — confirm this
                # layer is never meant to be rebuilt with reuse=True.
                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)

    return net, cam_conv, end_points
def phoc_prediction(features, phoc_dim, scope, reuse=None, L2_reg=0.0, act_func=tf.nn.relu, large_topology=False, dropout=0.0): with slim.arg_scope(_args_scope(act_func, L2_reg)): with tf.variable_scope(scope, scope, [features], reuse=reuse) as sc: end_points_collection = sc.name + '_end_points' # Collect outputs for conv2d, fully_connected and max_pool2d. with slim.arg_scope( [slim.conv2d, slim.fully_connected, slim.max_pool2d], outputs_collections=end_points_collection): if large_topology: phoc = slim.conv2d(features, 1024, [1, 1], stride=1, activation_fn=act_func, padding='VALID', scope='fc4_phoc') phoc = slim.conv2d(phoc, 1024, [1, 1], stride=1, activation_fn=act_func, padding='VALID', scope='fc5_phoc') phoc = slim.conv2d(phoc, 1024, [1, 1], stride=1, activation_fn=act_func, padding='VALID', scope='fc6_phoc') phoc = slim.conv2d(phoc, phoc_dim, [1, 1], stride=1, activation_fn=None, padding='VALID', scope='fc7_phoc') else: phoc = slim.conv2d(features, 1024, [1, 1], stride=1, activation_fn=act_func, padding='VALID', scope='fc1') phoc = slim.dropout(phoc, keep_prob=1 - dropout, is_training=Kb.learning_phase(), scope='dropout_phoc1') phoc = slim.conv2d(phoc, 1024, [1, 1], stride=1, activation_fn=act_func, padding='VALID', scope='fc2') phoc = slim.dropout(phoc, keep_prob=1 - dropout, is_training=Kb.learning_phase(), scope='dropout_phoc2') phoc = slim.conv2d(phoc, phoc_dim, [1, 1], stride=1, activation_fn=None, padding='VALID', scope='linear') phoc = tf.squeeze(phoc, name='phoc_embd') return phoc
# conv3: 18*18*64 -> 8*8*128 conv3 = slim.conv2d(pool2,384,[2,2],stride=2,scope='conv3') # MNIST conv1: 8*8*128 -> 6*6*256 -> 4*4*256 # no group because I only have 1 GPU conv4 = slim.conv2d(conv3,256,[2,2],stride=1,scope='conv4') pool4 = slim.max_pool2d(conv4,[2,2],stride=1,scope='pool4') # MNIST fc6: 4*4*256 -> 1*1*1024 -> 1024 conv5 = slim.conv2d(pool4,1024,[4,4],stride=1,scope='conv5') fc = slim.flatten(conv5) # MNIST fc6: 1024 -> 1024 fc1 = slim.fully_connected(fc,1024,scope='fc1') drop1 = slim.dropout(fc1,keep_prob) # MNIST fc7: 1024 -> 1024 fc2 = slim.fully_connected(drop1,1024,scope='fc2') drop2 = slim.dropout(fc2,keep_prob) # fc8: 1024 -> 10 y_conv = slim.fully_connected(drop2,10,scope='fc3') loss = slim.losses.softmax_cross_entropy(y_conv, y) train_step = tf.train.AdamOptimizer(1e-4).minimize(loss) correct_prediction = tf.equal(tf.argmax(y_conv,1),tf.argmax(y,1)) accuracy = tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
def create_model(self,
                 model_input,
                 vocab_size,
                 is_training,
                 num_mixtures=None,
                 l2_penalty=1e-8,
                 **unused_params):
    """Creates a Mixture of (Logistic) Experts model.

    It also includes the possibility of gating the probabilities.

    The model consists of a per-class softmax distribution over a
    configurable number of logistic classifiers. One of the classifiers
    in the mixture is not trained, and always predicts 0.

    Args:
      model_input: 'batch_size' x 'num_features' matrix of input features.
      vocab_size: The number of classes in the dataset.
      is_training: Is this the training phase ?
      num_mixtures: The number of mixtures (excluding a dummy 'expert' that
        always predicts the non-existence of an entity).
      l2_penalty: How much to penalize the squared magnitudes of parameter
        values. NOTE(review): this argument is immediately overwritten by
        FLAGS.moe_l2 below, so callers cannot actually override it.

    Returns:
      A dictionary with a tensor containing the probability predictions
      of the model in the 'predictions' key. The dimensions of the tensor
      are batch_size x num_classes.
    """
    num_mixtures = num_mixtures or FLAGS.moe_num_mixtures
    low_rank_gating = FLAGS.moe_low_rank_gating
    # NOTE(review): shadows the l2_penalty parameter (see docstring).
    l2_penalty = FLAGS.moe_l2
    gating_probabilities = FLAGS.moe_prob_gating
    gating_input = FLAGS.moe_prob_gating_input

    input_size = model_input.get_shape().as_list()[1]
    remove_diag = FLAGS.gating_remove_diag

    if low_rank_gating == -1:
        # Full-rank gating: one linear layer straight to all gate logits.
        gate_activations = slim.fully_connected(
            model_input,
            vocab_size * (num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates")
    else:
        # Low-rank factorization of the gating layer: project down to
        # `low_rank_gating` dims, then up to the gate logits.
        gate_activations1 = slim.fully_connected(
            model_input,
            low_rank_gating,
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates1")
        gate_activations = slim.fully_connected(
            gate_activations1,
            vocab_size * (num_mixtures + 1),
            activation_fn=None,
            biases_initializer=None,
            weights_regularizer=slim.l2_regularizer(l2_penalty),
            scope="gates2")

    # Expert trunk: one hidden layer with batch norm and dropout, plus a
    # residual connection back to the input.
    expert_activations_hidden = slim.fully_connected(
        model_input,
        input_size,
        activation_fn=tf.sigmoid,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts_hidden")
    expert_activations_hidden = slim.batch_norm(expert_activations_hidden,
                                                center=True,
                                                scale=True,
                                                is_training=is_training,
                                                scope="experts_hidden_bn")
    expert_activations_hidden = slim.dropout(expert_activations_hidden, 0.5,
                                             scope='dropout')
    expert_activations_hidden = expert_activations_hidden + model_input

    expert_activations = slim.fully_connected(
        expert_activations_hidden,
        vocab_size * num_mixtures,
        activation_fn=None,
        weights_regularizer=slim.l2_regularizer(l2_penalty),
        scope="experts")

    # Softmax over the (num_mixtures + 1) gates per (batch, label) pair;
    # the extra gate is the implicit always-zero dummy expert.
    gating_distribution = tf.nn.softmax(
        tf.reshape(gate_activations,
                   [-1, num_mixtures + 1]))  # (Batch * #Labels) x (num_mixtures + 1)
    expert_distribution = tf.nn.sigmoid(
        tf.reshape(expert_activations,
                   [-1, num_mixtures]))  # (Batch * #Labels) x num_mixtures

    # Mixture: gate-weighted sum of expert probabilities (dummy gate dropped).
    probabilities_by_class_and_batch = tf.reduce_sum(
        gating_distribution[:, :num_mixtures] * expert_distribution, 1)
    probabilities = tf.reshape(probabilities_by_class_and_batch,
                               [-1, vocab_size])

    if gating_probabilities:
        # Optional second-stage gating on top of the mixture outputs,
        # driven either by the probabilities themselves or by the input.
        if gating_input == 'prob':
            gating_weights = tf.get_variable(
                "gating_prob_weights", [vocab_size, vocab_size],
                initializer=tf.random_normal_initializer(
                    stddev=1 / math.sqrt(vocab_size)))
            gates = tf.matmul(probabilities, gating_weights)
        else:
            gating_weights = tf.get_variable(
                "gating_prob_weights", [input_size, vocab_size],
                initializer=tf.random_normal_initializer(
                    stddev=1 / math.sqrt(vocab_size)))
            gates = tf.matmul(model_input, gating_weights)

        if remove_diag:
            # removes diagonals coefficients
            diagonals = tf.matrix_diag_part(gating_weights)
            gates = gates - tf.multiply(diagonals, probabilities)

        gates = slim.batch_norm(gates,
                                center=True,
                                scale=True,
                                is_training=is_training,
                                scope="gating_prob_bn")
        gates = tf.sigmoid(gates)

        probabilities = tf.multiply(probabilities, gates)

    return {"predictions": probabilities}
def mobilenet_v2(self, input_x, is_training=True, reuse=False, keep_prob=0.8,
                 scope='mobilenet_v2'):
    """Builds a MobileNetV2-style classifier over `input_x`.

    Stacks inverted-residual bottleneck blocks (via
    self.inverted_bottleneck_block), global-average-pools, applies dropout,
    and projects to self.num_class logits with a 1x1 conv.

    Args:
      input_x: 4-D input image tensor (NHWC — assumed; TODO confirm).
      is_training: forwarded to each bottleneck block's batch norm.
      reuse: if True, reuse variables of an already-built graph.
      keep_prob: dropout keep probability (set via arg_scope on
        slim.dropout).
      scope: variable scope name.

    Returns:
      Logits tensor, with size-1 dims squeezed out by tf.squeeze.
    """
    # batch_norm parameters
    # bn_parameters = {'is_training': is_training, 'center':True, 'scale':True, 'decay':0.997}
    self.num_block = 0
    with tf.variable_scope(scope) as scope:
        if reuse:
            scope.reuse_variables()
        # Two stacked arg_scopes: conv defaults (BN + relu6) and the
        # dropout keep probability.
        with slim.arg_scope([slim.conv2d, slim.separable_conv2d],
                            weights_initializer=tf.truncated_normal_initializer(stddev=0.02),
                            normalizer_fn=slim.batch_norm,
                            #normalizer_params=bn_parameters,
                            activation_fn=tf.nn.relu6),\
             slim.arg_scope([slim.dropout], keep_prob=keep_prob) as s:
            # NOTE(review): stride=1 here; the MobileNetV2 paper uses
            # stride 2 for the stem conv — presumably intentional for
            # small inputs; confirm.
            conv0 = slim.conv2d(input_x, 32, 3, stride=1, scope='conv0')
            # bottleneck_residual_block
            # Signature (per call sites): (input, expansion, out_channels,
            # downsample_flag, is_training).
            bottleneck_1_1 = self.inverted_bottleneck_block(
                conv0, 1, 16, False, is_training=is_training)
            bottleneck_2_1 = self.inverted_bottleneck_block(
                bottleneck_1_1, self.width_multiplier, 24, False,
                is_training=is_training)
            bottleneck_2_2 = self.inverted_bottleneck_block(
                bottleneck_2_1, self.width_multiplier, 24, False,
                is_training=is_training)
            bottleneck_3_1 = self.inverted_bottleneck_block(
                bottleneck_2_2, self.width_multiplier, 32, True,
                is_training=is_training)
            bottleneck_3_2 = self.inverted_bottleneck_block(
                bottleneck_3_1, self.width_multiplier, 32, False,
                is_training=is_training)
            bottleneck_3_3 = self.inverted_bottleneck_block(
                bottleneck_3_2, self.width_multiplier, 32, False,
                is_training=is_training)
            bottleneck_4_1 = self.inverted_bottleneck_block(
                bottleneck_3_3, self.width_multiplier, 64, True,
                is_training=is_training)
            bottleneck_4_2 = self.inverted_bottleneck_block(
                bottleneck_4_1, self.width_multiplier, 64, False,
                is_training=is_training)
            bottleneck_4_3 = self.inverted_bottleneck_block(
                bottleneck_4_2, self.width_multiplier, 64, False,
                is_training=is_training)
            bottleneck_4_4 = self.inverted_bottleneck_block(
                bottleneck_4_3, self.width_multiplier, 64, False,
                is_training=is_training)
            bottleneck_5_1 = self.inverted_bottleneck_block(
                bottleneck_4_4, self.width_multiplier, 96, False,
                is_training=is_training)
            bottleneck_5_2 = self.inverted_bottleneck_block(
                bottleneck_5_1, self.width_multiplier, 96, False,
                is_training=is_training)
            bottleneck_5_3 = self.inverted_bottleneck_block(
                bottleneck_5_2, self.width_multiplier, 96, False,
                is_training=is_training)
            bottleneck_6_1 = self.inverted_bottleneck_block(
                bottleneck_5_3, self.width_multiplier, 160, True,
                is_training=is_training)
            bottleneck_6_2 = self.inverted_bottleneck_block(
                bottleneck_6_1, self.width_multiplier, 160, False,
                is_training=is_training)
            bottleneck_6_3 = self.inverted_bottleneck_block(
                bottleneck_6_2, self.width_multiplier, 160, False,
                is_training=is_training)
            bottleneck_7_1 = self.inverted_bottleneck_block(
                bottleneck_6_3, self.width_multiplier, 320, False,
                is_training=is_training)
            # NOTE(review): kernel size 3 here; the reference architecture
            # uses a 1x1 conv for this 1280-channel head — confirm.
            conv8 = slim.conv2d(bottleneck_7_1, 1280, 3, stride=1,
                                scope='conv8')
            # global average pooling
            filter_size = [
                conv8.get_shape().as_list()[1],
                conv8.get_shape().as_list()[2]
            ]
            avgpool = slim.avg_pool2d(conv8, filter_size, scope='avgpool')
            # keep_prob comes from the arg_scope above.
            dropout = slim.dropout(avgpool)
            # 1x1 conv as the classifier; no BN / activation on logits.
            logits = tf.squeeze(
                slim.conv2d(dropout, self.num_class, 1, stride=1,
                            activation_fn=None, normalizer_fn=None))
            return logits
def build_network(self, sess, is_training=True):
    """Builds a multi-scale Faster-R-CNN-style detection graph on VGG-16.

    Constructs: the VGG-16 backbone (endpoints conv4_2/conv4_3/conv5_2/
    conv5_3), four cascaded RPN branches (rpn3 -> rpn2 -> rpn1 -> rpn) that
    pass score residuals and rejection indices down the cascade, a proposal
    layer, and four cascaded RCNN classification heads whose scores are
    averaged into the final class score.

    NOTE(review): several names used here (rpn3_reject, rpn2_reject,
    rpn1_reject, factor1, factor2, rpn_batch1..3, OHEM/OHEM1..3,
    batch/batch1..3, reject1..3) are not defined in this method —
    presumably module-level configuration globals; verify at import site.

    Args:
      sess: TF session (unused in this method body).
      is_training: build the training branches (anchor targets, proposal
        targets, dropout) when True.

    Returns:
      (rois, cls_prob, bbox_pred) tensors.
    """
    with tf.variable_scope('vgg_16', 'vgg_16'):
        # select initializers
        if cfg.TRAIN.TRUNCATED:
            initializer = tf.truncated_normal_initializer(
                mean=0.0, stddev=0.01)
            initializer_bbox = tf.truncated_normal_initializer(
                mean=0.0, stddev=0.001)
        else:
            initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
            initializer_bbox = tf.random_normal_initializer(
                mean=0.0, stddev=0.001)

        # VGG-16 backbone. conv1/conv2 are frozen (trainable=False).
        net = slim.repeat(self._image, 2, slim.conv2d, 64, [3, 3],
                          trainable=False, scope='conv1')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool1')
        net = slim.repeat(net, 2, slim.conv2d, 128, [3, 3],
                          trainable=False, scope='conv2')
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool2')
        net = slim.repeat(net, 3, slim.conv2d, 256, [3, 3],
                          trainable=is_training, scope='conv3')
        #continue conv4
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool3')
        net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv4')
        #store conv4_2 (after the first two conv4 layers)
        self.endpoint['conv4_2'] = net
        #continue to conv4_3
        net = slim.conv2d(net, 512, [3, 3], trainable=is_training,
                          scope='conv4/conv4_3')
        #store conv4_3
        self.endpoint['conv4_3'] = net
        #continue conv5
        net = slim.max_pool2d(net, [2, 2], padding='SAME', scope='pool4')
        net = slim.repeat(net, 2, slim.conv2d, 512, [3, 3],
                          trainable=is_training, scope='conv5')
        #store conv5_2
        self.endpoint['conv5_2'] = net
        #continue conv5/conv5_3
        net = slim.conv2d(net, 512, [3, 3], trainable=is_training,
                          scope='conv5/conv5_3')
        #store conv5_3
        self.endpoint['conv5_3'] = net
        self._layers['head'] = self.endpoint['conv5_3']
        # build the anchors for the image
        self._anchor_component()

        ############################### RPN START ###############################
        #--------------------------------- rpn 3 ---------------------------------#
        # Downsample conv4_2 to match the other RPN strides.
        conv3_resize = slim.avg_pool2d(self.endpoint['conv4_2'], [2, 2],
                                       padding='SAME',
                                       scope='conv3_resize')
        # rpn 3
        rpn3 = slim.conv2d(conv3_resize, 512, [3, 3],
                           trainable=is_training,
                           weights_initializer=initializer,
                           scope="rpn3_conv/3x3")
        # Learnable scalar scale on the branch output.
        scale_rpn3 = tf.Variable(tf.cast(1, tf.float32),
                                 trainable=is_training, name='scale_rpn3')
        rpn3 = tf.scalar_mul(scale_rpn3, rpn3)
        self._act_summaries.append(rpn3)
        rpn3_cls_score = slim.conv2d(rpn3, self._num_anchors * 2, [1, 1],
                                     trainable=is_training,
                                     weights_initializer=initializer,
                                     padding='VALID', activation_fn=None,
                                     scope='rpn3_cls_score')
        # change it so that the score has 2 as its channel size
        rpn3_cls_score_reshape = self._reshape_layer(
            rpn3_cls_score, 2, 'rpn3_cls_score_reshape')
        rpn3_cls_prob_reshape = self._softmax_layer(
            rpn3_cls_score_reshape, "rpn3_cls_prob_reshape")
        rpn3_cls_prob = self._reshape_layer(rpn3_cls_prob_reshape,
                                            self._num_anchors * 2,
                                            "rpn3_cls_prob")
        rpn3_cls_prob_reshape = tf.reshape(rpn3_cls_prob, [-1, 2])
        # Anchors whose background prob exceeds rpn3_reject are rejected
        # and passed down the cascade.
        rpn3_reject_inds = tf.where(
            tf.greater(rpn3_cls_prob_reshape[:, 0], rpn3_reject))
        if is_training:
            #compute anchor3 targets
            rpn3_labels = self._anchor_target_layer(
                rpn3_cls_score, "anchor3", [], rpn_batch3, OHEM3)
            #store rpn3 values
            self._predictions[
                "rpn3_cls_score_reshape"] = rpn3_cls_score_reshape

        #--------------------------------- rpn 2 ---------------------------------#
        conv4_resize = slim.avg_pool2d(self.endpoint['conv4_3'], [2, 2],
                                       padding='SAME',
                                       scope='conv4_3_resize')
        rpn2 = slim.conv2d(conv4_resize, 512, [3, 3],
                           trainable=is_training,
                           weights_initializer=initializer,
                           scope="rpn2_conv/3x3")
        #combine
        scale_rpn2 = tf.Variable(tf.cast(1, tf.float32),
                                 trainable=is_training, name='scale_rpn2')
        rpn2 = tf.scalar_mul(scale_rpn2, rpn2)
        # Fuse the previous branch's features into this one.
        rpn2 = self._score_add_up(rpn3, rpn2, factor1, factor2, 'rpn2')
        self._act_summaries.append(rpn2)
        rpn2_cls_score = slim.conv2d(rpn2, self._num_anchors * 2, [1, 1],
                                     trainable=is_training,
                                     weights_initializer=initializer,
                                     padding='VALID', activation_fn=None,
                                     scope='rpn2_cls_score_pre')
        #add up the 2 scores: rpn3 and rpn2
        rpn2_cls_score = self._score_add_up(rpn3_cls_score, rpn2_cls_score,
                                            factor1, factor2,
                                            'rpn2_cls_score')
        #use the added-up score
        rpn2_cls_score_reshape = self._reshape_layer(
            rpn2_cls_score, 2, 'rpn2_cls_score_reshape')
        rpn2_cls_prob_reshape = self._softmax_layer(
            rpn2_cls_score_reshape, "rpn2_cls_prob_reshape")
        rpn2_cls_prob = self._reshape_layer(rpn2_cls_prob_reshape,
                                            self._num_anchors * 2,
                                            "rpn2_cls_prob")
        rpn2_cls_prob_reshape = tf.reshape(rpn2_cls_prob, [-1, 2])
        rpn2_reject_inds = tf.where(
            tf.greater(rpn2_cls_prob_reshape[:, 0], rpn2_reject))
        # Accumulate rejections from rpn3.
        rpn2_reject_inds = tf.concat([rpn3_reject_inds, rpn2_reject_inds], 0)
        if is_training:
            #compute anchor2 targets, excluding rpn3's rejected anchors
            rpn2_labels = self._anchor_target_layer(
                rpn2_cls_score, "anchor2", rpn3_reject_inds, rpn_batch2,
                OHEM2)
            #store rpn2 values
            self._predictions[
                "rpn2_cls_score_reshape"] = rpn2_cls_score_reshape

        #--------------------------------- rpn 1 ---------------------------------#
        # rpn 1
        rpn1 = slim.conv2d(self.endpoint['conv5_2'], 512, [3, 3],
                           trainable=is_training,
                           weights_initializer=initializer,
                           scope="rpn1_conv/3x3")
        scale_rpn1 = tf.Variable(tf.cast(1, tf.float32),
                                 trainable=is_training, name='scale_rpn1')
        rpn1 = tf.scalar_mul(scale_rpn1, rpn1)
        rpn1 = self._score_add_up(rpn2, rpn1, factor1, factor2, 'rpn1')
        self._act_summaries.append(rpn1)
        rpn1_cls_score = slim.conv2d(rpn1, self._num_anchors * 2, [1, 1],
                                     trainable=is_training,
                                     weights_initializer=initializer,
                                     padding='VALID', activation_fn=None,
                                     scope='rpn1_cls_score_pre')
        rpn1_cls_score = self._score_add_up(rpn2_cls_score, rpn1_cls_score,
                                            factor1, factor2,
                                            'rpn1_cls_score')
        # change it so that the score has 2 as its channel size
        rpn1_cls_score_reshape = self._reshape_layer(
            rpn1_cls_score, 2, 'rpn1_cls_score_reshape')
        rpn1_cls_prob_reshape = self._softmax_layer(
            rpn1_cls_score_reshape, "rpn1_cls_prob_reshape")
        rpn1_cls_prob = self._reshape_layer(rpn1_cls_prob_reshape,
                                            self._num_anchors * 2,
                                            "rpn1_cls_prob")
        rpn1_cls_prob_reshape = tf.reshape(rpn1_cls_prob, [-1, 2])
        rpn1_reject_inds = tf.where(
            tf.greater(rpn1_cls_prob_reshape[:, 0], rpn1_reject))
        rpn1_reject_inds = tf.concat([rpn2_reject_inds, rpn1_reject_inds], 0)
        if is_training:
            #compute anchor1 targets
            # NOTE(review): this passes rpn_batch2 (not a dedicated
            # rpn_batch for level 1) — confirm this is intentional.
            rpn1_labels = self._anchor_target_layer(
                rpn1_cls_score, "anchor1", rpn2_reject_inds, rpn_batch2,
                OHEM1)
            #store rpn1 values
            self._predictions[
                "rpn1_cls_score_reshape"] = rpn1_cls_score_reshape

        #---------------------------------- rpn ----------------------------------#
        rpn = slim.conv2d(self.endpoint['conv5_3'], 512, [3, 3],
                          trainable=is_training,
                          weights_initializer=initializer,
                          scope="rpn_conv/3x3")
        scale_rpn = tf.Variable(tf.cast(1, tf.float32),
                                trainable=is_training, name='scale_rpn')
        rpn = tf.scalar_mul(scale_rpn, rpn)
        rpn = self._score_add_up(rpn1, rpn, factor1, factor2, 'rpn')
        self._act_summaries.append(rpn)
        rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_cls_score_pre')
        # Box regression is predicted only at this final RPN level.
        rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                    trainable=is_training,
                                    weights_initializer=initializer,
                                    padding='VALID', activation_fn=None,
                                    scope='rpn_bbox_pred')
        #add up the 2 scores: rpn1 and rpn
        rpn_cls_score = self._score_add_up(rpn1_cls_score, rpn_cls_score,
                                           factor1, factor2,
                                           'rpn_cls_score')
        #use the added-up score
        rpn_cls_score_reshape = self._reshape_layer(
            rpn_cls_score, 2, 'rpn_cls_score_reshape')
        rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape,
                                                   "rpn_cls_prob_reshape")
        rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape,
                                           self._num_anchors * 2,
                                           "rpn_cls_prob")
        if is_training:
            #compute anchor targets
            rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor",
                                                   rpn1_reject_inds,
                                                   rpn_batch1, OHEM)
        ############################### RPN DONE ###############################

        #--------------------------- proposal is made here ---------------------------#
        if is_training:
            # #compute anchor loss
            # rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor", rpn1_reject_inds)
            rois, roi_scores = self._proposal_layer(
                rpn_cls_prob, rpn_bbox_pred, "rois", rpn1_reject_inds)
            # with tf.control_dependencies([rpn_labels]):
            #   rois, _ = self._proposal_target_layer(rois, roi_scores, "rpn_rois")
        else:
            if cfg.TEST.MODE == 'nms':
                rois, _ = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred,
                                               "rois", rpn1_reject_inds)
            elif cfg.TEST.MODE == 'top':
                rois, _ = self._proposal_top_layer(rpn_cls_prob,
                                                   rpn_bbox_pred, "rois",
                                                   rpn1_reject_inds)
            else:
                raise NotImplementedError
        #------------------------------ finish proposal ------------------------------#

        ############################### RCNN START ###############################
        #--------------------------------- rcnn 3 ---------------------------------#
        # rcnn
        # generate target
        if is_training:
            with tf.control_dependencies([rpn_labels]):
                rois, _, passinds3 = self._proposal_target_layer(
                    rois, roi_scores, "rpn3_rois", batch3)
        if cfg.POOLING_MODE == 'crop':
            pool31 = self._crop_pool_layer(conv3_resize, rois, "pool31")
        else:
            raise NotImplementedError
        pool31_conv = slim.conv2d(pool31, 256, [1, 1],
                                  trainable=is_training,
                                  weights_initializer=initializer,
                                  scope="pool31_conv")
        pool31_avg = slim.avg_pool2d(pool31_conv, [7, 7], padding='SAME',
                                     scope='pool31_avg', stride=1)
        pool31_flat = slim.flatten(pool31_avg, scope='flatten31')
        fc3_2 = slim.fully_connected(
            pool31_flat, 512, scope='fc3_2',
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            trainable=is_training)
        # if is_training:
        #   fc3_2 = slim.dropout(fc3_2, keep_prob=0.5, is_training=True, scope='fc3_2')
        #combine
        scale3_2 = tf.Variable(tf.cast(1, tf.float32),
                               trainable=is_training, name='scale3_2')
        fc_combine3_2 = tf.scalar_mul(scale3_2, fc3_2)
        cls3_score = slim.fully_connected(fc_combine3_2, self._num_classes,
                                          weights_initializer=initializer,
                                          trainable=is_training,
                                          activation_fn=None,
                                          scope='cls3_score')
        #store RCNN3
        self._predictions["cls3_score"] = cls3_score
        cls3_prob = self._softmax_layer(cls3_score, "cls3_prob")
        # Reject RoIs whose background prob is >= reject3; survivors
        # continue down the cascade.
        cls3_inds = tf.reshape(tf.where(tf.less(cls3_prob[:, 0], reject3)),
                               [-1])
        rois = tf.gather(rois, tf.reshape(cls3_inds, [-1]))
        #fc_combine3_2 = tf.gather(fc_combine3_2, tf.reshape(cls3_inds,[-1]))
        cls3_score = tf.gather(cls3_score, tf.reshape(cls3_inds, [-1]))
        self._act_summaries.append(conv3_resize)

        #--------------------------------- rcnn 2 ---------------------------------#
        #generate target
        if is_training:
            with tf.control_dependencies([rpn_labels]):
                roi_scores = tf.gather(roi_scores,
                                       tf.reshape(cls3_inds, [-1]))
                rois, _, passinds4 = self._proposal_target_layer(
                    rois, roi_scores, "rpn2_rois", batch2)
                # Keep level-3 scores aligned with the re-sampled RoIs.
                cls3_score = tf.gather(cls3_score,
                                       tf.reshape(passinds4, [-1]))
                #fc_combine3_2 = tf.gather(fc_combine3_2, tf.reshape(passinds4,[-1]))
        if cfg.POOLING_MODE == 'crop':
            pool41 = self._crop_pool_layer(conv4_resize, rois, "pool41")
        else:
            raise NotImplementedError
        pool41_conv = slim.conv2d(pool41, 256, [1, 1],
                                  trainable=is_training,
                                  weights_initializer=initializer,
                                  scope="pool41_conv")
        pool41_avg = slim.avg_pool2d(pool41_conv, [7, 7], padding='SAME',
                                     scope='pool41_avg', stride=1)
        pool41_flat = slim.flatten(pool41_avg, scope='flatten41')
        fc4_2 = slim.fully_connected(
            pool41_flat, 512, scope='fc4_2',
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            trainable=is_training)
        # if is_training:
        #   fc4_2 = slim.dropout(fc4_2, keep_prob=0.5, is_training=True, scope='fc4_2')
        #fc4_2 = self._score_add_up(fc_combine3_2, fc4_2, factor1, factor2, 'fc_42_comb')
        #combine
        scale4_2 = tf.Variable(tf.cast(1, tf.float32),
                               trainable=is_training, name='scale4_2')
        fc_combine4_2 = tf.scalar_mul(scale4_2, fc4_2)
        cls4_score = slim.fully_connected(fc_combine4_2, self._num_classes,
                                          weights_initializer=initializer,
                                          trainable=is_training,
                                          activation_fn=None,
                                          scope='cls4_score')
        #cls4_score = self._score_add_up(cls3_score, cls4_score, factor1, factor2, 'cls4_score')
        #store RCNN2
        self._predictions["cls2_score"] = cls4_score
        cls4_prob = self._softmax_layer(cls4_score, "cls4_prob")
        #reject
        cls4_inds = tf.reshape(tf.where(tf.less(cls4_prob[:, 0], reject2)),
                               [-1])
        rois = tf.gather(rois, tf.reshape(cls4_inds, [-1]))
        #fc_combine4_2 = tf.gather(fc_combine4_2, tf.reshape(cls4_inds,[-1]))
        cls4_score = tf.gather(cls4_score, tf.reshape(cls4_inds, [-1]))
        cls3_score = tf.gather(cls3_score, tf.reshape(cls4_inds, [-1]))
        self._act_summaries.append(conv4_resize)

        #--------------------------------- rcnn 1 ---------------------------------#
        #generate target
        if is_training:
            with tf.control_dependencies([rpn_labels]):
                roi_scores = tf.gather(roi_scores,
                                       tf.reshape(cls4_inds, [-1]))
                rois, _, passinds5 = self._proposal_target_layer(
                    rois, roi_scores, "rpn1_rois", batch1)
                cls4_score = tf.gather(cls4_score,
                                       tf.reshape(passinds5, [-1]))
                cls3_score = tf.gather(cls3_score,
                                       tf.reshape(passinds5, [-1]))
                #fc_combine4_2 = tf.gather(fc_combine4_2, tf.reshape(passinds5,[-1]))
        if cfg.POOLING_MODE == 'crop':
            pool51 = self._crop_pool_layer(self.endpoint['conv5_2'], rois,
                                           "pool51")
        else:
            raise NotImplementedError
        pool51_conv = slim.conv2d(pool51, 512, [1, 1],
                                  trainable=is_training,
                                  weights_initializer=initializer,
                                  scope="pool51_conv")
        pool51_avg = slim.avg_pool2d(pool51_conv, [7, 7], padding='SAME',
                                     scope='pool51_avg', stride=1)
        pool51_flat = slim.flatten(pool51_avg, scope='flatten51')
        fc5_2 = slim.fully_connected(
            pool51_flat, 512, scope='fc5_2',
            weights_initializer=tf.contrib.layers.xavier_initializer(),
            trainable=is_training)
        # if is_training:
        #   fc5_2 = slim.dropout(fc5_2, keep_prob=0.5, is_training=True, scope='fc5_2')
        #fc5_2 = self._score_add_up(fc_combine4_2, fc5_2, factor1, factor2, 'fc_52_comb')
        #combine
        scale5_2 = tf.Variable(tf.cast(1, tf.float32),
                               trainable=is_training, name='scale5_2')
        fc_combine5_2 = tf.scalar_mul(scale5_2, fc5_2)
        cls5_score = slim.fully_connected(fc_combine5_2, self._num_classes,
                                          weights_initializer=initializer,
                                          trainable=is_training,
                                          activation_fn=None,
                                          scope='cls5_score')
        #cls5_score = self._score_add_up(cls4_score, cls5_score, factor1, factor2, 'cls5_score')
        #store RCNN1
        self._predictions["cls1_score"] = cls5_score
        cls5_prob = self._softmax_layer(cls5_score, "cls5_prob")
        #reject
        cls5_inds = tf.reshape(tf.where(tf.less(cls5_prob[:, 0], reject1)),
                               [-1])
        rois = tf.gather(rois, tf.reshape(cls5_inds, [-1]))
        cls5_score = tf.gather(cls5_score, tf.reshape(cls5_inds, [-1]))
        cls4_score = tf.gather(cls4_score, tf.reshape(cls5_inds, [-1]))
        cls3_score = tf.gather(cls3_score, tf.reshape(cls5_inds, [-1]))
        self._act_summaries.append(self.endpoint['conv5_2'])

        #---------------------------------- rcnn ----------------------------------#
        #generate target
        if is_training:
            with tf.control_dependencies([rpn_labels]):
                roi_scores = tf.gather(roi_scores,
                                       tf.reshape(cls5_inds, [-1]))
                rois, _, passinds = self._proposal_target_layer(
                    rois, roi_scores, "rpn_rois", batch)
                cls5_score = tf.gather(cls5_score,
                                       tf.reshape(passinds, [-1]))
                cls4_score = tf.gather(cls4_score,
                                       tf.reshape(passinds, [-1]))
                cls3_score = tf.gather(cls3_score,
                                       tf.reshape(passinds, [-1]))
        if cfg.POOLING_MODE == 'crop':
            pool5 = self._crop_pool_layer(self.endpoint['conv5_3'], rois,
                                          "pool5")
            self.endpoint['pool5'] = pool5
        else:
            raise NotImplementedError
        pool5_flat = slim.flatten(pool5, scope='flatten')
        self._predictions['p5f'] = pool5_flat
        fc6 = slim.fully_connected(pool5_flat, 4096, scope='fc6')
        if is_training:
            fc6 = slim.dropout(fc6, keep_prob=0.5, is_training=True,
                               scope='dropout6')
        fc7 = slim.fully_connected(fc6, 4096, scope='fc7')
        if is_training:
            fc7 = slim.dropout(fc7, keep_prob=0.5, is_training=True,
                               scope='dropout7')
        cls0_score = slim.fully_connected(fc7, self._num_classes,
                                          weights_initializer=initializer,
                                          trainable=is_training,
                                          activation_fn=None,
                                          scope='cls_score_pre')
        self._predictions["cls0_score"] = cls0_score
        # cls3_score_scale = tf.Variable(tf.cast(1, tf.float32), trainable = is_training, name = 'cls3_score_scale')
        # cls2_score_scale = tf.Variable(tf.cast(1, tf.float32), trainable = is_training, name = 'cls2_score_scale')
        # cls1_score_scale = tf.Variable(tf.cast(1, tf.float32), trainable = is_training, name = 'cls1_score_scale')
        # cls0_score_scale = tf.Variable(tf.cast(1, tf.float32), trainable = is_training, name = 'cls0_score_scale')
        # Final score: equal-weight average of the four cascade heads.
        cls_score = cls3_score * 0.25 + cls4_score * 0.25 + \
            cls5_score * 0.25 + cls0_score * 0.25
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(
            fc7,
            self._num_classes * 4,
            weights_initializer=initializer_bbox,
            trainable=is_training,
            activation_fn=None,
            scope='bbox_pred')
        # NOTE(review): duplicate of the cls_prob computation above —
        # appears redundant; confirm and remove in a follow-up.
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        self._act_summaries.append(self.endpoint['conv5_3'])
        ############################### RCNN DONE ###############################

        #store rpn values
        self._predictions["rpn_cls_score"] = rpn_cls_score
        self._predictions["rpn_cls_score_reshape"] = rpn_cls_score_reshape
        self._predictions["rpn_cls_prob"] = rpn_cls_prob
        self._predictions["rpn_bbox_pred"] = rpn_bbox_pred
        #store RCNN
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = rois
        #####only for training######
        self._score_summaries.update(self._predictions)

        return rois, cls_prob, bbox_pred
def STbaseline(inputs, outputs, loss_weight, labels):
    """Two-stream spatio-temporal baseline.

    Spatial stream based on VGG16; temporal stream based on FlowNet-Simple.
    Builds the FlowNetS encoder/decoder over the (inputs, outputs) frame
    pair with a multi-scale unsupervised photometric loss at pr1..pr6, and
    a VGG16 classifier over `inputs` that fuses temporal features
    (Tconv5_2 / Tconv6_2) before its FC head.

    NOTE: uses pre-TF-1.0 APIs — tf.concat(axis, values) argument order and
    positional sparse_softmax_cross_entropy_with_logits. loss_interp is a
    project helper returning (loss_dict, prediction).

    Args:
      inputs: first frame batch, NHWC, BGR (mean-subtracted below).
      outputs: second frame batch, same shape as inputs.
      loss_weight: sequence of at least 6 per-scale loss weights.
        NOTE(review): loss_weight[0] is reused to weight the action loss —
        confirm this is intentional.
      labels: integer action labels for the classification head.

    Returns:
      (losses, flows_all, predictions) where losses is
      [loss1..loss6, actionLoss], flows_all the six scaled flow maps, and
      predictions [prev1, actionPredictions].
    """
    # Mean subtraction (BGR) for flying chairs
    mean = tf.constant([104.0, 117.0, 123.0], dtype=tf.float32,
                       name="img_global_mean")
    # tf.tile(mean, [4,192,256,1])
    inputs = inputs - mean
    outputs = outputs - mean
    # Scaling to 0 ~ 1 or -0.4 ~ 0.6?
    inputs = tf.truediv(inputs, 255.0)
    outputs = tf.truediv(outputs, 255.0)

    # Add local response normalization (ACROSS_CHANNELS) for computing
    # photometric loss
    inputs_norm = tf.nn.local_response_normalization(inputs, depth_radius=4,
                                                     beta=0.7)
    outputs_norm = tf.nn.local_response_normalization(outputs,
                                                      depth_radius=4,
                                                      beta=0.7)

    with slim.arg_scope([slim.conv2d, slim.conv2d_transpose],
                        activation_fn=tf.nn.elu):
        # original use leaky ReLU, now we use elu
        # Contracting part (FlowNetS encoder over the concatenated pair).
        Tconv1 = slim.conv2d(tf.concat(3, [inputs, outputs]), 64, [7, 7],
                             stride=2, scope='Tconv1')
        Tconv2 = slim.conv2d(Tconv1, 128, [5, 5], stride=2, scope='Tconv2')
        Tconv3_1 = slim.conv2d(Tconv2, 256, [5, 5], stride=2,
                               scope='Tconv3_1')
        Tconv3_2 = slim.conv2d(Tconv3_1, 256, [3, 3], scope='Tconv3_2')
        Tconv4_1 = slim.conv2d(Tconv3_2, 512, [3, 3], stride=2,
                               scope='Tconv4_1')
        Tconv4_2 = slim.conv2d(Tconv4_1, 512, [3, 3], scope='Tconv4_2')
        Tconv5_1 = slim.conv2d(Tconv4_2, 512, [3, 3], stride=2,
                               scope='Tconv5_1')
        Tconv5_2 = slim.conv2d(Tconv5_1, 512, [3, 3], scope='Tconv5_2')
        Tconv6_1 = slim.conv2d(Tconv5_2, 1024, [3, 3], stride=2,
                               scope='Tconv6_1')
        Tconv6_2 = slim.conv2d(Tconv6_1, 1024, [3, 3], scope='Tconv6_2')

        # Hyper-params for computing unsupervised loss
        epsilon = 0.0001
        alpha_c = 0.25
        alpha_s = 0.37
        lambda_smooth = 1.0
        # Fixed finite-difference kernel for the flow smoothness term.
        FlowDeltaWeights = tf.constant(
            [0,0,0,0,1,-1,0,0,0,0,0,0,0,1,0,0,-1,0],
            dtype=tf.float32, shape=[3,3,2,2], name="FlowDeltaWeights")
        scale = 2  # for deconvolution

        # Expanding part: predict flow at each scale, compute the
        # photometric/smoothness loss, then upsample and refine.
        pr6 = slim.conv2d(Tconv6_2, 2, [3, 3], activation_fn=None,
                          scope='pr6')
        h6 = pr6.get_shape()[1].value
        w6 = pr6.get_shape()[2].value
        pr6_input = tf.image.resize_bilinear(inputs_norm, [h6, w6])
        pr6_output = tf.image.resize_bilinear(outputs_norm, [h6, w6])
        flow_scale_6 = 0.3125  # (*20/64)
        loss6, _ = loss_interp(pr6, pr6_input, pr6_output, epsilon, alpha_c,
                               alpha_s, lambda_smooth, flow_scale_6,
                               FlowDeltaWeights)
        upconv5 = slim.conv2d_transpose(Tconv6_2, 512,
                                        [2*scale, 2*scale], stride=scale,
                                        scope='upconv5')
        pr6to5 = slim.conv2d_transpose(pr6, 2, [2*scale, 2*scale],
                                       stride=scale, activation_fn=None,
                                       scope='up_pr6to5')
        concat5 = tf.concat(3, [Tconv5_2, upconv5, pr6to5])

        pr5 = slim.conv2d(concat5, 2, [3, 3], activation_fn=None,
                          scope='pr5')
        h5 = pr5.get_shape()[1].value
        w5 = pr5.get_shape()[2].value
        pr5_input = tf.image.resize_bilinear(inputs_norm, [h5, w5])
        pr5_output = tf.image.resize_bilinear(outputs_norm, [h5, w5])
        flow_scale_5 = 0.625  # (*20/32)
        loss5, _ = loss_interp(pr5, pr5_input, pr5_output, epsilon, alpha_c,
                               alpha_s, lambda_smooth, flow_scale_5,
                               FlowDeltaWeights)
        upconv4 = slim.conv2d_transpose(concat5, 256, [2*scale, 2*scale],
                                        stride=scale, scope='upconv4')
        pr5to4 = slim.conv2d_transpose(pr5, 2, [2*scale, 2*scale],
                                       stride=scale, activation_fn=None,
                                       scope='up_pr5to4')
        concat4 = tf.concat(3, [Tconv4_2, upconv4, pr5to4])

        pr4 = slim.conv2d(concat4, 2, [3, 3], activation_fn=None,
                          scope='pr4')
        h4 = pr4.get_shape()[1].value
        w4 = pr4.get_shape()[2].value
        pr4_input = tf.image.resize_bilinear(inputs_norm, [h4, w4])
        pr4_output = tf.image.resize_bilinear(outputs_norm, [h4, w4])
        flow_scale_4 = 1.25  # (*20/16)
        loss4, _ = loss_interp(pr4, pr4_input, pr4_output, epsilon, alpha_c,
                               alpha_s, lambda_smooth, flow_scale_4,
                               FlowDeltaWeights)
        upconv3 = slim.conv2d_transpose(concat4, 128, [2*scale, 2*scale],
                                        stride=scale, scope='upconv3')
        pr4to3 = slim.conv2d_transpose(pr4, 2, [2*scale, 2*scale],
                                       stride=scale, activation_fn=None,
                                       scope='up_pr4to3')
        concat3 = tf.concat(3, [Tconv3_2, upconv3, pr4to3])

        pr3 = slim.conv2d(concat3, 2, [3, 3], activation_fn=None,
                          scope='pr3')
        h3 = pr3.get_shape()[1].value
        w3 = pr3.get_shape()[2].value
        pr3_input = tf.image.resize_bilinear(inputs_norm, [h3, w3])
        pr3_output = tf.image.resize_bilinear(outputs_norm, [h3, w3])
        flow_scale_3 = 2.5  # (*20/8)
        loss3, _ = loss_interp(pr3, pr3_input, pr3_output, epsilon, alpha_c,
                               alpha_s, lambda_smooth, flow_scale_3,
                               FlowDeltaWeights)
        upconv2 = slim.conv2d_transpose(concat3, 64, [2*scale, 2*scale],
                                        stride=scale, scope='upconv2')
        pr3to2 = slim.conv2d_transpose(pr3, 2, [2*scale, 2*scale],
                                       stride=scale, activation_fn=None,
                                       scope='up_pr3to2')
        concat2 = tf.concat(3, [Tconv2, upconv2, pr3to2])

        pr2 = slim.conv2d(concat2, 2, [3, 3], activation_fn=None,
                          scope='pr2')
        h2 = pr2.get_shape()[1].value
        w2 = pr2.get_shape()[2].value
        pr2_input = tf.image.resize_bilinear(inputs_norm, [h2, w2])
        pr2_output = tf.image.resize_bilinear(outputs_norm, [h2, w2])
        flow_scale_2 = 5.0  # (*20/4)
        loss2, _ = loss_interp(pr2, pr2_input, pr2_output, epsilon, alpha_c,
                               alpha_s, lambda_smooth, flow_scale_2,
                               FlowDeltaWeights)
        upconv1 = slim.conv2d_transpose(concat2, 32, [2*scale, 2*scale],
                                        stride=scale, scope='upconv1')
        pr2to1 = slim.conv2d_transpose(pr2, 2, [2*scale, 2*scale],
                                       stride=scale, activation_fn=None,
                                       scope='up_pr2to1')
        concat1 = tf.concat(3, [Tconv1, upconv1, pr2to1])

        pr1 = slim.conv2d(concat1, 2, [3, 3], activation_fn=None,
                          scope='pr1')
        h1 = pr1.get_shape()[1].value
        w1 = pr1.get_shape()[2].value
        pr1_input = tf.image.resize_bilinear(inputs_norm, [h1, w1])
        pr1_output = tf.image.resize_bilinear(outputs_norm, [h1, w1])
        flow_scale_1 = 10.0  # (*20/2)
        # Only the finest scale keeps its prediction (prev1).
        loss1, prev1 = loss_interp(pr1, pr1_input, pr1_output, epsilon,
                                   alpha_c, alpha_s, lambda_smooth,
                                   flow_scale_1, FlowDeltaWeights)

    # Spatial stream: VGG16 over the first frame only.
    with slim.arg_scope(
            [slim.conv2d, slim.fully_connected],
            activation_fn=tf.nn.relu,
            weights_initializer=tf.truncated_normal_initializer(0.0, 0.01),
            weights_regularizer=slim.l2_regularizer(0.0005)):
        # conv1_1 = slim.conv2d(tf.concat(3, [inputs, outputs]), 64, [3, 3], scope='conv1_1')
        conv1_1 = slim.conv2d(inputs, 64, [3, 3], scope='conv1_1')
        conv1_2 = slim.conv2d(conv1_1, 64, [3, 3], scope='conv1_2')
        pool1 = slim.max_pool2d(conv1_2, [2, 2], scope='pool1')
        conv2_1 = slim.conv2d(pool1, 128, [3, 3], scope='conv2_1')
        conv2_2 = slim.conv2d(conv2_1, 128, [3, 3], scope='conv2_2')
        pool2 = slim.max_pool2d(conv2_2, [2, 2], scope='pool2')
        conv3_1 = slim.conv2d(pool2, 256, [3, 3], scope='conv3_1')
        conv3_2 = slim.conv2d(conv3_1, 256, [3, 3], scope='conv3_2')
        conv3_3 = slim.conv2d(conv3_2, 256, [3, 3], scope='conv3_3')
        pool3 = slim.max_pool2d(conv3_3, [2, 2], scope='pool3')
        conv4_1 = slim.conv2d(pool3, 512, [3, 3], scope='conv4_1')
        conv4_2 = slim.conv2d(conv4_1, 512, [3, 3], scope='conv4_2')
        conv4_3 = slim.conv2d(conv4_2, 512, [3, 3], scope='conv4_3')
        pool4 = slim.max_pool2d(conv4_3, [2, 2], scope='pool4')
        conv5_1 = slim.conv2d(pool4, 512, [3, 3], scope='conv5_1')
        conv5_2 = slim.conv2d(conv5_1, 512, [3, 3], scope='conv5_2')
        conv5_3 = slim.conv2d(conv5_2, 512, [3, 3], scope='conv5_3')
        pool5 = slim.max_pool2d(conv5_3, [2, 2], scope='pool5')

        # Incorporate temporal feature
        concatST = tf.concat(3, [pool5, Tconv5_2])
        poolST = slim.max_pool2d(concatST, [2, 2])
        # print poolST.get_shape()
        concat2ST = tf.concat(3, [poolST, Tconv6_2])
        # print concat2ST.get_shape()
        # 1x1 conv for dimensionality reduction of the fused features.
        concatDR = slim.conv2d(concat2ST, 512, [1, 1])
        # print concatDR.get_shape()

        flatten5 = slim.flatten(concatDR, scope='flatten5')
        fc6 = slim.fully_connected(flatten5, 4096, scope='fc6')
        dropout6 = slim.dropout(fc6, 0.9, scope='dropout6')
        fc7 = slim.fully_connected(dropout6, 4096, scope='fc7')
        dropout7 = slim.dropout(fc7, 0.9, scope='dropout7')
        fc8 = slim.fully_connected(dropout7, 101, activation_fn=None,
                                   scope='fc8')
        prob = tf.nn.softmax(fc8)
        actionPredictions = tf.argmax(prob, 1)
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
            fc8, labels)
        actionLoss = tf.reduce_mean(cross_entropy)

    # Adding intermediate losses
    # NOTE(review): loss_weight[0] weights both loss1 and actionLoss.
    all_loss = loss_weight[0]*loss1["total"] + loss_weight[1]*loss2["total"] + loss_weight[2]*loss3["total"] + \
               loss_weight[3]*loss4["total"] + loss_weight[4]*loss5["total"] + loss_weight[5]*loss6["total"] + \
               loss_weight[0]*actionLoss
    slim.losses.add_loss(all_loss)

    # NOTE: loss1..loss6 are dicts from loss_interp; actionLoss is a
    # scalar tensor — the list is heterogeneous.
    losses = [loss1, loss2, loss3, loss4, loss5, loss6, actionLoss]
    # pr1 = tf.mul(tf.constant(20.0), pr1)
    flows_all = [pr1*flow_scale_1, pr2*flow_scale_2, pr3*flow_scale_3,
                 pr4*flow_scale_4, pr5*flow_scale_5, pr6*flow_scale_6]
    predictions = [prev1, actionPredictions]
    return losses, flows_all, predictions
def inception_resnet_v2(inputs, is_training=True,
                        dropout_keep_prob=0.8,
                        bottleneck_layer_size=128,
                        reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception Resnet V1 model.

    NOTE(review): the function is named *_v2 but the default scope and the
    docstring say V1 — presumably a renamed copy; confirm which
    architecture is intended.

    Args:
      inputs: a 4-D tensor of size [batch_size, height, width, 3].
      is_training: whether is training or not.
      dropout_keep_prob: float, the fraction to keep before final layer.
      bottleneck_layer_size: width of the final embedding layer.
      reuse: whether or not the network and its variables should be reused.
        To be able to reuse 'scope' must be given.
      scope: Optional variable_scope.

    Returns:
      net: the bottleneck (embedding) output of the model.
      end_points: the set of end_points from the inception model.
    """
    end_points = {}

    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs],
                           reuse=reuse):
        # Batch norm / dropout follow the training flag; convs and pools
        # default to stride 1, SAME padding.
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope(
                    [slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                    stride=1, padding='SAME'):
                # stem for input of Inception-Resnet-v2
                with tf.variable_scope('stem'):
                    net = stem(inputs)
                    end_points['stem'] = net
                # 5 x Inception-resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)
                end_points['Mixed_5a'] = net
                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 256, 256, 384, 384)
                    end_points['Mixed_6a'] = net
                # channel = int(np.shape(net)[-1])
                # net = Squeeze_excitation_layer(net, out_dim=channel, ratio=reduction_ratio, layer_name='SE_1')
                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)
                end_points['Mixed_6b'] = net
                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                    end_points['Mixed_7a'] = net
                # channel = int(np.shape(net)[-1])
                # net = Squeeze_excitation_layer(net, out_dim=channel, ratio=reduction_ratio, layer_name='SE_2')
                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 5, block8, scale=0.20)
                end_points['Mixed_8a'] = net
                # net = block8(net, activation_fn=None)
                # end_points['Mixed_8b'] = net

                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    #pylint: disable=no-member
                    # Pool over the full spatial extent (kernel from the
                    # static shape).
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net
                # NOTE(review): reuse=False is hard-coded here even when
                # the outer scope was created with reuse=True — calling
                # this function twice with reuse will likely fail on
                # 'Bottleneck'; confirm whether reuse=reuse was intended.
                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)

    return net, end_points
def mnist_model_dropout(inputs,
                        is_training=True,
                        emb_size=128,
                        l2_weight=1e-3,
                        batch_norm_decay=None,
                        img_shape=None,
                        new_shape=None,
                        dropout_keep_prob=0.8,
                        augmentation_function=None,
                        image_summary=False):  # pylint: disable=unused-argument
    """Construct the image-to-embedding vector model.

    Conv/pool/dropout stack (3 conv pairs) followed by a fully connected
    embedding layer of width `emb_size`.

    Args:
      inputs: batch of input images; cast to float32.
      is_training: enables augmentation and dropout.
      emb_size: dimensionality of the returned embedding.
      l2_weight: L2 regularization weight for conv/fc kernels.
      batch_norm_decay: unused (kept for interface compatibility).
      img_shape: fallback input shape when new_shape is None (only used
        for resizing logic here).
      new_shape: if given, images are bilinearly resized to new_shape[:2].
      dropout_keep_prob: keep probability for all dropout layers.
      augmentation_function: optional per-image transform applied via
        tf.map_fn during training.
      image_summary: unused (kept for interface compatibility).

    Returns:
      `emb`: [batch, emb_size] embedding tensor.
    """
    inputs = tf.cast(inputs, tf.float32)  # / 255.0
    if new_shape is not None:
        inputs = tf.image.resize_images(
            inputs,
            tf.constant(new_shape[:2]),
            method=tf.image.ResizeMethod.BILINEAR)
    net = inputs

    # BUG FIX: the original computed both tf.map_fn results and discarded
    # them, so neither augmentation nor per-image standardization was ever
    # applied. Assign the mapped tensors back into the pipeline.
    if is_training and augmentation_function is not None:
        net = tf.map_fn(lambda frame: augmentation_function(frame), net)
    if augmentation_function is not None:
        # NOTE(review): standardization is gated on augmentation_function
        # being set (as in the original) — confirm it should not apply
        # unconditionally.
        net = tf.map_fn(
            lambda frame: tf.image.per_image_standardization(frame), net)

    with slim.arg_scope([slim.conv2d, slim.fully_connected],
                        activation_fn=tf.nn.elu,
                        weights_regularizer=slim.l2_regularizer(l2_weight)):
        with slim.arg_scope([slim.dropout], is_training=is_training):
            net = slim.conv2d(net, 32, [3, 3], scope='conv1_1')
            net = slim.conv2d(net, 32, [3, 3], scope='conv1_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool1')  # 14
            net = slim.dropout(net, dropout_keep_prob,
                               is_training=is_training, scope='dropout1')
            net = slim.conv2d(net, 64, [3, 3], scope='conv2_1')
            net = slim.conv2d(net, 64, [3, 3], scope='conv2_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool2')  # 7
            net = slim.dropout(net, dropout_keep_prob,
                               is_training=is_training, scope='dropout2')
            net = slim.conv2d(net, 128, [3, 3], scope='conv3_1')
            net = slim.conv2d(net, 128, [3, 3], scope='conv3_2')
            net = slim.max_pool2d(net, [2, 2], scope='pool3')  # 3
            net = slim.flatten(net, scope='flatten')
            net = slim.dropout(net, dropout_keep_prob,
                               is_training=is_training, scope='dropout3')
            emb = slim.fully_connected(net, emb_size, scope='fc1')
    return emb
def GoogLeNetv1(x, keep_prob):
    """GoogLeNet (Inception v1) with both auxiliary classifier heads.

    Args:
        x: NHWC image batch.
        keep_prob: dropout keep probability used in the auxiliary heads.

    Returns:
        Tuple (logits, aux1_logits, aux2_logits).

    NOTE(review): relies on a module-level `num_classes` being defined where
    this function is used — confirm against the surrounding file.
    """

    def inception_module(x, in_f, f_1, f_2_1, f_2_2, f_3_1, f_3_2, f_4_2):
        # `in_f` (input channel count) is unused; kept for call-site readability.
        # Branch 1: 1x1 conv.
        x1 = slim.conv2d(x, f_1, [1, 1], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        # Branch 2: 1x1 reduction followed by 3x3 conv.
        x2_1 = slim.conv2d(x, f_2_1, [1, 1], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        # BUG FIX: the 3x3 conv must produce f_2_2 channels; it previously
        # reused the reduction width f_2_1, leaving f_2_2 unused.
        x2_2 = slim.conv2d(x2_1, f_2_2, [3, 3], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        # Branch 3: 1x1 reduction followed by 5x5 conv.
        x3_1 = slim.conv2d(x, f_3_1, [1, 1], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        x3_2 = slim.conv2d(x3_1, f_3_2, [5, 5], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        # Branch 4: 3x3 max-pool followed by 1x1 projection.
        x4_1 = slim.max_pool2d(x, [3, 3], stride=1, padding='SAME')
        x4_2 = slim.conv2d(x4_1, f_4_2, [1, 1], stride=1, padding='SAME', activation_fn=tf.nn.relu)
        # Concatenate the four branches along channels.
        x = tf.concat([x1, x2_2, x3_2, x4_2], axis=-1)
        return x

    # Stem.
    x = slim.conv2d(x, 64, [7, 7], stride=2, padding="VALID", activation_fn=tf.nn.relu)
    x = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME')
    x = tf.nn.local_response_normalization(x)
    x = slim.conv2d(x, 64, [1, 1], stride=1, padding='SAME', activation_fn=tf.nn.relu)
    x = slim.conv2d(x, 192, [3, 3], stride=1, padding='SAME', activation_fn=tf.nn.relu)
    x = tf.nn.local_response_normalization(x)
    x = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME')
    # inception 3a, 3b
    x = inception_module(x, 194, 64, 96, 128, 16, 32, 32)
    x = inception_module(x, 256, 128, 128, 192, 32, 96, 64)
    x = slim.max_pool2d(x, [3, 3], stride=2, padding='SAME')
    # inception 4a
    x = inception_module(x, 480, 192, 96, 208, 16, 48, 64)
    # auxiliary loss1
    x_aux1 = slim.avg_pool2d(x, 5, padding='SAME', stride=1)
    x_aux1 = slim.conv2d(x_aux1, 128, [1, 1], stride=1, padding='SAME', activation_fn=tf.nn.relu)
    mb, h, w, c = x_aux1.get_shape().as_list()
    x_aux1 = tf.reshape(x_aux1, [-1, h * w * c])
    x_aux1 = slim.fully_connected(x_aux1, 1024, activation_fn=tf.nn.relu)
    x_aux1 = slim.dropout(x_aux1, keep_prob=keep_prob)
    x_aux1 = slim.fully_connected(x_aux1, num_classes)
    # inception 4b, 4c, 4d
    x = inception_module(x, 512, 160, 112, 224, 24, 64, 64)
    x = inception_module(x, 512, 128, 128, 256, 24, 64, 64)
    x = inception_module(x, 512, 112, 144, 288, 32, 64, 64)
    # auxiliary loss2
    x_aux2 = slim.avg_pool2d(x, 5, padding='SAME', stride=1)
    x_aux2 = slim.conv2d(x_aux2, 128, [1, 1], stride=1, padding='SAME', activation_fn=tf.nn.relu)
    mb, h, w, c = x_aux2.get_shape().as_list()
    x_aux2 = tf.reshape(x_aux2, [-1, h * w * c])
    x_aux2 = slim.fully_connected(x_aux2, 1024, activation_fn=tf.nn.relu)
    x_aux2 = slim.dropout(x_aux2, keep_prob=keep_prob)
    x_aux2 = slim.fully_connected(x_aux2, num_classes)
    # inception 4e, 5a, 5b
    x = inception_module(x, 528, 256, 160, 320, 32, 128, 128)
    x = slim.max_pool2d(x, 3, padding='SAME', stride=2)
    x = inception_module(x, 832, 256, 160, 320, 32, 128, 128)
    x = inception_module(x, 832, 384, 192, 384, 48, 128, 128)
    # Global average pooling over the spatial dimensions, then the classifier.
    x = tf.reduce_mean(x, axis=[1, 2])
    x = slim.fully_connected(x, num_classes)
    return x, x_aux1, x_aux2
def classification(layer, keep_prob, num_class, scope=None):
    """Classification head: dropout, 1x1 conv to `num_class` channels, 8x upsample."""
    with tf.variable_scope(scope, 'classification', [layer]):
        net = slim.dropout(layer, keep_prob=keep_prob)
        net = slim.conv2d(net, num_class, kernel_size=1)
        return Upsampling(net, 8)
def build_graph(top_k):
    """Build the character-classification graph on /gpu:5.

    Args:
        top_k: k for the top-k accuracy / prediction outputs.

    Returns:
        A dict of graph endpoints (placeholders, train op, metrics, summaries).
    """
    # Feed placeholders.
    keep_prob = tf.placeholder(dtype=tf.float32, shape=[], name='keep_prob')
    images = tf.placeholder(dtype=tf.float32, shape=[None, 64, 64, 1], name='image_batch')
    labels = tf.placeholder(dtype=tf.int64, shape=[None], name='label_batch')
    is_training = tf.placeholder(dtype=tf.bool, shape=[], name='train_flag')
    with tf.device('/gpu:5'):
        # Every conv/fc layer is batch-normalized with the train flag wired in.
        with slim.arg_scope([slim.conv2d, slim.fully_connected],
                            normalizer_fn=slim.batch_norm,
                            normalizer_params={'is_training': is_training}):
            # Five conv stages (64 -> 512 channels) with 2x2 max-pooling.
            net = slim.conv2d(images, 64, [3, 3], 1, padding='SAME', scope='conv3_1')
            net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool1')
            net = slim.conv2d(net, 128, [3, 3], padding='SAME', scope='conv3_2')
            net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool2')
            net = slim.conv2d(net, 256, [3, 3], padding='SAME', scope='conv3_3')
            net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool3')
            net = slim.conv2d(net, 512, [3, 3], padding='SAME', scope='conv3_4')
            net = slim.conv2d(net, 512, [3, 3], padding='SAME', scope='conv3_5')
            net = slim.max_pool2d(net, [2, 2], [2, 2], padding='SAME', scope='pool4')
            # Classifier: fc1 (1024) -> fc2 (charset logits), dropout before each.
            net = slim.flatten(net)
            net = slim.fully_connected(slim.dropout(net, keep_prob), 1024,
                                       activation_fn=tf.nn.relu, scope='fc1')
            logits = slim.fully_connected(slim.dropout(net, keep_prob), FLAGS.charset_size,
                                          activation_fn=None, scope='fc2')
        loss = tf.reduce_mean(
            tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=labels))
        accuracy = tf.reduce_mean(
            tf.cast(tf.equal(tf.argmax(logits, 1), labels), tf.float32))
        # Make the loss depend on the batch-norm moving-average updates.
        update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
        if update_ops:
            updates = tf.group(*update_ops)
            loss = control_flow_ops.with_dependencies([updates], loss)
        global_step = tf.get_variable("step", [],
                                      initializer=tf.constant_initializer(0.0),
                                      trainable=False)
        optimizer = tf.train.AdamOptimizer(learning_rate=0.1)
        train_op = slim.learning.create_train_op(loss, optimizer, global_step=global_step)
        probabilities = tf.nn.softmax(logits)
        # Summaries and top-k evaluation endpoints.
        tf.summary.scalar('loss', loss)
        tf.summary.scalar('accuracy', accuracy)
        merged_summary_op = tf.summary.merge_all()
        predicted_val_top_k, predicted_index_top_k = tf.nn.top_k(probabilities, k=top_k)
        accuracy_in_top_k = tf.reduce_mean(
            tf.cast(tf.nn.in_top_k(probabilities, labels, top_k), tf.float32))
    return {
        'images': images,
        'labels': labels,
        'keep_prob': keep_prob,
        'top_k': top_k,
        'global_step': global_step,
        'train_op': train_op,
        'loss': loss,
        'is_training': is_training,
        'accuracy': accuracy,
        'accuracy_top_k': accuracy_in_top_k,
        'merged_summary_op': merged_summary_op,
        'predicted_distribution': probabilities,
        'predicted_index_top_k': predicted_index_top_k,
        'predicted_val_top_k': predicted_val_top_k
    }
def _create_network(incoming, num_classes, reuse=None, l2_normalize=True,
                    create_summaries=True, weight_decay=1e-8):
    """Build the re-identification CNN (conv stem + residual blocks + fc head).

    Args:
        incoming: NHWC input image batch.
        num_classes: number of identity classes for the classifier head.
        reuse: variable-scope reuse flag for the "ball" classifier variables.
        l2_normalize: if True, use the cosine-softmax ("ball") classifier on
            L2-normalized features; otherwise a plain softmax layer.
        create_summaries: whether to emit TensorBoard summaries.
        weight_decay: L2 regularization strength for conv/fc weights.

    Returns:
        Tuple (features, logits): the embedding tensor and the class logits.
    """
    nonlinearity = tf.nn.elu
    conv_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    conv_bias_init = tf.zeros_initializer()
    conv_regularizer = slim.l2_regularizer(weight_decay)
    fc_weight_init = tf.truncated_normal_initializer(stddev=1e-3)
    fc_bias_init = tf.zeros_initializer()
    fc_regularizer = slim.l2_regularizer(weight_decay)

    def batch_norm_fn(x):
        # Scope the BN variables under the layer that uses them.
        return slim.batch_norm(x, scope=tf.get_variable_scope().name + "/bn")

    network = incoming
    network = slim.conv2d(
        network, 32, [3, 3], stride=1, activation_fn=nonlinearity,
        padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_1",
        weights_initializer=conv_weight_init, biases_initializer=conv_bias_init,
        weights_regularizer=conv_regularizer)
    if create_summaries:
        tf.summary.histogram(network.name + "/activations", network)
        # BUG FIX: tf.summary.image's keyword is `max_outputs`; `max_images`
        # belonged to the removed tf.image_summary and raises TypeError here.
        tf.summary.image("conv1_1/weights", tf.transpose(
            slim.get_variables("conv1_1/weights:0")[0], [3, 0, 1, 2]),
            max_outputs=128)
    network = slim.conv2d(
        network, 32, [3, 3], stride=1, activation_fn=nonlinearity,
        padding="SAME", normalizer_fn=batch_norm_fn, scope="conv1_2",
        weights_initializer=conv_weight_init, biases_initializer=conv_bias_init,
        weights_regularizer=conv_regularizer)
    if create_summaries:
        tf.summary.histogram(network.name + "/activations", network)

    # NOTE(nwojke): This is missing a padding="SAME" to match the CNN
    # architecture in Table 1 of the paper. Information on how this affects
    # performance on MOT 16 training sequences can be found in
    # issue 10 https://github.com/nwojke/deep_sort/issues/10
    network = slim.max_pool2d(network, [3, 3], [2, 2], scope="pool1")

    # Residual tower: two blocks per stage; increase_dim doubles the channels.
    network = residual_block(
        network, "conv2_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False, is_first=True,
        summarize_activations=create_summaries)
    network = residual_block(
        network, "conv2_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False,
        summarize_activations=create_summaries)
    network = residual_block(
        network, "conv3_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=True,
        summarize_activations=create_summaries)
    network = residual_block(
        network, "conv3_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False,
        summarize_activations=create_summaries)
    network = residual_block(
        network, "conv4_1", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=True,
        summarize_activations=create_summaries)
    network = residual_block(
        network, "conv4_3", nonlinearity, conv_weight_init, conv_bias_init,
        conv_regularizer, increase_dim=False,
        summarize_activations=create_summaries)

    feature_dim = network.get_shape().as_list()[-1]
    print("feature dimensionality: ", feature_dim)
    network = slim.flatten(network)

    network = slim.dropout(network, keep_prob=0.6)
    network = slim.fully_connected(
        network, feature_dim, activation_fn=nonlinearity,
        normalizer_fn=batch_norm_fn, weights_regularizer=fc_regularizer,
        scope="fc1", weights_initializer=fc_weight_init,
        biases_initializer=fc_bias_init)

    features = network

    if l2_normalize:
        # Features in rows, normalize axis 1.
        features = slim.batch_norm(features, scope="ball", reuse=reuse)
        feature_norm = tf.sqrt(
            tf.constant(1e-8, tf.float32) +
            tf.reduce_sum(tf.square(features), [1], keep_dims=True))
        features = features / feature_norm

        with slim.variable_scope.variable_scope("ball", reuse=reuse):
            weights = slim.model_variable(
                "mean_vectors", (feature_dim, num_classes),
                initializer=tf.truncated_normal_initializer(stddev=1e-3),
                regularizer=None)
            # Per-class scale, passed through softplus to keep it positive.
            scale = slim.model_variable(
                "scale", (num_classes, ), tf.float32,
                tf.constant_initializer(0., tf.float32), regularizer=None)
            if create_summaries:
                tf.summary.histogram("scale", scale)
            scale = tf.nn.softplus(scale)

        # Each mean vector in columns, normalize axis 0.
        weight_norm = tf.sqrt(
            tf.constant(1e-8, tf.float32) +
            tf.reduce_sum(tf.square(weights), [0], keep_dims=True))
        logits = scale * tf.matmul(features, weights / weight_norm)
    else:
        logits = slim.fully_connected(
            features, num_classes, activation_fn=None, normalizer_fn=None,
            weights_regularizer=fc_regularizer, scope="softmax",
            weights_initializer=fc_weight_init, biases_initializer=fc_bias_init)
    return features, logits
def build_network(self, sess, is_training=True):
    """Assemble the FPN Faster-RCNN graph: ResNet backbone, per-level RPN,
    cross-level proposal selection, and the shared RCNN refinement head.

    Returns (rois, cls_prob, bbox_pred). NOTE(review): `sess` is unused here;
    presumably kept for interface compatibility — confirm against callers.
    """
    # pyramid network scales changes at different levels of pyramid
    self._anchor_scales = {}
    # select initializers
    if cfg.TRAIN.TRUNCATED:
        initializer = tf.truncated_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.truncated_normal_initializer(mean=0.0, stddev=0.001)
    else:
        initializer = tf.random_normal_initializer(mean=0.0, stddev=0.01)
        initializer_bbox = tf.random_normal_initializer(mean=0.0, stddev=0.001)
    bottleneck = resnet_v1.bottleneck
    # choose different blocks for different number of layers
    if self._num_layers == 50:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 5 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 101:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 3 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 22 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    elif self._num_layers == 152:
        blocks = [
            resnet_utils.Block('block1', bottleneck, [(256, 64, 1)] * 2 + [(256, 64, 2)]),
            resnet_utils.Block('block2', bottleneck, [(512, 128, 1)] * 7 + [(512, 128, 2)]),
            # Use stride-1 for the last conv4 layer
            resnet_utils.Block('block3', bottleneck, [(1024, 256, 1)] * 35 + [(1024, 256, 1)]),
            resnet_utils.Block('block4', bottleneck, [(2048, 512, 1)] * 3)
        ]
    else:
        # other numbers are not supported
        raise NotImplementedError

    assert (0 <= cfg.RESNET.FIXED_BLOCKS <= 4)
    # Split the backbone into a frozen part (is_training=False) and a
    # trainable part, depending on how many blocks are fixed.
    if cfg.RESNET.FIXED_BLOCKS == 4:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net_conv4, endpoints = resnet_v1.resnet_v1(net,
                                                       blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                                       global_pool=False,
                                                       include_root_block=False,
                                                       scope=self._resnet_scope)
    elif cfg.RESNET.FIXED_BLOCKS > 0:
        with slim.arg_scope(resnet_arg_scope(is_training=False)):
            net = self.build_base()
            net, endpoints = resnet_v1.resnet_v1(net,
                                                 blocks[0:cfg.RESNET.FIXED_BLOCKS],
                                                 global_pool=False,
                                                 include_root_block=False,
                                                 scope=self._resnet_scope)
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net_conv4, endpoints = resnet_v1.resnet_v1(net,
                                                       blocks[cfg.RESNET.FIXED_BLOCKS:-1],
                                                       global_pool=False,
                                                       include_root_block=False,
                                                       scope=self._resnet_scope)
    else:  # cfg.RESNET.FIXED_BLOCKS == 0
        with slim.arg_scope(resnet_arg_scope(is_training=is_training)):
            net = self.build_base()
            net_conv4, endpoints = resnet_v1.resnet_v1(net,
                                                       blocks[0:-1],
                                                       global_pool=False,
                                                       include_root_block=False,
                                                       scope=self._resnet_scope)
    # Build the top-down feature pyramid from the backbone endpoints.
    pyramid = self.build_pyramid(endpoints)
    self._layers['head'] = net_conv4  # not sure what to do with this
    with tf.variable_scope(self._resnet_scope, self._resnet_scope):
        # One RPN per pyramid level p in {5, 4, 3, 2}; the conv weights are
        # shared across levels via the reused scopes.
        for i in range(5, 1, -1):
            p = i
            self._act_summaries.append(pyramid[p])
            self._feat_stride[p] = [2 ** i]
            shape = tf.shape(pyramid[p])
            h, w = shape[1], shape[2]
            # in the paper they use only one anchor per layer of pyramid. But
            # when I tried that we were frequently receiving no overlaps in
            # anchor_target_proposal(...)
            self._anchor_scales[p] = [2**(i-j) for j in range(self._num_scales-1, -1, -1)]
            self._anchor_component(p, h, w)  # build the anchors for the image
            # rpn
            rpn = slim.conv2d(pyramid[p], 256, [3, 3], trainable=is_training,
                              weights_initializer=initializer,
                              scope="rpn_conv/3x3", stride=1)
            self._act_summaries.append(rpn)
            rpn_cls_score = slim.conv2d(rpn, self._num_anchors * 2, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID', activation_fn=None,
                                        scope='rpn_cls_score')
            # change it so that the score has 2 as its channel size
            rpn_cls_score_reshape = self._reshape_layer(rpn_cls_score, 2, 'rpn_cls_score_reshape')
            rpn_cls_prob_reshape = self._softmax_layer(rpn_cls_score_reshape, "rpn_cls_prob_reshape")
            rpn_cls_prob = self._reshape_layer(rpn_cls_prob_reshape, self._num_anchors * 2, "rpn_cls_prob")
            rpn_bbox_pred = slim.conv2d(rpn, self._num_anchors * 4, [1, 1],
                                        trainable=is_training,
                                        weights_initializer=initializer,
                                        padding='VALID', activation_fn=None,
                                        scope='rpn_bbox_pred')
            if is_training:
                rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p)
                rpn_labels = self._anchor_target_layer(rpn_cls_score, "anchor", p)
                # Try to have a determinestic order for the computing graph,
                # for reproducibility
                with tf.control_dependencies([rpn_labels]):
                    rois, roi_scores = self._proposal_target_layer(rois, roi_scores, "rpn_rois", p)
            else:
                if cfg.TEST.MODE == 'nms':
                    rois, roi_scores = self._proposal_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p)
                elif cfg.TEST.MODE == 'top':
                    rois, roi_scores = self._proposal_top_layer(rpn_cls_prob, rpn_bbox_pred, "rois", p)
                else:
                    raise NotImplementedError
            # Stash the per-level RPN outputs for the cross-level selection below.
            self._predictions[p]['rois'] = rois
            self._predictions[p]['roi_scores'] = roi_scores
            self._predictions[p]['rpn_cls_score'] = rpn_cls_score
            self._predictions[p]['rpn_cls_score_reshape'] = rpn_cls_score_reshape
            self._predictions[p]['rpn_cls_prob'] = rpn_cls_prob
            self._predictions[p]['rpn_bbox_pred'] = rpn_bbox_pred
        # Concatenate proposals from all pyramid levels.
        all_roi_scores = tf.concat(values=[self._predictions[p]['roi_scores'] for p in pyramid], axis=0)
        all_rois = tf.concat(values=[self._predictions[p]['rois'] for p in pyramid], axis=0)
        p_vals = [tf.fill([tf.shape(self._predictions[p]['roi_scores'])[0], 1], float(p)) for p in pyramid]
        p_roi = tf.concat(values=[tf.reshape(p_vals, [-1, 1]), all_rois], axis=1)
        if is_training:
            all_proposal_target_labels = tf.concat(values=[self._proposal_targets[p]['labels'] for p in pyramid], axis=0)
            all_proposal_target_bbox = tf.concat(values=[self._proposal_targets[p]['bbox_targets'] for p in pyramid], axis=0)
            all_proposal_target_inside_w = tf.concat(values=[self._proposal_targets[p]['bbox_inside_weights'] for p in pyramid], axis=0)
            all_proposal_target_outside_w = tf.concat(values=[self._proposal_targets[p]['bbox_outside_weights'] for p in pyramid], axis=0)
        cfg_key = self._mode
        if type(cfg_key) == bytes:
            cfg_key = cfg_key.decode('utf-8')
        # Keep at most RPN_POST_NMS_TOP_N proposals, ranked by score.
        nms_top_n = all_roi_scores.shape[0] \
            if all_roi_scores.shape[0] < cfg[cfg_key].RPN_POST_NMS_TOP_N \
            else cfg[cfg_key].RPN_POST_NMS_TOP_N
        _, top_indices = tf.nn.top_k(tf.reshape(all_roi_scores, [-1]), k=nms_top_n)
        p_roi = tf.gather(p_roi, top_indices)
        # Route each selected roi back to one of the pyramid levels 2..5.
        [assigned_rois, _, _] = \
            assign_boxes(all_rois, [all_rois, top_indices], [2, 3, 4, 5], 'assign_boxes')
        for p in range(5, 1, -1):
            splitted_rois = assigned_rois[p-2]
            # rcnn
            if cfg.POOLING_MODE == 'crop':
                cropped_roi = self._crop_pool_layer(pyramid[p], splitted_rois, "cropped_roi", p)
                self._predictions[p]['cropped_roi'] = cropped_roi
            else:
                raise NotImplementedError
        cropped_rois = [self._predictions[p_layer]['cropped_roi'] for p_layer in self._predictions]
        cropped_rois = tf.concat(values=cropped_rois, axis=0)
        # Shared RCNN refinement head: pool -> flatten -> fc(1024) x2 w/ dropout.
        cropped_regions = slim.max_pool2d(cropped_rois, [3, 3], stride=2, padding='SAME')
        refine = slim.flatten(cropped_regions)
        refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
        refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
        refine = slim.fully_connected(refine, 1024, activation_fn=tf.nn.relu)
        refine = slim.dropout(refine, keep_prob=0.75, is_training=is_training)
        cls_score = slim.fully_connected(refine, self._num_classes,
                                         activation_fn=None,
                                         weights_initializer=tf.truncated_normal_initializer(stddev=0.01))
        cls_prob = self._softmax_layer(cls_score, "cls_prob")
        bbox_pred = slim.fully_connected(refine, self._num_classes*4,
                                         activation_fn=my_sigmoid,
                                         weights_initializer=tf.truncated_normal_initializer(stddev=0.001))
        self._predictions["cls_score"] = cls_score
        self._predictions["cls_prob"] = cls_prob
        self._predictions["bbox_pred"] = bbox_pred
        self._predictions["rois"] = tf.gather(all_rois, top_indices)
        if is_training:
            self._proposal_targets['labels'] = all_proposal_target_labels
            self._proposal_targets['bbox_targets'] = all_proposal_target_bbox
            self._proposal_targets['bbox_inside_weights'] = all_proposal_target_inside_w
            self._proposal_targets['bbox_outside_weights'] = all_proposal_target_outside_w
        #self._score_summaries.update(self._predictions) # score summaries not compatible w/ dict
    return self._predictions["rois"], cls_prob, bbox_pred
def inception_resnet_v1(inputs, is_training=True, dropout_keep_prob=0.8,
                        bottleneck_layer_size=128, reuse=None,
                        scope='InceptionResnetV1'):
    """Creates the Inception-ResNet V1 model.

    :param inputs: a 4-D tensor of shape [batch_size, height, width, 3]
    :param is_training: whether the model is being trained (drives batch
        norm and dropout behaviour)
    :param dropout_keep_prob: float, fraction of activations kept before
        the final layer
    :param bottleneck_layer_size: size of the bottleneck (embedding) layer
    :param reuse: whether to reuse the network and its variables
    :param scope: optional variable_scope
    :return: net: the logits output of the model
    :return end_points: the set of end_points from the inception model
    """
    end_points = {}
    with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse):
        with slim.arg_scope([slim.batch_norm, slim.dropout],
                            is_training=is_training):
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d],
                                stride=1, padding='SAME'):
                # 149 x 149 x 32
                net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID',
                                  scope='Conv2d_1a_3x3')
                end_points['Conv2d_1a_3x3'] = net
                # 147 x 147 x 32
                net = slim.conv2d(net, 32, 3, padding='VALID',
                                  scope='Conv2d_2a_3x3')
                end_points['Conv2d_2a_3x3'] = net
                # 147 x 147 x 64
                net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3')
                end_points['Conv2d_2b_3x3'] = net
                # 73 x 73 x 64
                net = slim.max_pool2d(net, 3, stride=2, padding='VALID',
                                      scope='MaxPool_3a_3x3')
                end_points['MaxPool_3a_3x3'] = net
                # 73 x 73 x 80
                net = slim.conv2d(net, 80, 1, padding='VALID',
                                  scope='Conv2d_3b_1x1')
                end_points['Conv2d_3b_1x1'] = net
                # 71 x 71 x 192
                net = slim.conv2d(net, 192, 3, padding='VALID',
                                  scope='Conv2d_4a_3x3')
                end_points['Conv2d_4a_3x3'] = net
                # 35 x 35 x 256
                net = slim.conv2d(net, 256, 3, stride=2, padding='VALID',
                                  scope='Conv2d_4b_3x3')
                end_points['Conv2d_4b_3x3'] = net
                # 5 x Inception-Resnet-A
                net = slim.repeat(net, 5, block35, scale=0.17)
                # Reduction-A
                with tf.variable_scope('Mixed_6a'):
                    net = reduction_a(net, 192, 192, 256, 384)
                end_points['Mixed_6a'] = net
                # 10 x Inception-Resnet-B
                net = slim.repeat(net, 10, block17, scale=0.10)
                # Reduction-B
                with tf.variable_scope('Mixed_7a'):
                    net = reduction_b(net)
                end_points['Mixed_7a'] = net
                # 5 x Inception-Resnet-C
                net = slim.repeat(net, 5, block8, scale=0.20)
                # Final residual block without activation.
                net = block8(net, activation_fn=None)
                with tf.variable_scope('Logits'):
                    end_points['PrePool'] = net
                    # Global average pooling over the remaining spatial dims.
                    net = slim.avg_pool2d(net, net.get_shape()[1:3],
                                          padding='VALID',
                                          scope='AvgPool_1a_8x8')
                    net = slim.flatten(net)
                    net = slim.dropout(net, dropout_keep_prob,
                                       is_training=is_training,
                                       scope='Dropout')
                    end_points['PreLogitsFlatten'] = net
                # Bottleneck embedding layer (no activation).
                net = slim.fully_connected(net, bottleneck_layer_size,
                                           activation_fn=None,
                                           scope='Bottleneck', reuse=False)
    return net, end_points
def __init__(self, flags, is_training=True):
    """Build the teacher ('tch') model: image+text fusion classifier plus
    its saver, pre-training and KD-GAN training ops.

    NOTE(review): `flags` is presumably the parsed command-line FLAGS
    object; confirm field names against the driver script.
    """
    self.is_training = is_training
    # None = batch_size
    self.image_ph = tf.placeholder(tf.float32, shape=(None, flags.feature_size))
    self.text_ph = tf.placeholder(tf.int64, shape=(None, None))
    self.hard_label_ph = tf.placeholder(tf.float32, shape=(None, flags.num_label))
    self.soft_logit_ph = tf.placeholder(tf.float32, shape=(None, flags.num_label))
    # None = batch_size * sample_size
    self.sample_ph = tf.placeholder(tf.int32, shape=(None, 2))
    self.reward_ph = tf.placeholder(tf.float32, shape=(None, ))

    self.tch_scope = tch_scope = 'tch'
    model_scope = nets_factory.arg_scopes_map[flags.image_model]
    vocab_size = utils.get_vocab_size(flags.dataset)
    with tf.variable_scope(tch_scope) as scope:
        # Image branch: dropout on the precomputed image features.
        with slim.arg_scope(
                model_scope(weight_decay=flags.image_weight_decay)):
            iembed = self.image_ph
            iembed = slim.dropout(iembed, flags.image_keep_prob, is_training=is_training)
        # Text branch: mean of word embeddings over the token axis.
        with slim.arg_scope([slim.fully_connected],
                            weights_regularizer=slim.l2_regularizer(
                                flags.text_weight_decay)):
            wembed = slim.variable(
                'wembed', shape=[vocab_size, flags.embedding_size],
                initializer=tf.random_uniform_initializer(-0.1, 0.1))
            tembed = tf.nn.embedding_lookup(wembed, self.text_ph)
            tembed = tf.reduce_mean(tembed, axis=-2)
        # Fusion head: concatenate branches, project to label logits.
        with slim.arg_scope([slim.fully_connected],
                            weights_regularizer=slim.l2_regularizer(
                                flags.tch_weight_decay),
                            biases_initializer=tf.zeros_initializer()):
            # cembed = tf.concat([tembed], 1)
            cembed = tf.concat([iembed, tembed], 1)
            self.logits = slim.fully_connected(cembed, flags.num_label, activation_fn=None)
    self.labels = tf.nn.softmax(self.logits)

    # Inference-only graphs stop here; no saver or training ops are built.
    if not is_training:
        return

    # Collect only the teacher-scope variables for saving / gradient updates.
    save_dict, var_list = {}, []
    for variable in tf.trainable_variables():
        if not variable.name.startswith(tch_scope):
            continue
        print('%-50s added to TCH saver' % variable.name)
        save_dict[variable.name] = variable
        var_list.append(variable)
    self.saver = tf.train.Saver(save_dict)

    self.global_step = global_step = tf.Variable(0, trainable=False)
    tn_size = utils.get_tn_size(flags.dataset)
    learning_rate = flags.tch_learning_rate
    self.learning_rate = utils.get_lr(flags, tn_size, global_step, learning_rate, tch_scope)

    # pre train
    pre_losses = self.get_pre_losses()
    self.pre_loss = tf.add_n(pre_losses, name='%s_pre_loss' % tch_scope)
    pre_losses.extend(self.get_regularization_losses())
    print('#pre_losses wt regularization=%d' % (len(pre_losses)))
    pre_optimizer = utils.get_opt(flags, self.learning_rate)
    self.pre_update = pre_optimizer.minimize(self.pre_loss, global_step=global_step)

    # kdgan train
    kdgan_losses = self.get_kdgan_losses(flags)
    self.kdgan_loss = tf.add_n(kdgan_losses, name='%s_kdgan_loss' % tch_scope)
    kdgan_optimizer = utils.get_opt(flags, self.learning_rate)
    # self.kdgan_update = kdgan_optimizer.minimize(self.kdgan_loss, global_step=global_step)
    # Clip each gradient by norm before applying (only teacher variables).
    gvs = kdgan_optimizer.compute_gradients(self.kdgan_loss, var_list)
    cgvs = [(tf.clip_by_norm(gv[0], config.max_norm), gv[1]) for gv in gvs]
    self.kdgan_update = kdgan_optimizer.apply_gradients(
        cgvs, global_step=global_step)
def __dropout(self, net):
    """Channel-wise (spatial) dropout: one mask entry per (batch, channel),
    broadcast across the spatial dimensions.

    Args:
        net: NHWC feature tensor.

    Returns:
        `net` with slim's default dropout (keep_prob=0.5) applied using the
        broadcasting noise shape. NOTE(review): no is_training flag is
        passed, so dropout is governed by slim's default — confirm callers
        wrap this in an appropriate arg_scope.
    """
    static_shape = net.get_shape().as_list()
    # BUG FIX: the static batch dimension is usually None for placeholder-fed
    # graphs, and a None entry in noise_shape is invalid. Use the dynamic
    # batch size instead; the channel count stays static.
    noise_shape = [tf.shape(net)[0], 1, 1, static_shape[-1]]
    return slim.dropout(net, noise_shape=noise_shape)
def base_pooling(self, x, b):
    """ROI pooling + PHOC feature head.

    Crops/resizes the regions `b` out of feature map `x`, then runs a small
    conv stack (depth depends on the tiny/bigger flags) to produce a
    1024-channel 'phoc_feature' map.

    NOTE(review): `b` is assumed to be [num_boxes, 5] with column 0 the
    batch index and columns 1:5 the normalized box coordinates expected by
    tf.image.crop_and_resize — confirm against callers.
    """
    # Reuse variables on every call after the first.
    reuse = self.get_reuse(self._roi_pool_call)
    self._roi_pool_call += 1
    scope = self.scope
    L2_reg = self.args.box_filter_L2_reg
    dropout = self.args.dropout

    def _args_scope():
        # Build and return a reusable arg_scope dict: ELU-style activation and
        # L2 regularization on conv/fc, SAME padding on conv.
        with slim.arg_scope(
                [slim.conv2d, slim.fully_connected],
                activation_fn=self.act_func,
                weights_regularizer=slim.l2_regularizer(L2_reg)):
            with slim.arg_scope([slim.conv2d], padding='SAME') as arg_sc:
                return arg_sc

    with slim.arg_scope(_args_scope()):
        with tf.variable_scope(scope, scope, [x, b], reuse=reuse) as sc:
            end_points_collection = sc.name + '_end_points'
            # Collect outputs for conv2d, fully_connected and max_pool2d.
            with slim.arg_scope(
                    [slim.conv2d, slim.fully_connected, slim.max_pool2d],
                    outputs_collections=end_points_collection):
                # Split boxes into coordinates and per-box batch indices.
                boxes_input = tf.identity(b[:, 1:], name='boxes')
                batch_idx = tf.cast(b[:, 0], dtype=tf.int32, name='batch_idx')
                pooled_features = tf.image.crop_and_resize(
                    x, boxes_input, batch_idx, crop_size=self.output_shape)
                # First conv consumes the whole crop (VALID, kernel = crop size).
                net = slim.conv2d(pooled_features, 1024, self.output_shape,
                                  stride=[1, 1], padding='VALID',
                                  scope='conv1_phoc')
                net = slim.conv2d(net, 1024, [1, 1], stride=[1, 1],
                                  padding='VALID', scope='conv2_phoc')
                # TODO: remove the flags
                if not self.args.tiny_phoc:
                    net = slim.dropout(net, keep_prob=1 - dropout,
                                       is_training=Kb.learning_phase(),
                                       scope='dropout_phoc1')
                    net = slim.conv2d(net, 1024, [1, 1], stride=[1, 1],
                                      padding='VALID', scope='conv3_phoc')
                if not self.args.tiny_phoc and not self.args.bigger_phoc:
                    net = slim.dropout(net, keep_prob=1 - dropout,
                                       is_training=Kb.learning_phase(),
                                       scope='dropout_phoc2')
                    net = slim.conv2d(net, 1024, [1, 1], stride=[1, 1],
                                      padding='VALID', scope='conv4_phoc')
                net = slim.dropout(net, keep_prob=1 - dropout,
                                   is_training=Kb.learning_phase(),
                                   scope='dropout_phoc3')
                net = slim.conv2d(net, 1024, [1, 1], stride=1,
                                  scope='phoc_feature')
                return net