def build_refine_bbox(self, feat_crops, training=False):
    """Build the box-refinement head on top of cropped features.

    A 1x1 convolution with 256 filters is followed by two 1024-unit
    ``fc_layer`` calls and a 6-unit dense projection. The projection is
    split along axis 1 into 2 classification logits and 4 regression values.

    Args:
        feat_crops: Input handed to ``conv2d``.
        training: Forwarded to both ``fc_layer`` calls.

    Returns:
        Tuple ``(clf_logits, regs)``.
    """
    hidden = conv2d(feat_crops, n_filters=256, k_size=1)
    for _ in range(2):
        hidden = fc_layer(hidden, 1024, training=training)
    head = tf.layers.dense(hidden, 6)
    clf_logits, regs = tf.split(head, [2, 4], axis=1)
    return clf_logits, regs
def decoder(x):
    """Create decoder given placeholder input tensor.

    Three fully connected layers map the input through 64 -> 512 -> 2048 ->
    ``INPUT_WIDTH * INPUT_HEIGHT * NUM_CHANNELS`` units. Each layer's weight
    variable is registered with ``variable_summaries``.

    Args:
        x: Input to the first layer; the first weight matrix is ``[64, 512]``,
            so the last dimension is presumably 64 (confirm against caller).

    Returns:
        The output of the third ``fc_layer`` call.
    """
    flat_image_size = INPUT_WIDTH * INPUT_HEIGHT * NUM_CHANNELS

    # Decoding layer 1
    with tf.name_scope('decoder1'):
        with tf.name_scope('weights'):
            weights1 = weight_variable([64, 512], stddev=0.1)
            variable_summaries(weights1)
        with tf.name_scope('biases'):
            biases1 = bias_variable([512], init_val=0.1)
        layer1 = fc_layer(x, weights1, biases1)

    # Decoding layer 2
    with tf.name_scope('decoder2'):
        with tf.name_scope('weights'):
            weights2 = weight_variable([512, 2048], stddev=0.01)
            # Summarise this layer's own weights (previously weights1 was
            # summarised a second time here).
            variable_summaries(weights2)
        with tf.name_scope('biases'):
            biases2 = bias_variable([2048], init_val=0.01)
        layer2 = fc_layer(layer1, weights2, biases2)

    # Decoding layer 3
    with tf.name_scope('decoder3'):
        with tf.name_scope('weights'):
            weights3 = weight_variable([2048, flat_image_size], stddev=0.01)
            # Summarise this layer's own weights (previously weights2 was
            # summarised here and weights3 was never summarised).
            variable_summaries(weights3)
        with tf.name_scope('biases'):
            biases3 = bias_variable([flat_image_size], init_val=0.01)
        layer3 = fc_layer(layer2, weights3, biases3)

    return layer3
def forward(self, input_pose):
    """Run the encoder: three fc layers with a skip connection, then a linear head.

    The output of the first layer (``encfc1``) is added to the output of the
    third (``encfc3``) before the final ``fc_layer_linear`` projection.

    Args:
        input_pose: Input handed to the first ``fc_layer``.

    Returns:
        Output of ``fc_layer_linear`` with ``self.output_dim`` units.
    """
    with tf.variable_scope(self.name_scope):
        stem = fc_layer(input_pose, self.latent_dim, name='encfc1')
        hidden = stem
        for layer_name in ('encfc2', 'encfc3'):
            hidden = fc_layer(hidden, self.latent_dim, name=layer_name)
        # Residual connection around encfc2/encfc3.
        skip_sum = stem + hidden
        le_out = fc_layer_linear(skip_sum, self.output_dim, name='encfc4')
    return le_out
def forward(self, input_feature):
    """Run two residual fc blocks followed by a linear output layer.

    Each block applies two ``fc_layer`` calls of width ``self.latent_dim`` and
    adds the block input back onto the result. The first block adds
    ``input_feature`` itself, so it must be addable to a ``latent_dim``-wide
    layer output.

    Args:
        input_feature: Input to the first residual block.

    Returns:
        Output of ``fc_layer_linear`` with ``self.output_dim`` units.
    """
    block_layer_names = (('posefc1', 'posefc2'), ('posefc3', 'posefc4'))
    with tf.variable_scope(self.name_scope):
        block_input = input_feature
        for first_name, second_name in block_layer_names:
            hidden = fc_layer(block_input, self.latent_dim, name=first_name)
            hidden = fc_layer(hidden, self.latent_dim, name=second_name)
            # Skip connection: block input plus block output.
            block_input = block_input + hidden
        lc_out = fc_layer_linear(block_input, self.output_dim, name='posefc5')
    return lc_out
def encoder(x):
    """Create encoder given placeholder input tensor.

    Two fully connected layers (``INPUT_WIDTH * INPUT_HEIGHT * NUM_CHANNELS``
    -> 2048 -> 512) feed two parallel 64-unit heads, one producing mu and one
    producing log(sigma). A latent sample is then computed as
    ``z = mu + exp(log_sigma) * epsilon`` with ``epsilon`` drawn from
    ``tf.truncated_normal``.

    Args:
        x: Input to the first layer.

    Returns:
        Tuple ``(mu_encoder, log_sigma_encoder, epsilon, z)``.
    """
    flat_image_size = INPUT_WIDTH * INPUT_HEIGHT * NUM_CHANNELS

    # Encoding layer 1
    with tf.name_scope('encoder1'):
        with tf.name_scope('weights'):
            weights1 = weight_variable([flat_image_size, 2048], stddev=0.01)
            variable_summaries(weights1)
        with tf.name_scope('biases'):
            biases1 = bias_variable([2048], init_val=0.01)
        layer1 = fc_layer(x, weights1, biases1)

    # Encoding layer 2
    with tf.name_scope('encoder2'):
        with tf.name_scope('weights'):
            weights2 = weight_variable([2048, 512], stddev=0.01)
            # Summarise this layer's own weights (previously weights1 was
            # summarised a second time and weights2 never was).
            variable_summaries(weights2)
        with tf.name_scope('biases'):
            biases2 = bias_variable([512], init_val=0.01)
        layer2 = fc_layer(layer1, weights2, biases2)

    # Mu encoder layer
    with tf.name_scope('mu_encoder'):
        with tf.name_scope('weights'):
            weights_mu = weight_variable([512, 64], stddev=0.1)
            variable_summaries(weights_mu)
        with tf.name_scope('biases'):
            biases_mu = bias_variable([64], init_val=0.1)
        mu_encoder = fc_layer(layer2, weights_mu, biases_mu)

    # Log(sigma) encoder layer
    with tf.name_scope('log_sigma_encoder'):
        with tf.name_scope('weights'):
            weights_log_sigma = weight_variable([512, 64], stddev=0.1)
            variable_summaries(weights_log_sigma)
        with tf.name_scope('biases'):
            biases_log_sigma = bias_variable([64], init_val=0.1)
        log_sigma_encoder = fc_layer(layer2, weights_log_sigma,
                                     biases_log_sigma)

    # Sample epsilon, a truncated normal tensor
    epsilon = tf.truncated_normal(tf.shape(log_sigma_encoder))

    # Sample latent variables: z = mu + sigma * epsilon
    with tf.name_scope('latent_layer'):
        std_encoder = tf.exp(log_sigma_encoder)
        z = tf.add(mu_encoder, tf.multiply(std_encoder, epsilon))
        variable_summaries(z)

    return mu_encoder, log_sigma_encoder, epsilon, z
def BKStart(x, reuse):
    """Build the BKStart network: three conv/pool stages and two fc layers.

    All layers are created inside the ``'BKS'`` variable scope, and every
    layer name carries the ``BKStart_`` prefix.

    Args:
        x: Network input; the first ``conv_layer`` is called with 1 input
            channel.
        reuse: Passed to ``tf.variable_scope`` as ``reuse``.

    Returns:
        Output of the final 7-unit ``fc_layer`` (activation ``'None'``).
    """
    prefix = "BKStart_"
    with tf.variable_scope('BKS', reuse=reuse):
        net = conv_layer(x, 1, 32, 5, prefix + "conv_1", 1, pad='SAME')
        net = pool(net, 3, 2, name=prefix + "max_pool_1", pad='SAME',
                   pool='max')

        net = conv_layer(net, 32, 32, 4, prefix + "conv_2", 1, pad='SAME')
        net = pool(net, 3, 2, prefix + "avg_pool_1", pool='avg')

        net = conv_layer(net, 32, 64, 5, prefix + "conv_3", 1, pad='SAME')
        net = pool(net, 3, 2, prefix + "avg_pool_2", pool='avg')

        # Collapse every non-batch dimension into one feature axis.
        feature_dims = [dim.value for dim in net.get_shape()[1:]]
        flattened_shape = np.prod(feature_dims)
        net = tf.reshape(net, [-1, flattened_shape], name=prefix + 'flatten')

        net = fc_layer(net, 2048, activation='Relu', name=prefix + 'FC_1')
        logits = fc_layer(net, 7, activation='None', name=prefix + 'FC_2')
    return logits
def baseline_histogram_density(x, parameters):
    """Build a one-hidden-layer baseline with a 4-way softmax output.

    Args:
        x: Input handed to ``layers.fc_layer``.
        parameters: Unused by this function.

    Returns:
        Output of ``layers.softmax_layer`` with 4 outputs.
    """
    hidden = layers.fc_layer(x, number_of_units=100)
    return layers.softmax_layer(hidden, number_of_outputs=4)
def VGG16(x, n_classes, keep_prob):
    """Build a VGG16-style network: five conv groups and three fc layers.

    Each group stacks 3x3 convolutions (stride ``[1, 1, 1, 1]``) and ends with
    a pooling layer using kernel and stride ``[1, 2, 2, 1]``. Two 4096-unit fc
    layers, each followed by dropout, precede the ``n_classes``-unit output.

    Args:
        x: Network input.
        n_classes: Number of units in the final ``fc8`` layer.
        keep_prob: Passed to both ``layers.dropout`` calls.

    Returns:
        Output of the ``fc8`` layer.
    """
    # (group index, number of conv layers in the group, filters per conv)
    conv_groups = (
        (1, 2, 64),
        (2, 2, 128),
        (3, 3, 256),
        (4, 3, 512),
        (5, 3, 512),
    )

    with tf.name_scope('VGG16'):
        for group, depth, filters in conv_groups:
            for position in range(1, depth + 1):
                conv_name = 'conv{}_{}'.format(group, position)
                x = layers.conv(conv_name, x, filters, [3, 3], [1, 1, 1, 1])
            pool_name = 'pool{}'.format(group)
            with tf.name_scope(pool_name):
                x = layers.pool(pool_name, x, [1, 2, 2, 1], [1, 2, 2, 1])

        for index in (6, 7):
            x = layers.fc_layer('fc{}'.format(index), x, 4096)
            x = layers.dropout('drop{}'.format(index), x, keep_prob)
        x = layers.fc_layer('fc8', x, n_classes)

    return x
def BKVGG8(x, keep_prob):
    """Build the BKVGG8 network: five conv layers, three pools, three fc layers.

    Every layer name carries the ``BKVGG8_`` prefix.

    Args:
        x: Network input; the first ``conv_layer`` is called with 1 input
            channel.
        keep_prob: Passed to both ``dropout_layer`` calls.

    Returns:
        Output of the final 7-unit ``fc_layer`` (activation ``'None'``).
    """
    prefix = "BKVGG8_"

    net = conv_layer(x, 1, 32, 3, prefix + "conv_1", 1, pad='SAME')
    net = pool(net, 2, 2, name=prefix + "max_pool_1", pad='SAME', pool='max')

    net = conv_layer(net, 32, 64, 3, prefix + "conv_2", 1, pad='SAME')
    # NOTE(review): this pool reuses the name "max_pool_1" from the pool
    # above; kept unchanged so graph op names stay the same.
    net = pool(net, 2, 2, prefix + "max_pool_1", pool='max')

    net = conv_layer(net, 64, 128, 3, prefix + "conv_3", 1, pad='SAME')
    net = pool(net, 2, 2, prefix + "max_pool_2", pool='max')

    net = conv_layer(net, 128, 256, 3, prefix + "conv_4", 1, pad='SAME')
    net = conv_layer(net, 256, 256, 3, prefix + "conv_5", 1, pad='SAME')

    # Collapse every non-batch dimension into one feature axis.
    feature_dims = [dim.value for dim in net.get_shape()[1:]]
    flattened_shape = np.prod(feature_dims)
    net = tf.reshape(net, [-1, flattened_shape], name=prefix + 'flatten')

    for fc_name in ('FC_1', 'FC_2'):
        net = fc_layer(net, 256, activation='Relu', name=prefix + fc_name)
        net = dropout_layer(net, keep_prob)

    logits = fc_layer(net, 7, activation='None', name=prefix + 'FC_3')
    return logits
def forward(self, pose, kcs, reuse=False):
    """Run the two-branch discriminator on a pose input and a KCS input.

    Each branch is a stem fc layer followed by two residual blocks of two fc
    layers. The pose branch uses layers ``discfc0``..``discfc4`` at width
    ``self.latent_dim_pose``; the KCS branch uses ``discfc5``..``discfc9`` at
    width ``self.latent_dim_kcs``. Branch outputs are concatenated on the
    last axis and projected by ``fc_layer_linear``.

    Args:
        pose: Input to the pose branch.
        kcs: Input to the KCS branch.
        reuse: When truthy, the variable scope is switched to reuse mode
            before any layer is created.

    Returns:
        Output of ``fc_layer_linear`` with ``self.output_dim`` units.
    """

    def residual_branch(inputs, width, first_index):
        # Layer names are discfc<first_index> .. discfc<first_index + 4>.
        names = ['discfc{}'.format(first_index + k) for k in range(5)]
        stem = fc_layer(inputs, width, name=names[0])
        hidden = fc_layer(stem, width, name=names[1])
        hidden = fc_layer(hidden, width, name=names[2])
        first_sum = stem + hidden
        hidden = fc_layer(first_sum, width, name=names[3])
        hidden = fc_layer(hidden, width, name=names[4])
        return first_sum + hidden

    with tf.variable_scope(self.name_scope) as vs:
        if reuse:
            vs.reuse_variables()

        pose_features = residual_branch(pose, self.latent_dim_pose, 0)
        kcs_features = residual_branch(kcs, self.latent_dim_kcs, 5)

        merged = tf.concat([pose_features, kcs_features], axis=-1)
        # The 'dicsfc10' spelling is the existing variable name; keep it.
        ld_out = fc_layer_linear(merged, self.output_dim, name='dicsfc10')
    return ld_out
def create(self, dropout_keep_prob, is_training=True):
    """Build the residual network graph and store its output in ``self.out``.

    Layout: one 3x3 conv with 16 filters, then three stages of
    ``self.NUM_CONV`` residual blocks with 16, 32 and 64 filters. A max-pool
    follows the first two stages. Global average pooling, flatten, dropout
    and a final fc layer (``relu=False``) complete the network.

    Args:
        dropout_keep_prob: Passed to ``slim.dropout`` before the final layer.
        is_training: Passed to ``slim.dropout`` as ``is_training``.
    """
    activation = self.activation_function
    use_batch_norm = self.is_batch_normalization

    # conv_layer arguments: input, filter height, filter width, filter count.
    self.out = conv_layer(self.X, 3, 3, 16, name='conv1',
                          activation_function=activation,
                          is_batch_normalization=use_batch_norm)

    # (stage index used in layer names, filters, max-pool after the stage?)
    stages = ((2, 16, True), (3, 32, True), (4, 64, False))
    for stage, filters, pool_after in stages:
        for unit in range(1, self.NUM_CONV + 1):
            self.out = residual_block(
                self.out, filters,
                name='resBlock{}_{}'.format(stage, unit),
                block_activation_function=activation,
                block_is_batch_normalization=use_batch_norm)
        if pool_after:
            self.out = max_pool(self.out, name='pool{}'.format(stage))

    # Global average pooling, then flatten for the classifier.
    self.out = global_average(self.out, name='gap')
    flat_features = tf.contrib.layers.flatten(self.out)

    # Dropout is applied just before the final fully connected layer.
    dropped = slim.dropout(flat_features, dropout_keep_prob,
                           is_training=is_training, scope='Dropout')
    self.out = fc_layer(dropped, input_size=64,
                        output_size=self.NUM_CLASSES, relu=False, name='fc5')
def baseline(x, parameters, nodropout_probability = None, Gaussian_noise_std = None):
    """Build the multi-view baseline network with a 4-way softmax output.

    Five convolutional sequences are applied through the ``all_views_*``
    helpers, followed by global average pooling, flattening, a
    ``4 * 256``-unit fully connected layer, dropout and a softmax layer.

    Args:
        x: Network input handed to the ``all_views_*`` helpers.
        parameters: Unused by this function.
        nodropout_probability: Passed to ``layers.dropout_layer``.
        Gaussian_noise_std: When not ``None``, the input first goes through
            ``layers.all_views_Gaussian_noise_layer`` with this value.

    Returns:
        Output of ``layers.softmax_layer`` with 4 outputs.
    """
    if Gaussian_noise_std is not None:
        x = layers.all_views_Gaussian_noise_layer(x, Gaussian_noise_std)

    # first conv sequence
    h = layers.all_views_conv_layer(x, 'conv1', number_of_filters=32, filter_size=[3, 3], stride=[2, 2])

    # second conv sequence
    h = layers.all_views_max_pool(h, stride=[3, 3])
    h = layers.all_views_conv_layer(h, 'conv2a', number_of_filters=64, filter_size=[3, 3], stride=[2, 2])
    h = layers.all_views_conv_layer(h, 'conv2b', number_of_filters=64, filter_size=[3, 3], stride=[1, 1])
    h = layers.all_views_conv_layer(h, 'conv2c', number_of_filters=64, filter_size=[3, 3], stride=[1, 1])

    # third conv sequence
    h = layers.all_views_max_pool(h, stride=[2, 2])
    h = layers.all_views_conv_layer(h, 'conv3a', number_of_filters=128, filter_size=[3, 3], stride=[1, 1])
    h = layers.all_views_conv_layer(h, 'conv3b', number_of_filters=128, filter_size=[3, 3], stride=[1, 1])
    h = layers.all_views_conv_layer(h, 'conv3c', number_of_filters=128, filter_size=[3, 3], stride=[1, 1])

    # fourth conv sequence
    h = layers.all_views_max_pool(h, stride=[2, 2])
    h = layers.all_views_conv_layer(h, 'conv4a', number_of_filters=128, filter_size=[3, 3], stride=[1, 1])
    h = layers.all_views_conv_layer(h, 'conv4b', number_of_filters=128, filter_size=[3, 3], stride=[1, 1])
    h = layers.all_views_conv_layer(h, 'conv4c', number_of_filters=128, filter_size=[3, 3], stride=[1, 1])

    # fifth conv sequence
    h = layers.all_views_max_pool(h, stride=[2, 2])
    h = layers.all_views_conv_layer(h, 'conv5a', number_of_filters=256, filter_size=[3, 3], stride=[1, 1])
    h = layers.all_views_conv_layer(h, 'conv5b', number_of_filters=256, filter_size=[3, 3], stride=[1, 1])
    h = layers.all_views_conv_layer(h, 'conv5c', number_of_filters=256, filter_size=[3, 3], stride=[1, 1])

    # pool, flatten, fully connected, dropout and softmax
    h = layers.all_views_global_avg_pool(h)
    h = layers.all_views_flattening_layer(h)
    h = layers.fc_layer(h, number_of_units=4 * 256)
    h = layers.dropout_layer(h, nodropout_probability)

    y_prediction_density = layers.softmax_layer(h, number_of_outputs=4)
    return y_prediction_density
def create(self):
    """Build the residual network graph and store its output in ``self.out``.

    Layout: one 3x3 conv with 16 filters, then three stages of
    ``self.NUM_CONV`` residual blocks with 16, 32 and 64 filters. A max-pool
    follows the first two stages. Global average pooling, flatten and a
    final fc layer (``relu=False``) complete the network.
    """
    self.out = conv_layer(self.X, 3, 3, 16, name='conv1')

    # (stage index used in layer names, filters, max-pool after the stage?)
    stages = ((2, 16, True), (3, 32, True), (4, 64, False))
    for stage, filters, pool_after in stages:
        for unit in range(1, self.NUM_CONV + 1):
            block_name = 'resBlock{}_{}'.format(stage, unit)
            self.out = residual_block(self.out, filters, name=block_name)
        if pool_after:
            self.out = max_pool(self.out, name='pool{}'.format(stage))

    # Global average pooling, then flatten for the classifier.
    self.out = global_average(self.out, name='gap')
    flat_features = tf.contrib.layers.flatten(self.out)
    self.out = fc_layer(flat_features, input_size=64,
                        output_size=self.NUM_CLASSES, relu=False, name='fc5')
def baseline(x, parameters, nodropout_probability=None, gaussian_noise_std=None):
    """Build the multi-view baseline network with a 3-way softmax output.

    Five convolutional sequences are applied through the ``all_views_*``
    helpers, followed by global average pooling, flattening, a 1024-unit
    fully connected layer and a softmax layer. No dropout layer is applied.

    Args:
        x: Network input handed to the ``all_views_*`` helpers.
        parameters: Unused by this function.
        nodropout_probability: Unused by this function.
        gaussian_noise_std: When not ``None``, the input first goes through
            ``layers.all_views_gaussian_noise_layer`` with this value.

    Returns:
        Output of ``layers.softmax_layer`` with 3 outputs (also printed).
    """
    if gaussian_noise_std is not None:
        x = layers.all_views_gaussian_noise_layer(x, gaussian_noise_std)

    # first conv sequence: a single strided convolution, no pooling
    h = layers.all_views_conv_layer(x, 'conv1', number_of_filters=32,
                                    filter_size=[3, 3], stride=[2, 2])

    # Sequences 2-5: (name prefix, pool stride, filters, first conv stride).
    # The remaining two convs in each sequence use stride 1.
    sequences = (
        ('conv2', 3, 64, 2),
        ('conv3', 2, 128, 1),
        ('conv4', 2, 128, 1),
        ('conv5', 2, 256, 1),
    )
    for prefix, pool_stride, filters, first_stride in sequences:
        h = layers.all_views_max_pool(h, stride=[pool_stride, pool_stride])
        for suffix, conv_stride in (('a', first_stride), ('b', 1), ('c', 1)):
            h = layers.all_views_conv_layer(
                h, prefix + suffix, number_of_filters=filters,
                filter_size=[3, 3], stride=[conv_stride, conv_stride])

    # Pool, flatten (with concatenation), and fully connected layers
    h = layers.all_views_global_avg_pool(h)
    h = layers.all_views_flattening_layer(h)
    h = layers.fc_layer(h, number_of_units=1024)

    y_prediction_birads = layers.softmax_layer(h, number_of_outputs=3)
    print(y_prediction_birads)
    return y_prediction_birads
def build_model(placeholders, info, batch_size=4, adj_channel_num=1, embedding_dim=10):
    """Build a three-stage graph-convolution classifier with loss and metrics.

    Each stage applies graph convolution, an activation, graph max pooling,
    graph batch normalisation and graph dropout. A per-node fc layer, a
    gathering layer and a 2-unit output fc layer follow.

    Args:
        placeholders: Mapping with keys ``"adjs"``, ``"features"``,
            ``"nodes"``, ``"labels"``, ``"mask"`` and ``"dropout_rate"``.
        info: Object providing ``feature_dim``, ``all_node_num`` and
            ``graph_node_num``.
        batch_size: Forwarded to the graph conv and pooling layers.
        adj_channel_num: Forwarded to the graph conv and pooling layers.
        embedding_dim: Embedding width, used only when
            ``placeholders["features"]`` is ``None``.

    Returns:
        Tuple ``(model, prediction, cost_opt, cost_sum, metrics)``: the
        output logits, their softmax, the mean and the sum of the masked
        cross-entropy, and a dict holding ``"correct_count"``.
    """
    in_adjs = placeholders["adjs"]
    features = placeholders["features"]
    in_nodes = placeholders["nodes"]
    labels = placeholders["labels"]
    mask = placeholders["mask"]
    dropout_rate = placeholders["dropout_rate"]

    wd_b = None
    wd_w = 0.1
    node_num = info.graph_node_num

    layer = features
    input_dim = info.feature_dim
    if features is None:
        # No input features: learn an embedding per node instead.
        layer = emmbeding_layer("embeding", in_nodes, info.all_node_num,
                                embedding_dim, init_params_flag=True,
                                params=None)
        input_dim = embedding_dim

    # layer: batch_size x graph_node_num x dim
    # (stage suffix, graph-conv output width, activation)
    gcn_stages = (
        (1, 64, tf.nn.relu),
        (2, 128, tf.sigmoid),
        (3, 128, tf.sigmoid),
    )
    for stage, output_dim, activation in gcn_stages:
        with tf.variable_scope("gcn_{}".format(stage)):
            layer = layers.gcn_layer("graph_conv", layer, in_adjs, input_dim,
                                     output_dim,
                                     adj_channel_num=adj_channel_num,
                                     node_num=node_num,
                                     batch_size=batch_size)
            layer = activation(layer)
            input_dim = output_dim
        with tf.variable_scope("pooling_{}".format(stage)):
            layer = layers.graph_max_pooling_layer(
                layer, in_adjs, input_dim,
                adj_channel_num=adj_channel_num,
                node_num=node_num,
                batch_size=batch_size)
        with tf.variable_scope("bn_{}".format(stage)):
            layer = layers.graph_batch_normalization(
                "bn", layer, input_dim, node_num,
                init_params_flag=True, params=None)
        with tf.variable_scope("do_{}".format(stage)):
            layer = layers.graph_dropout_layer(layer, node_num, input_dim,
                                               dropout_rate)

    with tf.variable_scope("fc4"):
        output_dim = 64
        layer = layers.graph_fc_layer("fc", layer, input_dim, output_dim,
                                      node_num, init_params_flag=True,
                                      params=None, wd_w=wd_w, wd_b=wd_b,
                                      activate=tf.sigmoid, with_bn=False)
        input_dim = output_dim

    with tf.variable_scope("gathering"):
        layer = layers.graph_gathering_layer(layer)

    with tf.variable_scope("fc5"):
        output_dim = 2
        model = layers.fc_layer("fc3", layer, input_dim, output_dim,
                                init_params_flag=True, params=None,
                                wd_w=wd_w, wd_b=wd_b, activate=None,
                                with_bn=False)

    prediction = tf.nn.softmax(model)

    # computing cost and metrics
    cross_entropy = tf.nn.softmax_cross_entropy_with_logits(labels=labels,
                                                            logits=model)
    cost = mask * cross_entropy
    cost_opt = tf.reduce_mean(cost)
    cost_sum = tf.reduce_sum(cost)

    is_correct = tf.equal(tf.argmax(prediction, 1), tf.argmax(labels, 1))
    correct_count = mask * tf.cast(is_correct, tf.float32)
    metrics = {}
    metrics["correct_count"] = tf.reduce_sum(correct_count)

    return model, prediction, cost_opt, cost_sum, metrics
def train():
    """Build the network, train it, and periodically evaluate and test it.

    All configuration comes from ``FLAGS``. The run directory is
    ``FLAGS.save_path/<current timestamp>``; it receives the parameter dump
    (``params_settings``), TensorBoard summaries (``train`` and ``val``),
    optional checkpoints and a ``test_results`` log.

    Every ``FLAGS.evaluate_every`` steps, and on the last step, accuracy is
    computed on the current training batch and on one validation batch. A
    full pass over the test batches runs whenever ``step / batch_step`` is one
    of the comma-separated ``FLAGS.test_milestones``, and on the last step.

    Raises:
        ValueError: If ``FLAGS.optimizer`` is not ``"GD"``, ``"ADAM"`` or
            ``"MOMENTUM"``.
    """
    if FLAGS.random_run:
        dataset = random_dataset()
    else:
        dataset = prepare_dataset()
    batch_step, test_batch_step, train_batch, val_batch, test_batch = dataset

    global_step = tf.get_variable("global_step", [],
                                  initializer=tf.constant_initializer(0),
                                  trainable=False)
    input_placeholder = tf.placeholder(tf.float32,
                                       shape=(FLAGS.batch_size, FLAGS.depth,
                                              FLAGS.height, FLAGS.width,
                                              FLAGS.in_channels),
                                       name="input_placeholder")
    labels_placeholder = tf.placeholder(tf.int64,
                                        shape=[FLAGS.batch_size],
                                        name="label_placeholder")
    lr_decay = tf.train.exponential_decay(FLAGS.learning_rate,
                                          global_step=global_step,
                                          decay_steps=FLAGS.decay_steps,
                                          decay_rate=FLAGS.decay_rate,
                                          staircase=True)

    if FLAGS.optimizer == "GD":
        optimizer = tf.train.GradientDescentOptimizer(lr_decay)
    elif FLAGS.optimizer == "ADAM":
        # Adam is given the undecayed base learning rate.
        optimizer = tf.train.AdamOptimizer(learning_rate=FLAGS.learning_rate)
    elif FLAGS.optimizer == "MOMENTUM":
        optimizer = tf.train.MomentumOptimizer(
            learning_rate=lr_decay,
            momentum=0.9,
            use_nesterov=FLAGS.use_nesterov)
    else:
        # Fail here with a clear message rather than later with an
        # AttributeError on ``None.minimize``.
        raise ValueError(
            "Unsupported optimizer: {!r}".format(FLAGS.optimizer))

    net = layers.strict_norm_net(input_placeholder,
                                 feature_maps=FLAGS.feature_maps,
                                 weight_decay=FLAGS.weight_decay)
    logits = layers.fc_layer(net,
                             weight_size=FLAGS.num_classes,
                             act_fn=None,
                             name="FC_OUT",
                             weight_decay=FLAGS.weight_decay)

    with tf.name_scope("Loss"):
        loss = compute_loss("Dataset_Name", logits, labels_placeholder)
    accuracy = compute_accuracy(logits, labels_placeholder)
    train_op = optimizer.minimize(loss, global_step=global_step)
    saver = tf.train.Saver(max_to_keep=5)

    with tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) as sess:
        sess.run(tf.global_variables_initializer())

        current_exec = time.time()
        train_dir = FLAGS.save_path
        model_save_dir = os.path.join(train_dir, str(current_exec))
        if not os.path.exists(model_save_dir):
            os.makedirs(model_save_dir)
        with open(os.path.join(model_save_dir, "params_settings"),
                  "w+") as f:
            f.write(params_str)

        # Create summary writers
        merged = tf.summary.merge_all()
        train_writer = tf.summary.FileWriter(
            os.path.join(model_save_dir, 'train'), sess.graph)
        val_writer = tf.summary.FileWriter(
            os.path.join(model_save_dir, 'val'), sess.graph)

        total_steps = trange(FLAGS.max_steps)
        # Materialise the milestones: on Python 3 ``map`` returns a one-shot
        # iterator, and the ``in`` test below would exhaust it.
        test_milestones = {
            int(m) for m in FLAGS.test_milestones.split(",")
        }
        t_acc, v_acc, t_loss, v_loss = .0, .0, .0, .0

        try:
            for step in total_steps:
                is_last_step = (step + 1) == FLAGS.max_steps

                train_images, train_labels = next(train_batch)
                train_feed = {
                    input_placeholder: train_images,
                    labels_placeholder: train_labels
                }
                _, t_loss = sess.run([train_op, loss], feed_dict=train_feed)
                t_loss = float(np.mean(t_loss))
                total_steps.set_description(
                    'Loss: {:.4f}/{:.4f} - t_acc: {:.3f} - v_acc: {:.3f}'.
                    format(t_loss, v_loss, t_acc, v_acc))

                # Evaluate the model periodically.
                if step % FLAGS.evaluate_every == 0 or is_last_step:
                    summary, t_acc = sess.run([merged, accuracy],
                                              feed_dict=train_feed)
                    train_writer.add_summary(summary, step)

                    val_images, val_labels = next(val_batch)
                    summary, v_loss, v_acc = sess.run(
                        [merged, loss, accuracy],
                        feed_dict={
                            input_placeholder: val_images,
                            labels_placeholder: val_labels
                        })
                    val_writer.add_summary(summary, step)
                    v_loss = float(np.mean(v_loss))
                    total_steps.set_description(
                        'Loss: {:.4f}/{:.4f} - t_acc: {:.3f} - v_acc: {:.3f}'.
                        format(t_loss, v_loss, t_acc, v_acc))

                # Test on the whole validation or test set
                if (step / batch_step) in test_milestones or is_last_step:
                    if FLAGS.save_checkpoint:
                        saver.save(sess,
                                   os.path.join(model_save_dir,
                                                FLAGS.checkpoint_name),
                                   global_step=step)

                    print("testing...")
                    test_acc_list, test_loss_list = [], []
                    for _ in trange(test_batch_step):
                        test_images, test_labels = next(test_batch)
                        test_acc, test_loss = sess.run(
                            [accuracy, loss],
                            feed_dict={
                                input_placeholder: test_images,
                                labels_placeholder: test_labels
                            })
                        test_acc_list.append(test_acc)
                        test_loss_list.append(test_loss)

                    print("Epoch {} - Acc: {} - Loss {}".format(
                        (step / batch_step), np.mean(test_acc_list),
                        np.mean(test_loss_list)))
                    with open(os.path.join(model_save_dir, "test_results"),
                              "a") as f:
                        f.write("Epoch: {}\n".format(step / batch_step))
                        f.write("\tMean train accuracy: {}\n".format(t_acc))
                        f.write("\tMean train loss: {}\n\n".format(t_loss))
                        f.write("\tMean test accuracy: {}\n".format(
                            np.mean(test_acc_list)))
                        f.write("\tMean test loss: {}\n\n".format(
                            np.mean(test_loss_list)))
        finally:
            # Flush and release the event files even if training aborts.
            train_writer.close()
            val_writer.close()
def myNet(input_x,
          input_y,
          is_training,
          img_len=28,
          channel_num=1,
          output_size=10,
          conv_featmap=[6, 16],
          fc_units=[84],
          conv_kernel_size=[5, 5],
          pooling_size=[2, 2],
          l2_norm=0.01,
          seed=235,
          stride_size=[2, 2],
          drop_rate=0):
    """Build an eight-stage CNN with one length head and five digit heads.

    Each of the eight stages runs: conv -> batch normalisation -> ReLU ->
    max pooling (``"VALID"`` padding) -> dropout. The result is flattened and
    fed through two ReLU fc layers. Six dense heads are then attached to the
    second fc layer: one with 7 units and five with 11 units each.

    The body indexes ``conv_featmap``, ``conv_kernel_size``, ``pooling_size``
    and ``stride_size`` at positions 0..7, and ``fc_units`` at positions 0 and
    1, so callers must pass lists at least that long.

    :param input_x: Network input handed to the first conv layer.
    :param input_y: Unused by this function.
    :param is_training: Unused by this function.
    :param img_len: Unused by this function.
    :param channel_num: Input channel count for the first conv layer.
    :param output_size: Unused by this function.
    :param conv_featmap: Output channel count of each conv layer.
    :param fc_units: Unit count of each of the two hidden fc layers.
    :param conv_kernel_size: Kernel shape of each conv layer.
    :param pooling_size: Pooling kernel size of each max pooling layer.
    :param l2_norm: Unused by this function.
    :param seed: Random seed passed to every conv and fc layer.
    :param stride_size: Stride of each max pooling layer.
    :param drop_rate: Rate passed to every ``tf.layers.dropout`` call.
    :return: Tuple ``(out_length, digits, conv_w, fc_w)``. ``digits`` is the
        five digit heads stacked on axis 1; ``conv_w`` and ``fc_w`` are the
        lists of conv and fc layer weights.
    """
    num_conv_stages = 8

    conv_layers = []
    stage_input = input_x
    in_channels = channel_num
    for i in range(num_conv_stages):
        conv = conv_layer(input_x=stage_input,
                          in_channel=in_channels,
                          out_channel=conv_featmap[i],
                          kernel_shape=conv_kernel_size[i],
                          rand_seed=seed,
                          index=i)
        normalised = tf.layers.batch_normalization(conv.output())
        activated = tf.nn.relu(normalised)
        pooled = max_pooling_layer(input_x=activated,
                                   k_size=pooling_size[i],
                                   stride=stride_size[i],
                                   padding="VALID")
        stage_input = tf.layers.dropout(pooled.output(), rate=drop_rate)

        conv_layers.append(conv)
        # The next stage consumes this stage's feature maps.
        in_channels = conv_featmap[i]

    # flatten
    out_shape = stage_input.get_shape()
    img_vector_length = (out_shape[1].value * out_shape[2].value *
                         out_shape[3].value)
    flatten = tf.reshape(stage_input, shape=[-1, img_vector_length])

    # fc layers
    fc_layer_0 = fc_layer(input_x=flatten,
                          in_size=img_vector_length,
                          out_size=fc_units[0],
                          rand_seed=seed,
                          activation_function=tf.nn.relu,
                          index=0)
    fc_layer_1 = fc_layer(input_x=fc_layer_0.output(),
                          in_size=fc_units[0],
                          out_size=fc_units[1],
                          rand_seed=seed,
                          activation_function=tf.nn.relu,
                          index=1)

    # output heads: one 7-unit head, then five 11-unit digit heads
    out_length = tf.layers.dense(fc_layer_1.output(), units=7)
    digit_heads = [
        tf.layers.dense(fc_layer_1.output(), units=11) for _ in range(5)
    ]

    # saving the parameters for l2_norm loss
    conv_w = [layer.weight for layer in conv_layers]
    fc_w = [fc_layer_0.weight, fc_layer_1.weight]

    return out_length, tf.stack(digit_heads, axis=1), conv_w, fc_w
def create(self, dropout_keep_prob, is_training=True):
    """Build the residual classification graph and leave its output in ``self.out``.

    The graph is: a ``conv1`` layer on ``self.X``; three groups of
    ``self.NUM_CONV`` residual blocks whose second argument is 16, 32 and 64
    respectively (presumably the channel count -- confirm against
    ``residual_block``); a max-pool after the first and second group; global
    average pooling; flatten; dropout; and a final ``fc5`` layer with
    ``self.NUM_CLASSES`` outputs and ``relu=False``.

    Args:
        dropout_keep_prob: float, the fraction to keep before the final layer.
        is_training: forwarded to ``slim.dropout`` only.

    Returns:
        None. The final tensor is stored in ``self.out``.
    """
    self.out = conv_layer(
        self.X, 3, 3, 16,
        name='conv1',
        activation_function=self.activation_function,
        is_batch_normalization=self.is_batch_normalization)

    # Original author's note: all residual blocks use zero-padding for
    # shortcut connections (that logic lives in residual_block, not here).
    #
    # Each entry: (block name template, value passed as residual_block's
    # second argument, name of the max-pool that follows the group or None).
    stage_specs = (
        ('resBlock2_{}', 16, 'pool2'),
        ('resBlock3_{}', 32, 'pool3'),
        ('resBlock4_{}', 64, None),
    )
    for name_template, width, pool_name in stage_specs:
        for block_index in range(self.NUM_CONV):
            self.out = residual_block(
                self.out, width,
                name=name_template.format(block_index + 1),
                block_activation_function=self.activation_function,
                block_is_batch_normalization=self.is_batch_normalization)
        if pool_name is not None:
            self.out = max_pool(self.out, name=pool_name)

    # Perform global average pooling to make spatial dimensions as 1x1
    self.out = global_average(self.out, name='gap')
    flat_features = tf.contrib.layers.flatten(self.out)

    # @Hazard
    # dropout_keep_prob: float, the fraction to keep before final layer.
    dropped = slim.dropout(flat_features, dropout_keep_prob,
                           is_training=is_training, scope='Dropout')
    self.out = fc_layer(dropped, input_size=64,
                        output_size=self.NUM_CLASSES, relu=False, name='fc5')
64, '13', padding='SAME', biases_value=0.0, **init.dict) BN_layer5 = tf.layers.batch_normalization(layer13, center=True, scale=True, training=IS_TRAIN) pool5 = layers.max_pool(layer13, 2, 2, padding='SAME') print('[pool_5]:', pool5.get_shape().as_list()) flatten = tf.layers.flatten(pool5) print('[flatten]:', flatten.get_shape().as_list()) fc1 = layers.fc_layer(flatten, 64, '14') BN_layer6 = tf.layers.batch_normalization(fc1, center=True, scale=True, training=IS_TRAIN) print('[fc_1]:', fc1.get_shape().as_list()) with tf.variable_scope('CAM'): logits = layers.fc_layer(BN_layer6, num_classes, '15') print('[logits]:', logits.get_shape().as_list()) with tf.variable_scope('CAM', reuse=True): pool5_resize = tf.image.resize_images(pool5, [224, 224]) pool5_value = tf.nn.bias_add(tf.get_variable("weights_15"), tf.get_variable("baises_15"))
def build_model(self):
    """Build the full training graph: inputs, generator, critic, losses, optimizers, summaries.

    Reads from ``self``: ``dtype``, ``batchsize``, ``inputsize``,
    ``latent_dim``, ``outputsize``, the sub-networks ``posenet``, ``camnet``,
    ``encoder`` and ``critic``, ``kernel``, ``dot_weight``, the loss weights
    ``repro_weight``, ``cam_weight``, ``reg_weight``, ``enc_weight`` and
    ``gp_weight``, ``beta1`` and ``global_step``.

    Sets on ``self``: the placeholders ``generator_in``, ``discriminator_in``,
    ``generator_in_noise``, ``isTraining``, ``lr_d`` and ``lr_g``; the critic
    outputs ``d_real`` and ``d_fake``; the loss tensors ``loss_reprojection``,
    ``loss_cam``, ``loss_reg``, ``loss_g``, ``loss_d``, ``loss_gp`` and
    ``loss_enc``; ``output_pose`` and ``camout``; the variable lists
    ``d_vars`` and ``g_var``; the optimizers ``d_optimim`` and ``g_optimim``;
    the update ops ``dupdates`` and ``gupdates``; the summary ops; and
    ``saver``.

    Returns:
        None.

    NOTE(review): this method was recovered from a source whose indentation
    was lost. The extent of each ``with tf.variable_scope(...)`` block below
    is a reconstruction; confirm it against the original file, because scope
    membership changes variable and op names.
    """
    with tf.variable_scope('Input'):
        # Fixed-batch placeholders for the generator input, the latent noise
        # and the samples fed to the critic as "real" data.
        generator_in = tf.placeholder(self.dtype, shape=[self.batchsize, self.inputsize], name='generator_in')
        generator_in_noise = tf.placeholder(self.dtype, shape=[self.batchsize, self.latent_dim], name='generator_in_noise')
        discriminator_in = tf.placeholder(self.dtype, shape=[self.batchsize, self.outputsize], name='discriminator_in')
        self.generator_in = generator_in
        self.discriminator_in = discriminator_in
        self.generator_in_noise = generator_in_noise
        # Not consumed anywhere else in this method.
        self.isTraining = tf.placeholder(tf.bool, name='isTrainingflag')
        # Learning rates are fed at run time, one per optimizer.
        self.lr_d = tf.placeholder(self.dtype, name='learning_rate_d')
        self.lr_g = tf.placeholder(self.dtype, name='learning_rate_g')
    with tf.variable_scope('Generator') :
        # Shared trunk feeding both the pose branch and the camera branch.
        h1 = fc_layer(generator_in, self.latent_dim, name='genfc1')
        h2 = resisual_block(h1, self.latent_dim, name_scope='Block1')
        # Pose branch also receives the noise; camera branch sees only h2.
        pose_out = self.posenet.forward(h2, generator_in_noise)
        cam_out = self.camnet.forward(h2)
        # Encoder maps the generated pose back; compared with the noise in loss_enc.
        enc_out = self.encoder.forward(pose_out)
        # kcs_layer output is flattened for generated, real and mixed samples
        # and passed to the critic as a second input.
        psi_out = kcs_layer(pose_out)
        psi_vec_out = tcl.flatten(psi_out)
        psi_real = kcs_layer(discriminator_in)
        psi_vec_real = tcl.flatten(psi_real)
        # Samples built from real and generated poses; used for the gradient-penalty term.
        average_samples = ops.weightedsample(discriminator_in, pose_out)
        psi_average_samples = kcs_layer(average_samples)
        psi_average_samples_vec = tcl.flatten(psi_average_samples)
        pose2d_repro = reprojection_layer(cam_out, pose_out)
    with tf.variable_scope('Discriminator') :
        # First call creates the critic; the next two pass reuse=True.
        d_fake = self.critic.forward(pose_out, psi_vec_out)
        d_real = self.critic.forward(discriminator_in, psi_vec_real, reuse = True)
        d_average = self.critic.forward(average_samples, psi_average_samples_vec, reuse = True)
        self.d_real = d_real
        self.d_fake = d_fake
    with tf.variable_scope('loss') :
        self.loss_reprojection = ops.weighted_pose_2d_loss(generator_in, pose2d_repro)
        self.loss_cam = ops.cam_loss(cam_out)
        self.loss_reg = ops.regularizer(pose_out, generator_in_noise)
        # Kernel function is looked up by name on the mmd module, e.g. '_<name>_kernel'.
        kernel = getattr(mmd, '_%s_kernel' % self.kernel)
        # NOTE(review): wts is unpacked but not used below.
        K_XX, K_XY, K_YY, wts = kernel(d_fake, d_real, add_dot = self.dot_weight)
        self.loss_g = mmd.mmd2([K_XX, K_XY, K_YY, False])
        # Critic loss is the negated generator loss.
        self.loss_d = - self.loss_g
        self.loss_gp = mmd.gp_loss(d_average, average_samples, d_real, d_fake, kernel, self.dot_weight)
        # Mean absolute difference between the encoder output and the input noise.
        self.loss_enc = tf.reduce_mean(tf.abs(enc_out - self.generator_in_noise))
        # Weighted sums; loss_g and loss_d enter with an implicit weight of 1.
        total_loss_g = self.repro_weight * self.loss_reprojection + self.cam_weight * self.loss_cam + \
            self.loss_g + self.reg_weight * self.loss_reg + self.enc_weight * self.loss_enc
        total_loss_d = self.gp_weight * self.loss_gp + self.loss_d
    self.output_pose = pose_out
    self.camout = cam_out
    # Split trainable variables by substring match on the variable name.
    t_vars = tf.trainable_variables()
    self.d_vars = [var for var in t_vars if 'Discriminator' in var.name]
    self.g_var = [var for var in t_vars if 'Generator' in var.name]
    self.d_optimim = tf.train.AdamOptimizer(self.lr_d, beta1=self.beta1, beta2=0.9)
    self.g_optimim = tf.train.AdamOptimizer(self.lr_g, beta1=self.beta1, beta2=0.9)
    dgradients = self.d_optimim.compute_gradients(total_loss_d, self.d_vars)
    ggradients = self.g_optimim.compute_gradients(total_loss_g, self.g_var)
    # Gradient clipping, currently disabled:
    # dgradients = [(tf.clip_by_norm(dd, 1.0), vv) for dd, vv in dgradients]
    # ggradients = [(tf.clip_by_norm(gg, 1.0), vv) for gg, vv in ggradients]
    # NOTE(review): both update ops are given the same global_step, so each
    # op that runs advances it -- confirm that is intended.
    self.dupdates = self.d_optimim.apply_gradients(dgradients, global_step=self.global_step)
    self.gupdates = self.g_optimim.apply_gradients(ggradients, global_step=self.global_step)
    # Reprojection and camera losses go to both 'train' and 'test' collections;
    # the adversarial terms go to 'train' only.
    self.loss_repro_summ = tf.summary.scalar('loss/loss_repro', self.loss_reprojection, collections=['train', 'test'])
    self.loss_cam_summ = tf.summary.scalar('loss/loss_cam', self.loss_cam, collections=['train', 'test'])
    self.loss_gp_summ = tf.summary.scalar('loss/loss_gp', self.loss_gp, collections=['train'])
    self.loss_d_summ = tf.summary.scalar('loss/loss_d', self.loss_d, collections=['train'])
    self.loss_g_summ = tf.summary.scalar('loss/loss_g', self.loss_g, collections=['train'])
    # No collections argument here, unlike the loss summaries above.
    self.learning_rate_summary_d = tf.summary.scalar('learning_rate_d', self.lr_d)
    self.learning_rate_summary_g = tf.summary.scalar('learning_rate_g', self.lr_g)
    # Saver covers all global variables and keeps at most 10 checkpoints.
    self.saver = tf.train.Saver(tf.global_variables(), max_to_keep=10)