def _get_mean(self, _input):
    """
    Build the fully connected stack that produces the mean.

    Depth, width, activation and the per-layer growth factor are read from
    ``self.directives``; the output width is the last entry of
    ``self._oslot_to_shape[0]``.

    Args:
        _input: Tensor fed to the first hidden layer.

    Returns:
        A pair ``(mean, hid_layer)``: the linear output layer and the last
        hidden layer it was computed from.
    """
    directives = self.directives
    depth = directives['num_layers']
    width = directives['num_nodes']
    act_fn = directives['activation']
    growth = directives['net_grow_rate']
    out_dim = self._oslot_to_shape[0][-1]

    def _hidden(tensor, units):
        # Biases are drawn from a normal whose stddev is 1 / sqrt(units).
        return fully_connected(
            tensor,
            units,
            activation_fn=act_fn,
            biases_initializer=tf.random_normal_initializer(
                stddev=1 / np.sqrt(units)))

    with tf.variable_scope(self.name + '_mean', reuse=tf.AUTO_REUSE):
        # The first hidden layer is always created; the remaining
        # ``depth - 1`` layers have their width rescaled by ``growth``.
        hid_layer = _hidden(_input, width)
        for _ in range(depth - 1):
            width = int(width * growth)
            hid_layer = _hidden(hid_layer, width)

        mean = fully_connected(hid_layer, out_dim, activation_fn=None)

    return mean, hid_layer
def embedding_model(im_feats, sent_feats, train_phase, im_labels,
                    fc_dim=2048, embed_dim=512):
    """
    Build two-branch embedding networks (image and text).

    Each branch is ``add_fc`` followed by a linear fully connected layer,
    and its output is l2-normalized along axis 1.

    Args:
        im_feats: input of the image branch.
        sent_feats: input of the text branch.
        train_phase: forwarded to ``add_fc``.
        im_labels: not used by this function.
        fc_dim: the output dimension of the first fc layer.
        embed_dim: the output dimension of the second fc layer,
            i.e. embedding space dimension.

    Returns:
        ``(i_embed, s_embed)``: the image and text embeddings.
    """
    def _unit_norm(tensor):
        return tf.nn.l2_normalize(tensor, 1, epsilon=1e-10)

    # Image branch.
    image_hidden = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
    image_proj = fully_connected(
        image_hidden, embed_dim, activation_fn=None, scope='im_embed_2')
    i_embed = _unit_norm(image_proj)

    # Text branch.
    text_hidden = add_fc(sent_feats, fc_dim, train_phase, 'sent_embed_1')
    text_proj = fully_connected(
        text_hidden, embed_dim, activation_fn=None, scope='sent_embed_2')
    s_embed = _unit_norm(text_proj)

    return i_embed, s_embed
def get_network(img_emb, sent_emb):
    """
    Build the image and sentence embedding branches in training mode.

    Each branch is ``add_fc`` with 2048 outputs (called with ``True`` as
    its train-phase argument) followed by a linear 512-unit fully
    connected layer and l2 normalization along axis 1.

    Returns:
        ``(i_embed, s_embed)``: the image and sentence embeddings.
    """
    # Image branch.
    image_hidden = add_fc(img_emb, 2048, True, 'im_embed_1')
    i_embed = tf.nn.l2_normalize(
        fully_connected(
            image_hidden, 512, activation_fn=None, scope='im_embed_2'),
        1,
        epsilon=1e-10)

    # Sentence branch.
    sentence_hidden = add_fc(sent_emb, 2048, True, 'sent_embed_1')
    s_embed = tf.nn.l2_normalize(
        fully_connected(
            sentence_hidden, 512, activation_fn=None, scope='sent_embed_2'),
        1,
        epsilon=1e-10)

    return i_embed, s_embed
def _build_out_default(self, Z):
    """
    Map ``Z`` through three stacked fully connected layers.

    The layer widths are 64, 128 and ``self.ydim``; every layer is created
    with the default settings of ``fully_connected``.

    Returns:
        The output of the last layer.
    """
    layer = Z
    for width in (64, 128, self.ydim):
        layer = fully_connected(layer, width)
    return layer
def _generate_out(self, Z):
    """
    Produce the output tensor from ``Z``.

    ``Z`` goes through fully connected layers of width 64 and 128, then
    through a final layer of width ``self.ydim``.

    Returns:
        The output of the final layer.
    """
    out_width = self.ydim
    widened = fully_connected(fully_connected(Z, 64), 128)
    return fully_connected(widened, out_width)
def embedding_model(vd_feats, sent_feats, train_phase, vd_labels,
                    fc_dim=2048, embed_dim=512):
    """
    Build two-branch embedding networks (video and text).

    The video branch projects every frame feature linearly, runs the
    projected frames through two stacked LSTM cells and embeds the
    concatenation of both final cell states. The text branch is ``add_fc``
    followed by a linear fully connected layer.

    Args:
        vd_feats: input of the video branch; it is reshaped to rows of
            4096 values and its first dimension is used as batch size.
        sent_feats: input of the text branch.
        train_phase: forwarded to ``add_fc``.
        vd_labels: not used by this function.
        fc_dim: the output dimension of the first fc layer.
        embed_dim: the output dimension of the second fc layer,
            i.e. embedding space dimension.

    Returns:
        ``(v_embed, s_embed)``: the l2-normalized video and text embeddings.
    """
    # Video branch.
    hidden_units = 1000
    frame_dim = 4096
    num_steps = 80
    batch_size = vd_feats.shape[0]

    bottom_cell = tf.contrib.rnn.BasicLSTMCell(
        hidden_units, state_is_tuple=False)
    top_cell = tf.contrib.rnn.BasicLSTMCell(
        hidden_units, state_is_tuple=False)

    frame_weights = tf.Variable(
        tf.random_uniform([frame_dim, hidden_units], -0.1, 0.1),
        name='encode_video_W')
    frame_bias = tf.Variable(tf.zeros([hidden_units]), name='encode_video_b')

    # Project every frame, then restore the (batch, step, hidden) layout.
    flat_frames = tf.reshape(vd_feats, [-1, frame_dim])
    projected = tf.nn.xw_plus_b(flat_frames, frame_weights, frame_bias)
    projected = tf.reshape(projected, [batch_size, num_steps, hidden_units])

    state1 = tf.zeros([batch_size, bottom_cell.state_size])
    state2 = tf.zeros([batch_size, top_cell.state_size])
    padding = tf.zeros([batch_size, hidden_units])

    with tf.variable_scope(tf.get_variable_scope()):
        for step in range(num_steps):
            # The cell variables are created on the first step and shared
            # by every later one.
            if step > 0:
                tf.get_variable_scope().reuse_variables()

            with tf.variable_scope("LSTM1"):
                bottom_out, state1 = bottom_cell(projected[:, step, :], state1)

            with tf.variable_scope("LSTM2"):
                _, state2 = top_cell(
                    tf.concat([padding, bottom_out], 1), state2)

    video_hidden = add_fc(
        tf.concat([state1, state2], 1), fc_dim, train_phase, 'vd_embed_1')
    video_proj = fully_connected(
        video_hidden, embed_dim, activation_fn=None, scope='vd_embed_2')
    v_embed = tf.nn.l2_normalize(video_proj, 1, epsilon=1e-10)

    # Text branch.
    text_hidden = add_fc(sent_feats, fc_dim, train_phase, 'sent_embed_1')
    text_proj = fully_connected(
        text_hidden, embed_dim, activation_fn=None, scope='sent_embed_2')
    s_embed = tf.nn.l2_normalize(text_proj, 1, epsilon=1e-10)

    return v_embed, s_embed
def _linear(self, input_tensor, output_nums, l2_reg, activation_fn=None):
    """
    Create a fully connected layer with Xavier-initialized weights and biases.

    Args:
        input_tensor: input of the layer.
        output_nums: number of outputs of the layer.
        l2_reg: scale of the l2 regularizer applied to both weights and
            biases; no regularizer is attached when ``l2_reg <= 0``.
        activation_fn: activation passed to the layer (``None`` by default).

    Returns:
        The output tensor of the layer.
    """
    layer_options = {
        'activation_fn': activation_fn,
        'weights_initializer': layers.xavier_initializer(),
        'biases_initializer': layers.xavier_initializer(),
    }

    if not l2_reg <= 0:
        layer_options['weights_regularizer'] = layers.l2_regularizer(l2_reg)
        layer_options['biases_regularizer'] = layers.l2_regularizer(l2_reg)

    return layers.fully_connected(input_tensor, output_nums, **layer_options)
def encode_phrases(args, phrase_plh, train_phase_plh, num_phrases_plh,
                   phrase_feature_dim, phrase_denom_plh, vecs):
    """Embed phrases and predict their concept weights.

    The phrase tokens are looked up in a ``word_embeddings`` variable
    initialized from ``vecs``. Depending on ``args.language_model`` the
    phrase representation is then either the last valid GRU output
    (``'gru'``), an attention-weighted sum of the word vectors
    (``'attend'``), or the average of the word vectors (anything else).

    Arguments:
    args -- options object; reads ``dim_embed``, ``embedding_ft``,
            ``language_model``, ``cca_parameters`` and ``num_embeddings``
    phrase_plh -- token ids, reshaped here to
                  [-1, num_phrases_plh, phrase_feature_dim]; ids > 0 are
                  counted as words
    train_phase_plh -- forwarded to the embedding layers
    num_phrases_plh -- number of phrases per example
    phrase_feature_dim -- number of token slots per phrase
    phrase_denom_plh -- divisor applied to the concept loss
    vecs -- initial word embedding matrix (its ``shape`` attribute is read)

    Returns:
    (phrase_embed, concept_weights, concept_loss, embed_l2reg)
    """
    final_embed = args.dim_embed
    embed_dim = final_embed * 4
    phrase_plh = tf.reshape(
        phrase_plh, [-1, num_phrases_plh, phrase_feature_dim])

    # sometimes finetuning word embedding helps (with l2 reg), but often
    # doesn't seem to make a big difference
    word_embeddings = tf.get_variable(
        'word_embeddings', vecs.shape,
        initializer=tf.constant_initializer(vecs),
        trainable=args.embedding_ft)
    embedded_words = tf.nn.embedding_lookup(word_embeddings, phrase_plh)

    # When finetuning, penalize drifting away from the initial vectors.
    embed_l2reg = tf.zeros(1)
    if args.embedding_ft:
        embed_l2reg = tf.nn.l2_loss(word_embeddings - vecs)

    eps = 1e-10
    if args.language_model == 'gru':
        phrase_plh = tf.reshape(phrase_plh, [-1, phrase_feature_dim])
        source_sequence_length = tf.reduce_sum(
            tf.cast(phrase_plh > 0, tf.int32), 1)
        embedded_words = tf.reshape(
            embedded_words, [-1, phrase_feature_dim, vecs.shape[1]])
        encoder_cell = tf.nn.rnn_cell.GRUCell(final_embed)
        encoder_outputs, encoder_state = tf.nn.dynamic_rnn(
            encoder_cell, embedded_words, dtype=encoder_cell.dtype,
            sequence_length=source_sequence_length)
        final_outputs = extract_axis_1(
            encoder_outputs, source_sequence_length - 1)
        phrase_input = tf.reshape(
            final_outputs, [-1, num_phrases_plh, final_embed])

        outputs = fully_connected(
            phrase_input, embed_dim, activation_fn=None,
            weights_regularizer=tf.contrib.layers.l2_regularizer(0.005),
            scope='phrase_encoder')
        phrase_embed = tf.nn.l2_normalize(outputs, 2, epsilon=eps)
    else:
        # eps keeps the division finite for phrases without any word.
        num_words = tf.reduce_sum(
            tf.to_float(phrase_plh > 0), 2, keep_dims=True) + eps
        phrase_input = tf.nn.l2_normalize(
            tf.reduce_sum(embedded_words, 2) / num_words, 2)
        if args.language_model == 'attend':
            context_vector = tf.tile(
                tf.expand_dims(phrase_input, 2),
                (1, 1, phrase_feature_dim, 1))
            attention_inputs = tf.concat((context_vector, embedded_words), 3)
            attention_weights = fully_connected(
                attention_inputs, 1,
                weights_regularizer=l2_regularizer(0.0005),
                scope='self_attend')
            # Only drop the trailing unit axis: an unrestricted squeeze would
            # also remove batch/phrase/token axes of size 1 and break the
            # expand_dims(..., 3) below.
            attention_weights = tf.nn.softmax(
                tf.squeeze(attention_weights, [3]))
            phrase_input = tf.nn.l2_normalize(
                tf.reduce_sum(
                    embedded_words * tf.expand_dims(attention_weights, 3), 2),
                2)

        phrase_input = tf.reshape(
            phrase_input, [-1, num_phrases_plh, vecs.shape[1]])
        if args.cca_parameters:
            # NOTE: unpickling runs arbitrary code; only load trusted files.
            # The context manager closes the file handle deterministically.
            with open(args.cca_parameters, 'rb') as param_file:
                parameters = pickle.load(param_file)
            phrase_embed = setup_initialize_fc_layers(
                args, phrase_input, parameters, 'lang', train_phase_plh,
                norm_axis=2)
        else:
            phrase_embed = embedding_branch(
                phrase_input, embed_dim, train_phase_plh, 'phrase',
                norm_axis=2)

    concept_weights = embedding_branch(
        phrase_input, embed_dim, train_phase_plh, 'concept_weight',
        do_l2norm=False, outdim=args.num_embeddings)
    # The l1 penalty is computed on the raw weights, before the softmax.
    concept_loss = tf.reduce_sum(
        tf.norm(concept_weights, axis=2, ord=1)) / phrase_denom_plh
    concept_weights = tf.nn.softmax(concept_weights)
    return phrase_embed, concept_weights, concept_loss, embed_l2reg
def _build_default(self):
    """
    Build the default encoder/decoder pair inside the 'DetAE' scope.

    ``self.Y`` is passed through fully connected layers of width 128, 64
    and ``self.zdim`` to obtain ``Z``, which is then handed to
    ``self._build_out_default``.

    Returns:
        ``(Yprime, Z)``: the output of ``_build_out_default`` and the
        tensor it was computed from.
    """
    encoder_input = self.Y
    code_width = self.zdim

    with tf.variable_scope('DetAE', reuse=tf.AUTO_REUSE):
        Z = encoder_input
        for width in (128, 64, code_width):
            Z = fully_connected(Z, width)

        Yprime = self._build_out_default(Z)

    return Yprime, Z
def _define_default(self):
    """
    Define the default encoder and its output.

    ``self.Y`` goes through fully connected layers of width 128 and 64 and
    a final one of width ``self.zdim``; the result ``Z`` is passed to
    ``self._generate_out``.

    Returns:
        ``(Yprime, Z)``: the output of ``_generate_out`` and ``Z`` itself.
    """
    source = self.Y
    code_width = self.zdim

    narrowed = fully_connected(fully_connected(source, 128), 64)
    Z = fully_connected(narrowed, code_width)

    return self._generate_out(Z), Z
def define_graph(input_image, batch_size):
    """Build a classifier graph of two conv/pool stages and two dense layers.

    Args:
        input_image: Tensor fed to the first convolution.
        batch_size: Leading dimension used when flattening the pooled
            feature maps before the dense layers.

    Returns:
        The output tensor of the final 120-unit fully connected layer.
    """
    print("string define the graph...")

    def _truncated_normal(shape, dtype=None, partition_info=None):
        # Sample with the shape the layer asks for instead of a hard-coded
        # [38912, 512] / [512, 120], so the graph is not tied to one input
        # image size. dtype and partition_info are accepted but unused, as
        # in the previous lambdas.
        return tf.truncated_normal(shape, stddev=0.1)

    # Convolution layer 1.
    conv2d_layer_1 = convolution2d(
        input_image,
        num_outputs=32,        # number of filters
        kernel_size=(5, 5),    # filter height and width
        activation_fn=tf.nn.relu,
        weights_initializer=tf.random_normal_initializer,
        stride=[2, 2],
        trainable=True)
    pool_layer_1 = tf.nn.max_pool(
        conv2d_layer_1,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='SAME')

    # Convolution layer 2.
    conv2d_layer_2 = convolution2d(
        pool_layer_1,
        num_outputs=64,
        kernel_size=(5, 5),
        activation_fn=tf.nn.relu,
        weights_initializer=tf.random_normal_initializer,
        stride=(1, 1),
        trainable=True)
    pool_layer_2 = tf.nn.max_pool(
        conv2d_layer_2,
        ksize=[1, 2, 2, 1],
        strides=[1, 2, 2, 1],
        padding='SAME')

    flattened_layer_2 = tf.reshape(pool_layer_2, [batch_size, -1])

    # weights_initializer also accepts a callable; here it returns a
    # truncated normal sample with an explicit standard deviation.
    hidden_layer_3 = fully_connected(
        flattened_layer_2,
        512,
        weights_initializer=_truncated_normal,
        activation_fn=tf.nn.relu)

    # Apply dropout to some neurons to reduce their importance in the model.
    # NOTE(review): in TF 1.x the second positional argument of
    # tf.nn.dropout is keep_prob, so 0.1 keeps only 10% of the
    # activations -- confirm this is intended.
    hidden_layer_3 = tf.nn.dropout(hidden_layer_3, 0.1)

    # The output is fully connected to the previous layer, with one unit
    # for each of the 120 dog breeds available in training.
    final_fully_connected = fully_connected(
        hidden_layer_3,
        120,  # 120 dog breeds
        weights_initializer=_truncated_normal)

    print("graph is ready")
    return final_fully_connected
def generator(z, y):
    '''
    Build the conditional generator network.

    ``z`` and ``y`` are each passed through their own fully connected layer
    (1000 and 200 units), concatenated along axis 1 and fed through two
    1200-unit ReLU layers and a final layer of ``WIDTH * WIDTH`` units.

    args:
        z: random vector
        y: conditioning input
    returns:
        net: generator network
    '''
    zf = layers.fully_connected(z, 1000, scope='fcz')
    yf = layers.fully_connected(y, 200, scope='fcy')
    # tf.concat takes (values, axis) since TensorFlow 1.0; the previous
    # (axis, values) order fails on the TF 1.x API used by the rest of
    # this file.
    net = tf.concat([zf, yf], 1)
    net = layers.fully_connected(
        net, 1200, activation_fn=tf.nn.relu, scope='fc1')
    net = layers.fully_connected(
        net, 1200, activation_fn=tf.nn.relu, scope='fc2')
    net = layers.fully_connected(net, WIDTH * WIDTH, scope='fc3')
    return net
def pose_net(tgt_image, src_image_stack, is_training=True):
    """
    Build the pose prediction network.

    The target image and the stack of source images are concatenated along
    axis 3 and passed through seven stride-2 convolutions. The flattened
    result feeds a 512-unit dense layer and two linear heads whose
    concatenation is scaled by 0.1 and reshaped to six values per source.

    Args:
        tgt_image: target image tensor.
        src_image_stack: source images concatenated along axis 3; its
            static shape is read, and the number of sources is its channel
            count divided by 3.
        is_training: not used by this function.

    Returns:
        ``(pose_final, end_points)``: the pose tensor of shape
        ``[-1, num_source, 6]`` and the dict built from the layers'
        output collection.
    """
    inputs = tf.concat([tgt_image, src_image_stack], axis=3)
    batch, _, _, channels = src_image_stack.get_shape().as_list()
    num_source = int(channels // 3)

    # (scope name, number of filters, kernel size) of the shared stack.
    shared_convs = (
        ('cnv1', 16, [7, 7]),
        ('cnv2', 32, [5, 5]),
        ('cnv3', 64, [3, 3]),
        ('cnv4', 128, [3, 3]),
        ('cnv5', 256, [3, 3]),
    )
    pose_convs = (
        ('cnv6', 256, [3, 3]),
        ('cnv7', 512, [3, 3]),
    )

    with tf.variable_scope('pose_net') as sc:
        end_points_collection = sc.original_name_scope + '_end_points'
        with slim.arg_scope(
                [slim.conv2d, slim.conv2d_transpose, fully_connected],
                normalizer_fn=None,
                weights_regularizer=slim.l2_regularizer(0.05),
                activation_fn=tf.nn.relu,
                outputs_collections=end_points_collection):
            # cnv1 to cnv5b are shared between pose and explainability
            # prediction
            features = inputs
            for scope_name, filters, kernel in shared_convs:
                features = slim.conv2d(
                    features, filters, kernel, stride=2, scope=scope_name)

            # Pose specific layers
            with tf.variable_scope('pose'):
                for scope_name, filters, kernel in pose_convs:
                    features = slim.conv2d(
                        features, filters, kernel, stride=2, scope=scope_name)

                flattened = tf.reshape(features, (batch, -1))
                dense = fully_connected(
                    flattened, 512, activation_fn=tf.nn.relu)

                pose_pred_xyz = fully_connected(
                    dense, 3 * num_source, activation_fn=None)
                pose_pred_quat = fully_connected(
                    dense, 3 * num_source, activation_fn=None)

                pose_avg = 0.1 * tf.concat((pose_pred_xyz, pose_pred_quat), 1)
                pose_final = tf.reshape(pose_avg, [-1, num_source, 6])

            end_points = utils.convert_collection_to_dict(
                end_points_collection)
            return pose_final, end_points
def setup_lstm(self, encoder_cell, embedded_word_ids, tokens,
               source_sequence_length, reuse, suffix):
    """
    Encode sentences with a recurrent cell on top of the universal embedding.

    Args:
        encoder_cell: RNN cell passed to ``tf.nn.dynamic_rnn``.
        embedded_word_ids: input of ``universal_embedding_layer``.
        tokens: token ids, forwarded to ``universal_embedding_layer``.
        source_sequence_length: length of each sequence; the RNN output at
            index ``length - 1`` is used as the sentence feature.
        reuse: reuse flag of the final projection; ignored (``None`` is
            used) when ``self.args.separate_lang_branch`` is set.
        suffix: name suffix of the universal embedding layer; it is also
            appended (after an underscore) to the RNN and projection
            scopes when ``self.args.separate_lang_branch`` is set.

    Returns:
        ``(sent_embed, avg_words)``: the l2-normalized sentence embedding
        and the second output of ``universal_embedding_layer``.
    """
    universal_embedding, avg_words = universal_embedding_layer(
        embedded_word_ids, tokens, self.embed_dim, suffix)

    # A separate language branch gets its own scopes and never reuses.
    separate_branch = self.args.separate_lang_branch
    scope_suffix = '_' + suffix if separate_branch else ''
    projection_reuse = None if separate_branch else reuse

    rnn_outputs, _ = tf.nn.dynamic_rnn(
        encoder_cell,
        universal_embedding,
        dtype=tf.float32,
        sequence_length=source_sequence_length,
        scope='rnn' + scope_suffix)
    last_outputs = extract_axis_1(rnn_outputs, source_sequence_length - 1)

    projected = fully_connected(
        last_outputs,
        self.embed_dim,
        activation_fn=None,
        weights_regularizer=tf.contrib.layers.l2_regularizer(0.005),
        scope='phrase_encoder' + scope_suffix,
        reuse=projection_reuse)

    sent_embed = tf.nn.l2_normalize(projected, 1, epsilon=1e-10)
    return sent_embed, avg_words
def __init__(self, lr, st_size, act_size):
    """Build the graph of a single-layer policy-gradient agent.

    Args:
        lr: learning rate of the gradient descent optimizer.
        st_size: depth of the one-hot encoding of the current state.
        act_size: number of outputs of the fully connected layer.
    """
    # Current State Placeholder
    self.current_state_in = tf.placeholder(shape=[1], dtype=tf.int32)

    # Create a One hot vector
    current_state_OneHot = tf.one_hot(self.current_state_in, st_size)

    # Output of the Fully Connected Layer
    output = fully_connected(current_state_OneHot, act_size,
                             activation_fn=tf.nn.sigmoid,
                             biases_initializer=None,
                             weights_initializer=tf.ones_initializer())

    self.output = tf.reshape(output, [-1])
    self.chosen_action = tf.argmax(self.output, 0)

    # Placeholder to store rewards
    self.reward_holder = tf.placeholder(shape=[1], dtype=tf.float32)
    self.action_holder = tf.placeholder(shape=[1], dtype=tf.int32)

    # Output entry that belongs to the action that was taken.
    self.responsible_weight = tf.slice(
        self.output, self.action_holder, [1])

    # Policy Gradient Calculation
    self.loss = -(tf.log(self.responsible_weight) * self.reward_holder)

    # Define and use Gradient Descent Optimizer. The rate comes from the
    # ``lr`` argument; the previous code referenced an undefined name
    # ``learning_rate``.
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=lr)

    # Update the Neural Network Agent values
    self.update = optimizer.minimize(self.loss)
    # -------------------- EOC -----------------------
def add_fc(inputs, outdim, train_phase, scope_in):
    """
    Fully connected layer followed by batch norm, ReLU and dropout.

    Args:
        inputs: input tensor.
        outdim: number of outputs of the fully connected layer.
        train_phase: passed as ``training`` to batch normalization and
            dropout.
        scope_in: prefix of the ``/fc``, ``/bnorm``, ``/relu`` and
            ``/dropout`` names.

    Returns:
        The output of the dropout layer (rate 0.1, seed 0).
    """
    def _name(part):
        return scope_in + '/' + part

    linear = fully_connected(
        inputs, outdim, activation_fn=None, scope=_name('fc'))
    normalized = tf.layers.batch_normalization(
        linear,
        momentum=0.1,
        epsilon=1e-5,
        training=train_phase,
        name=_name('bnorm'))
    activated = tf.nn.relu(normalized, name=_name('relu'))
    return tf.layers.dropout(
        activated,
        rate=0.1,
        seed=0,
        training=train_phase,
        name=_name('dropout'))
def setup_initialize_fc_layers(args, feats, parameters, scope_in,
                               train_phase, norm_axis=2):
    """
    Stack fully connected layers initialized from precomputed parameters.

    For every entry of ``parameters`` the features are centered with the
    entry's ``<scope_in>_mean``, projected by a layer whose weights start
    at ``<scope_in>_proj`` and multiplied by the entry's ``scaling``. All
    layers but the last use ReLU. The final features are l2-normalized.

    Args:
        args: options object; ``cca_weight_reg`` is read.
        feats: input features.
        parameters: sequence of dicts with the keys ``'scaling'``,
            ``scope_in + '_mean'`` and ``scope_in + '_proj'``.
        scope_in: key prefix and prefix of the layer scopes.
        train_phase: not used by this function.
        norm_axis: axis of the final l2 normalization.

    Returns:
        The normalized features.
    """
    last_index = len(parameters) - 1
    for layer_index, layer_params in enumerate(parameters):
        scaling = layer_params['scaling']
        num_outputs = len(scaling)
        center = layer_params[scope_in + '_mean']
        projection = layer_params[scope_in + '_proj']

        initializer = tf.constant_initializer(projection, dtype=tf.float32)
        regularizer = weight_l2_regularizer(
            layer_params[scope_in + '_proj'], args.cca_weight_reg)

        # Only the last layer is linear.
        activation_fn = tf.nn.relu if layer_index < last_index else None

        projected = fully_connected(
            feats - center,
            num_outputs,
            activation_fn=activation_fn,
            weights_initializer=initializer,
            weights_regularizer=regularizer,
            scope=scope_in + '_embed_' + str(layer_index))
        feats = projected * scaling

    return tf.nn.l2_normalize(feats, norm_axis, epsilon=1e-10)
def embedding_branch(x, embed_dim, train_phase_plh, scope_in,
                     do_l2norm=True, outdim=None, norm_axis=1):
    """Applies a pair of fully connected layers to the input tensor.

    Arguments:
    x -- input_tensor
    embed_dim -- dimension of the input to the second fully connected layer
    train_phase_plh -- indicator whether model is in training mode
    scope_in -- scope prefix for the desired layers
    do_l2norm -- indicates if the output should be l2 normalized
    outdim -- dimension of the output embedding, if None outdim=embed_dim
    norm_axis -- axis along which the output is l2 normalized
    """
    target_dim = embed_dim if outdim is None else outdim

    hidden = add_fc(x, embed_dim, train_phase_plh, scope_in + '_embed_1')
    embedding = fully_connected(
        hidden,
        target_dim,
        activation_fn=None,
        weights_regularizer=tf.contrib.layers.l2_regularizer(0.001),
        scope=scope_in + '_embed_2')

    if not do_l2norm:
        return embedding
    return tf.nn.l2_normalize(embedding, norm_axis)
def embedding_model(im_feats, sent_feats, train_phase, im_labels,
                    fc_dim=1024, embed_dim=1024):
    """
    Build two-branch embedding networks (image and voice).

    Each branch is ``add_fc``, a linear fully connected layer and a batch
    normalization, followed by l2 normalization along axis 1.

    Args:
        im_feats: input of the image branch.
        sent_feats: input of the voice branch.
        train_phase: forwarded to ``add_fc`` and used as the ``training``
            flag of the batch normalizations.
        im_labels: not used by this function.
        fc_dim: the output dimension of the first fc layer.
        embed_dim: the output dimension of the second fc layer,
            i.e. embedding space dimension.

    Returns:
        ``(i_embed, s_embed)``: the image and voice embeddings.
    """
    def _batch_norm(tensor):
        return tf.layers.batch_normalization(
            tensor, momentum=0.1, epsilon=1e-5, training=train_phase)

    # Image branch.
    image_hidden = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
    image_proj = fully_connected(
        image_hidden, embed_dim, activation_fn=None, scope='im_embed_2')
    i_embed = tf.nn.l2_normalize(_batch_norm(image_proj), 1, epsilon=1e-10)

    # Voice branch.
    voice_hidden = add_fc(sent_feats, fc_dim, train_phase, 'sent_embed_1')
    voice_proj = fully_connected(
        voice_hidden, embed_dim, activation_fn=None, scope='sent_embed_2')
    s_embed = tf.nn.l2_normalize(_batch_norm(voice_proj), 1, epsilon=1e-10)

    return i_embed, s_embed
def _get_scale_tril(self, _input, hid_layer=None):
    """
    Build the network that outputs the square scale matrices.

    When the ``share_params`` directive is set, a single linear layer is
    applied to ``hid_layer``. Otherwise a separate stack of hidden layers
    is built on ``_input`` (configured by ``self.directives``) and a linear
    layer with small random initial weights is put on top.

    Args:
        _input: input of the separate hidden stack.
        hid_layer: hidden layer to build on when parameters are shared.

    Returns:
        The layer output reshaped to ``[-1, output_dim, output_dim]``,
        where ``output_dim`` is the last entry of
        ``self._oslot_to_shape[0]``.
    """
    directives = self.directives
    depth = directives['num_layers']
    width = directives['num_nodes']
    act_fn = directives['activation']
    growth = directives['net_grow_rate']
    out_dim = self._oslot_to_shape[0][-1]
    flat_dim = out_dim**2

    def _hidden(tensor, units):
        return fully_connected(
            tensor,
            units,
            activation_fn=act_fn,
            biases_initializer=tf.random_normal_initializer(
                stddev=1 / np.sqrt(units)))

    with tf.variable_scope(self.name + '_scale', reuse=tf.AUTO_REUSE):
        if not directives['share_params']:
            print("_input:", _input)
            hid_layer = _hidden(_input, width)
            for _ in range(depth - 1):
                width = int(width * growth)
                hid_layer = _hidden(hid_layer, width)

            flat_output = fully_connected(
                hid_layer,
                flat_dim,
                activation_fn=None,
                weights_initializer=tf.random_normal_initializer(
                    stddev=1e-4),
                biases_initializer=tf.random_normal_initializer(
                    stddev=1 / np.sqrt(flat_dim)))
        else:
            flat_output = fully_connected(
                hid_layer, flat_dim, activation_fn=None)

        # One (out_dim x out_dim) matrix per row of the flat output.
        output_chol = tf.reshape(flat_output, shape=[-1, out_dim, out_dim])

    return output_chol
def setup_img_model(self, im_feats, train_phase):
    """
    Build the image embedding branch.

    ``im_feats`` goes through ``add_fc`` with ``self.fc_dim`` outputs and a
    linear fully connected layer with ``self.embed_dim`` outputs.

    Returns:
        The embedding, l2-normalized along axis 1.
    """
    hidden = add_fc(im_feats, self.fc_dim, train_phase, 'im_embed_1')
    projected = fully_connected(
        hidden, self.embed_dim, activation_fn=None, scope='im_embed_2')
    return tf.nn.l2_normalize(projected, 1, epsilon=1e-10)
def discriminator(x, y):
    '''
    Build the conditional discriminator network.

    ``x`` and ``y`` are each passed through their own fully connected layer
    (1000 and 200 units) and concatenated along axis 1. The result goes
    through two dropout + 1200-unit sigmoid layers, is extended by
    ``compare_to_minibatch`` and finally mapped to a single linear output.

    args:
        x: data vector
        y: conditioning input
    returns:
        net: discriminator network
    '''
    xf = layers.fully_connected(x, 1000, scope='fcx')
    yf = layers.fully_connected(y, 200, scope='fcy')
    # tf.concat takes (values, axis) since TensorFlow 1.0; the previous
    # (axis, values) order fails on the TF 1.x API used by the rest of
    # this file.
    net = tf.concat([xf, yf], 1)
    net = layers.dropout(net, 0.2, scope='do1')
    net = layers.fully_connected(
        net, 1200, activation_fn=tf.nn.sigmoid, scope='fc1')
    net = layers.dropout(net, 0.5, scope='do2')
    net = layers.fully_connected(
        net, 1200, activation_fn=tf.nn.sigmoid, scope='fc2')
    net = compare_to_minibatch(net)
    # no activation function because it's in the cost function used later.
    net = layers.fully_connected(net, 1, scope='fc3', activation_fn=None)
    return net
def embedding_model(im_feats, sent_feats, train_phase, im_labels,
                    fc_dim=1024, embed_dim=512):
    """
    Build two-branch embedding networks.

    The image branch is ``add_fc`` followed by a linear fully connected
    layer. The text branch casts its input to int32, one-hot encodes it
    with depth 2, and applies ``add_fc`` (4 outputs), a flatten, ``add_fc``
    (256 outputs) and a linear fully connected layer.

    Args:
        im_feats: input of the image branch.
        sent_feats: input of the text branch.
        train_phase: forwarded to ``add_fc``.
        im_labels: not used by this function.
        fc_dim: the output dimension of the first fc layer of the image
            branch.
        embed_dim: the output dimension of the last fc layer of both
            branches, i.e. embedding space dimension.

    Returns:
        ``(i_embed, s_embed)``: both embeddings, l2-normalized along axis 1.
    """
    # Image branch.
    image_hidden = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
    image_proj = fully_connected(
        image_hidden, embed_dim, activation_fn=None, scope='im_embed_2')
    i_embed = tf.nn.l2_normalize(image_proj, 1, epsilon=1e-10)

    # Text branch.
    one_hot_text = tf.one_hot(tf.to_int32(sent_feats, name='ToInt32'), 2)
    text_hidden = add_fc(one_hot_text, 4, train_phase, 'sent_embed_0')
    text_hidden = tf.layers.flatten(text_hidden)
    text_hidden = add_fc(text_hidden, 256, train_phase, 'sent_embed_2')
    text_proj = fully_connected(
        text_hidden, embed_dim, activation_fn=None, scope='sent_embed_3')
    s_embed = tf.nn.l2_normalize(text_proj, 1, epsilon=1e-10)

    return i_embed, s_embed
def decoder(self, x, keep_prob):
    """
    Decode ``x`` with a dense layer and two transposed convolutions.

    Args:
        x: input tensor.
        keep_prob: second argument of ``tf.nn.dropout``, applied to the
            4096-unit linear layer.

    Returns:
        The output of the second transposed convolution (3 outputs).
    """
    dense = tf.nn.dropout(
        fully_connected(x, 4096, activation_fn=None), keep_prob)
    # 4096 values per row are laid out as a 16 x 16 x 16 block.
    unflattened = tf.reshape(dense, [-1, 16, 16, 16])
    upsampled = convolution2d_transpose(
        unflattened, 32, [5, 5], [2, 2], padding='SAME')
    return convolution2d_transpose(
        upsampled, 3, [5, 5], [2, 2], padding='SAME')
def encoder(self, x, keep_prob):
    """
    Encode ``x`` with two convolutions and a linear dense layer.

    Args:
        x: input tensor.
        keep_prob: second argument of ``tf.nn.dropout``, applied to the
            dense layer.

    Returns:
        The dense layer of ``self.params['final_layer']`` outputs after
        dropout.
    """
    features = convolution2d(x, 32, [5, 5], [2, 2], padding='SAME')
    features = convolution2d(features, 16, [5, 5], [2, 2], padding='SAME')
    flat_features = flatten(features)
    code = fully_connected(
        flat_features, self.params['final_layer'], activation_fn=None)
    return tf.nn.dropout(code, keep_prob)
def embedding_model(feats, train_phase, scope_name,
                    fc_dim = n_inputs, embed_dim = n_hidden):
    """
    Build one branch of an embedding network.

    Args:
        feats: input of the branch.
        train_phase: forwarded to ``add_fc``.
        scope_name: scope of the first layer; the second layer uses
            ``scope_name + '_2'``.
        fc_dim: the output dimension of the first fc layer.
        embed_dim: the output dimension of the second fc layer,
            i.e. embedding space dimension.

    Returns:
        The embedding, l2-normalized along axis 1.
    """
    hidden = add_fc(feats, fc_dim, train_phase, scope_name)
    projected = fully_connected(
        hidden, embed_dim, activation_fn=None, scope=scope_name + '_2')
    return tf.nn.l2_normalize(projected, 1, epsilon=1e-10)
def setup_img_model(im_feats, train_phase, args, fc_dim=2048, embed_dim=512):
    """Build the image embedding branch.

    When ``args.init_filename`` is set, the layers are created by
    ``setup_initialize_fc_layers`` from the parameters pickled in that
    file. Otherwise the branch is ``add_fc`` followed by a linear,
    l2-regularized fully connected layer and l2 normalization along axis 1.

    Args:
        im_feats: input image features.
        train_phase: forwarded to the layer builders.
        args: options object; ``init_filename`` is read.
        fc_dim: the output dimension of the first fc layer.
        embed_dim: the output dimension of the second fc layer.

    Returns:
        The image embedding.
    """
    if args.init_filename:
        # NOTE: unpickling runs arbitrary code; only load trusted files.
        # The context manager closes the file handle deterministically.
        with open(args.init_filename, 'rb') as param_file:
            parameters = pickle.load(param_file)
        # NOTE(review): this argument order differs from the
        # setup_initialize_fc_layers(args, feats, parameters, scope_in,
        # train_phase, ...) definition visible in this file -- confirm
        # which helper version this call targets.
        i_embed = setup_initialize_fc_layers(im_feats, parameters, 'vis',
                                             train_phase, args)
    else:
        im_fc1 = add_fc(im_feats, fc_dim, train_phase, 'im_embed_1')
        im_fc2 = fully_connected(
            im_fc1,
            embed_dim,
            activation_fn=None,
            weights_regularizer=tf.contrib.layers.l2_regularizer(0.0005),
            scope='im_embed_2')
        i_embed = tf.nn.l2_normalize(im_fc2, 1, epsilon=1e-10)
    return i_embed
def compare_to_minibatch(input, num_kernels=5, kernel_dim=3):
    '''
    take output of intermediate layer of the discriminator, and compare
    individual samples within the minibatch

    args:
        input: output of an intermediate discriminator layer
        num_kernels: number of minibatch features added per sample
        kernel_dim: size of the vector each feature is computed from
    returns:
        input with the ``num_kernels`` minibatch features appended along
        axis 1
    '''
    # multiply discriminator layer by 3D tensor to produce matrix
    x = layers.fully_connected(input, num_kernels * kernel_dim)
    activation = tf.reshape(x, (-1, num_kernels, kernel_dim))

    # compute L1-distance between rows of matrix: broadcasting the
    # activation against its transposed copy pairs every sample with
    # every other sample of the minibatch.
    diffs = tf.expand_dims(activation, 3) - \
        tf.expand_dims(tf.transpose(activation, [1, 2, 0]), 0)
    abs_diffs = tf.reduce_sum(tf.abs(diffs), 2)

    # apply negative exponential
    minibatch_features = tf.reduce_sum(tf.exp(-abs_diffs), 2)

    # tf.concat takes (values, axis) since TensorFlow 1.0; the previous
    # (axis, values) order fails on the TF 1.x API used by the rest of
    # this file.
    return tf.concat([input, minibatch_features], 1)
def universal_embedding_layer(embedded_word_ids, tokens, embed_dim, suffix,
                              trainable=True):
    """
    Project word embeddings with a linear layer and average them.

    Args:
        embedded_word_ids: input of the projection layer.
        tokens: token ids; entries > 0 are counted as words.
        embed_dim: number of outputs of the projection layer.
        suffix: appended to ``'mule_'`` to form the layer scope.
        trainable: forwarded to the projection layer.

    Returns:
        ``(universal_embedding, avg_words)``: the projected embeddings and
        their sum over axis 1 divided by the word count, l2-normalized
        along axis 1.
    """
    universal_embedding = fully_connected(
        embedded_word_ids,
        embed_dim,
        activation_fn=None,
        weights_regularizer=tf.contrib.layers.l2_regularizer(0.005),
        trainable=trainable,
        scope='mule_' + suffix)

    # The small constant keeps the division finite when no token is > 0.
    is_word = tf.to_float(tokens > 0)
    word_count = tf.reduce_sum(is_word, 1, keep_dims=True) + 1e-10
    summed_words = tf.reduce_sum(universal_embedding, 1)
    avg_words = tf.nn.l2_normalize(summed_words / word_count, 1)

    return universal_embedding, avg_words
def get_phrase_scores(self, phrase_embed, region_embed, concept_weights):
    """
    Score every phrase against every region.

    The phrase and region embeddings are combined by a broadcast
    elementwise product, passed through ``add_fc``, through the sum of
    ``self.args.num_embeddings`` concept layers (ids starting at 1) and
    through a final linear layer with one output.

    Args:
        phrase_embed: phrase embeddings, expanded on axis 2.
        region_embed: region embeddings, expanded on axis 1.
        concept_weights: forwarded to every ``concept_layer`` call.

    Returns:
        ``(region_prob, joint_embed_3)``: the logistic sigmoid of the
        scores and the raw scores with axis 3 squeezed out.
    """
    pairwise = tf.expand_dims(phrase_embed, 2) * tf.expand_dims(
        region_embed, 1)
    joint_hidden = add_fc(pairwise, self.embed_dim, self.train_phase,
                          'joint_embed_1')

    # The first concept layer is always built; the others are added to it.
    concept_sum = concept_layer(joint_hidden, self.final_embed,
                                self.train_phase, 1, concept_weights)
    for concept_id in range(2, self.args.num_embeddings + 1):
        concept_sum = concept_sum + concept_layer(
            joint_hidden, self.final_embed, self.train_phase, concept_id,
            concept_weights)

    scores = fully_connected(
        concept_sum,
        1,
        activation_fn=None,
        weights_regularizer=l2_regularizer(0.005),
        scope='joint_embed_3')
    joint_embed_3 = tf.squeeze(scores, [3])

    region_prob = 1. / (1. + tf.exp(-joint_embed_3))
    return region_prob, joint_embed_3