def model_fn(features, labels, mode, params):
    """Estimator model function for the attention-based caption model.

    Builds attention over the images, gathers one context per caption via
    the assignment indices, decodes logits and returns the
    ``tf.estimator.EstimatorSpec`` that matches ``mode``.

    Args:
        features: dict with the keys 'images', 'captions' and 'assignments'.
        labels: unused; the captions in ``features`` serve as targets.
        mode: one of ``tf.estimator.ModeKeys``.
        params: hyper-parameter object; reads ``l2``, ``lr``, ``decay_rate``,
            ``decay_steps``, ``optimizer`` and ``momentum``.

    Returns:
        A ``tf.estimator.EstimatorSpec``.

    Raises:
        ValueError: if ``params.optimizer`` is not 'adam', 'momentum' or
            'rmsprop' (TRAIN mode only).
    """
    images = features['images']  # (image_n, h, w, c)
    captions = features['captions']  # (caption_n, depth)
    assignments = features['assignments']  # (caption_n,)

    attention, sentinel = attention_fn(images, mode=mode, params=params)
    # (image_n, frames, c)
    context = apply_attn(img=images, att=attention, sen=sentinel)
    # One context row per caption: (caption_n, frames, c)
    decoder_context = tf.gather(context, assignments, axis=0)
    # (caption_n, depth, vocab)
    logits = decoder_fn(decoder_context, sentinel, captions, mode, params)
    classes = tf.argmax(logits, axis=2)

    if mode == tf.estimator.ModeKeys.PREDICT:
        return tf.estimator.EstimatorSpec(
            mode=mode, predictions={"classes": classes})

    total_loss = tf.reduce_mean(cross_entropy(labels=captions, logits=logits))
    if params.l2 > 0:
        penalty = apply_regularization(l2_regularizer(params.l2),
                                       tf.trainable_variables())
        tf.summary.scalar("regularization", penalty)
        total_loss += penalty

    if mode != tf.estimator.ModeKeys.TRAIN:
        # Evaluation: loss only, no extra metrics.
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=total_loss,
                                          eval_metric_ops={})

    learning_rate = tf.train.exponential_decay(
        params.lr,
        decay_rate=params.decay_rate,
        decay_steps=params.decay_steps,
        global_step=tf.train.get_global_step(),
        name='learning_rate',
        staircase=False)
    tf.summary.scalar('learning_rate', learning_rate)

    optimizer_name = params.optimizer
    if optimizer_name == 'adam':
        optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    elif optimizer_name == 'momentum':
        optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
                                               momentum=params.momentum)
    elif optimizer_name == 'rmsprop':
        optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
                                              momentum=params.momentum)
    else:
        raise ValueError("Unknown optimizer: {}".format(params.optimizer))

    print("Trainable: {}".format(
        list(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))))
    train_op = optimizer.minimize(loss=total_loss,
                                  global_step=tf.train.get_global_step())
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=total_loss,
                                      train_op=train_op)
def loss(self, data, labels):
    """The loss to minimize while training.

    For regression the loss is the root-mean-squared error between the
    training inference graph and ``labels``; otherwise it is the mean sparse
    softmax cross-entropy. When ``self.regularizer`` is set, its penalty over
    all trainable variables is added on top.

    Args:
        data: input passed to ``self.training_inference_graph``.
        labels: targets; cast to float for regression, to int32 (and
            squeezed) for classification.

    Returns:
        The loss tensor.
    """
    if self.is_regression:
        diff = self.training_inference_graph(data) - math_ops.to_float(labels)
        mean_squared_error = math_ops.reduce_mean(diff * diff)
        loss = math_ops.sqrt(mean_squared_error, name="loss")
    else:
        logits = self.training_inference_graph(data)
        class_ids = array_ops.squeeze(math_ops.to_int32(labels))
        # Keyword arguments are required here: TensorFlow >= 1.0 rejects
        # positional arguments to this op, and keywords also work with the
        # older (logits, labels) positional signature.
        loss = math_ops.reduce_mean(
            nn_ops.sparse_softmax_cross_entropy_with_logits(
                labels=class_ids, logits=logits),
            name="loss")
    if self.regularizer:
        loss += layers.apply_regularization(self.regularizer,
                                            variables.trainable_variables())
    return loss
def _build_model(self):
    """Build the binned-classification graph, its loss and the train op.

    Pipeline: placeholders -> 1-D convolution -> GRU -> temporal attention,
    concatenated with the last GRU state, split per feature and projected to
    ``self.config.nbins`` logits per feature.

    Sets ``self.predictions``, ``self.loss`` and ``self.acc``, then calls
    ``self.add_train_op()`` and ``self.initialize_session()``.
    """
    self.add_placeholder()

    # short term memory
    # NOTE(review): the extent of this variable scope was reconstructed from
    # a flattened source line - confirm which statements belong inside it.
    with tf.variable_scope("short_term"):
        conv = self.conv1d(self.input_x,
                           self.config.kernel_sizes,
                           self.config.num_filters,
                           scope="short_term")
        gru_outputs = self.gru(conv, scope="short_gru")  # [b, t, d]

        context = self.temporal_attention(gru_outputs)  # [b, d]
        last_hidden_states = gru_outputs[:, -1, :]  # [b, d]
        linear_inputs = tf.concat([context, last_hidden_states], axis=1)

    # linear_inputs [b, 2d] -> [b, nfeatures, -1]
    linear_inputs = tf.stack(
        tf.split(linear_inputs, self.config.nfeatures, axis=1), axis=1)
    # logits [b, nfeatures, -1] -> [b, nfeatures, nbins]
    logits = tf.layers.dense(linear_inputs,
                             self.config.nbins,
                             activation=None,
                             use_bias=True,
                             kernel_regularizer=self.regularizer,
                             kernel_initializer=layers.xavier_initializer())

    # get predictions
    self.predictions = tf.argmax(logits, axis=-1, output_type=tf.int32)

    # Uniform weights: every target position counts equally in the loss.
    weights = tf.ones(tf.shape(self.targets))
    self.loss = tf.contrib.seq2seq.sequence_loss(labels=self.targets,
                                                 logits=logits,
                                                 weights=weights)
    self.acc = tf.reduce_mean(
        tf.cast(tf.equal(self.predictions, self.targets), dtype=tf.float32))

    if self.config.l2_lambda > 0:
        # REGULARIZATION_LOSSES already holds the penalty tensors produced by
        # `kernel_regularizer`; they only need to be summed. Running the
        # regularizer over them again would penalise the penalties rather
        # than the weights.
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        if reg_losses:
            self.loss += tf.add_n(reg_losses)

    self.add_train_op()
    self.initialize_session()
def _build_model(self):
    """Build the regression graph (neural part plus auto-regression).

    Pipeline: placeholders -> 1-D convolution -> GRU -> temporal attention,
    concatenated with the last GRU state and projected to
    ``self.config.nfeatures`` outputs; the auto-regressive component is
    added to that projection.

    Sets ``self.predictions``, ``self.loss`` (MSE + regularization + AR
    loss) and the metrics ``self.rse``, ``self.mae`` and ``self.mape``, then
    calls ``self.add_train_op()`` and ``self.initialize_session()``.
    """
    self.add_placeholder()

    # short term memory
    # NOTE(review): the extent of this variable scope was reconstructed from
    # a flattened source line - confirm which statements belong inside it.
    with tf.variable_scope("short_term"):
        conv = self.conv1d(self.input_x,
                           self.config.kernel_sizes,
                           self.config.num_filters,
                           scope="short_term")
        gru_outputs = self.gru(conv, scope="short_gru")  # [b, t, d]

        context = self.temporal_attention(gru_outputs)  # [b, d]
        last_hidden_states = gru_outputs[:, -1, :]  # [b, d]
        linear_inputs = tf.concat([context, last_hidden_states], axis=1)

    # prediction and loss
    predictions = tf.layers.dense(
        linear_inputs,
        self.config.nfeatures,
        activation=tf.nn.tanh,
        use_bias=True,
        kernel_regularizer=self.regularizer,
        kernel_initializer=layers.xavier_initializer())

    # get auto-regression and add it to prediction from NN
    ar, ar_loss = self.auto_regressive(self.input_x, self.config.ar_lambda)
    self.predictions = predictions + ar
    self.loss = tf.losses.mean_squared_error(labels=self.targets,
                                             predictions=self.predictions)

    # Metrics: residual norm over the norm of the mean-centred targets,
    # mean absolute error and mean absolute percentage error.
    error = tf.reduce_sum((self.targets - self.predictions)**2)**0.5
    denom = tf.reduce_sum(
        (self.targets - tf.reduce_mean(self.targets))**2)**0.5
    self.rse = error / denom
    self.mae = tf.reduce_mean(tf.abs(self.targets - self.predictions))
    # NOTE(review): divides by the raw targets, so a zero target yields
    # inf/nan in this metric.
    self.mape = tf.reduce_mean(
        tf.abs((self.targets - self.predictions) / self.targets))

    if self.config.l2_lambda > 0:
        # REGULARIZATION_LOSSES already holds the penalty tensors produced by
        # `kernel_regularizer`; they only need to be summed. Running the
        # regularizer over them again would penalise the penalties rather
        # than the weights.
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        if reg_losses:
            self.loss += tf.add_n(reg_losses)

    self.loss += ar_loss

    self.add_train_op()
    self.initialize_session()
def build_graph(self):
    """Assemble the training graph: multinomial likelihood plus L2 penalty.

    Returns:
        Tuple ``(saver, logits, loss, train_op, merged)`` where ``saver`` and
        ``logits`` come from ``self.forward_pass()`` and ``merged`` is the
        merged summary op.
    """
    self.construct_weights()
    saver, logits = self.forward_pass()

    # per-user average negative log-likelihood
    log_probs = tf.nn.log_softmax(logits)
    per_user_ll = tf.reduce_sum(log_probs * self.input_ph, axis=1)
    neg_ll = -tf.reduce_mean(per_user_ll)

    # apply regularization to weights
    weight_penalty = apply_regularization(l2_regularizer(self.lam),
                                          self.weights)

    # TensorFlow's l2 regularizer multiplies the squared norm by 0.5;
    # multiplying by 2 brings the penalty back to the plain scale.
    loss = neg_ll + 2 * weight_penalty

    train_op = tf.train.AdamOptimizer(self.lr).minimize(loss)

    # add summary statistics
    tf.summary.scalar('negative_multi_ll', neg_ll)
    tf.summary.scalar('loss', loss)
    merged = tf.summary.merge_all()

    return saver, logits, loss, train_op, merged
def build_graph(self):
    """Assemble the variational graph and expose its parameters.

    The negative ELBO is the multinomial negative log-likelihood plus the
    annealed KL term plus an L2 penalty on the q- and p-network weights.

    An Adam train op and the merged summary op are created in the graph but
    are not part of the return value.

    Returns:
        Tuple ``(probabilities, neg_ELBO, params)`` where ``probabilities``
        is the softmax of the logits and ``params`` lists the q-weights,
        p-weights, q-biases and p-biases, in that order.
    """
    self._construct_weights()
    saver, logits, KL = self.forward_pass()

    log_probs = tf.nn.log_softmax(logits)
    per_user_ll = tf.reduce_sum(log_probs * self.input_ph, axis=-1)
    neg_ll = -tf.reduce_mean(per_user_ll)

    # apply regularization to weights
    weight_penalty = apply_regularization(l2_regularizer(self.lam),
                                          self.weights_q + self.weights_p)

    # TensorFlow's l2 regularizer multiplies the squared norm by 0.5;
    # multiplying by 2 brings the penalty back to the plain scale.
    neg_ELBO = neg_ll + self.anneal_ph * KL + 2 * weight_penalty

    # Built for their graph side effects (optimizer slots, summary op);
    # callers receive the parameter list instead.
    train_op = tf.train.AdamOptimizer(self.lr).minimize(neg_ELBO)

    # add summary statistics
    tf.summary.scalar('negative_multi_ll', neg_ll)
    tf.summary.scalar('KL', KL)
    tf.summary.scalar('neg_ELBO_train', neg_ELBO)
    merged = tf.summary.merge_all()

    params = []
    for group in (self.weights_q, self.weights_p,
                  self.biases_q, self.biases_p):
        params.extend(group)

    return tf.nn.softmax(logits), neg_ELBO, params
def build_graph(self):
    """Assemble the reward-augmented variational graph.

    Samples ``z`` from the q-network (noise is switched off when
    ``self.is_training_ph`` is 0), reconstructs through the p-network and
    scores the sample with the r-network. The objective is the squared
    reconstruction error against the L2-normalised input, plus the annealed
    KL term, plus an L2 weight penalty, minus ``self.reward`` times the
    r-network output.

    Returns:
        Tuple ``(logits, neg_ELBO, train_op, merged, sampled_z)``. ``neg_ELBO``
        keeps its per-example shape; only the summaries log its mean.
    """
    mu_q, std_q, KL = self.q_graph()
    sampled_z = mu_q + self.is_training_ph * self.epsilon * std_q

    # p-network
    logits = self.p_graph(sampled_z)

    # Alternative (disabled) likelihood: element-wise binary cross-entropy,
    #   -sum(x * log(logits) + (1 - x) * log(1 - logits), axis=1)
    neg_ll = tf.reduce_sum(
        tf.square(tf.nn.l2_normalize(self.input_ph, 1) - logits), 1)

    # apply regularization to weights
    reg = l2_regularizer(self.lam)
    neg_reward = self.r_graph(sampled_z)
    reg_var = apply_regularization(
        reg, self.weights_q + self.weights_p + self.weights_r)

    # TensorFlow's l2 regularizer multiplies the squared norm by 0.5;
    # multiplying by 2 brings the penalty back to the plain scale.
    neg_ELBO = neg_ll + self.anneal_ph * KL + 2 * reg_var - tf.multiply(
        self.reward, neg_reward)

    train_op = tf.train.AdamOptimizer(self.lr).minimize(neg_ELBO)

    # add summary statistics
    # `neg_ll` (and therefore `neg_ELBO`) is reduced over axis 1 only, so it
    # still has a batch dimension; tf.summary.scalar needs a scalar, hence
    # the mean. For an already-scalar tensor the mean is a no-op.
    tf.summary.scalar('negative_multi_ll', tf.reduce_mean(neg_ll))
    tf.summary.scalar('KL', KL)
    tf.summary.scalar('neg_ELBO_train', tf.reduce_mean(neg_ELBO))
    merged = tf.summary.merge_all()

    return logits, neg_ELBO, train_op, merged, sampled_z
def test(self, test_list, modelpath, num_test_clips=3783):
    """Evaluate the C3D network on a test list and print the accuracy.

    Builds the network in ``self.graph``, restores the latest checkpoint
    found under ``modelpath + 'models'`` (if any) and runs
    ``num_test_clips // self.batch_size`` full batches, counting correct
    top-1 predictions.

    Args:
        test_list: file listing the test clips, forwarded to
            ``read_clip_and_label``.
        modelpath: path prefix; the checkpoint directory is
            ``modelpath + 'models'``.
        num_test_clips: number of clips in ``test_list``. Defaults to 3783,
            the value that was previously hard-coded.
    """
    with self.graph.as_default():
        # Layer table consumed by self.parseNet.
        c3d_net = [
            ["conv", "conv1", [3, 3, 3, 3, 64], 'wc1', 'bc1'],
            ["maxpool", "pool1", [1, 1, 2, 2, 1]],
            ["conv", "conv2", [3, 3, 3, 64, 128], 'wc2', 'bc2'],
            ["maxpool", "pool2", [1, 2, 2, 2, 1]],
            ["conv", "conv3a", [3, 3, 3, 128, 256], 'wc3a', 'bc3a'],
            ["conv", "conv3b", [3, 3, 3, 256, 256], 'wc3b', 'bc3b'],
            ["maxpool", "pool3", [1, 2, 2, 2, 1]],
            ["conv", "conv4a", [3, 3, 3, 256, 512], 'wc4a', 'bc4a'],
            ["conv", "conv4b", [3, 3, 3, 512, 512], 'wc4b', 'bc4b'],
            ["maxpool", "pool4", [1, 2, 2, 2, 1]],
            ["conv", "conv5a", [3, 3, 3, 512, 512], 'wc5a', 'bc5a'],
            ["conv", "conv5b", [3, 3, 3, 512, 512], 'wc5b', 'bc5b'],
            ["maxpool", "pool5", [1, 2, 2, 2, 1]],
            # A ["transpose", [0, 1, 4, 2, 3]] entry was left disabled here;
            # the original note ties it to which pretrained model
            # (sports1m_finetuning_ucf101 vs. conv3d_deepnetA_sport1m) is
            # being restored.
            ["reshape", [-1, 8192]],
            ["fc", "fc1", [8192, 4096], 'wd1', 'bd1', True],
            ["dropout", "dropout1", self.keep_prob],
            ["fc", "fc2", [4096, 4096], 'wd2', 'bd2', True],
            ["dropout", "dropout2", self.keep_prob],
            ["fc", "fc3", [4096, self.num_class], 'wout', 'bout', False],
        ]

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.9

        with tf.Session(config=config, graph=self.graph) as sess:
            logits = self.parseNet(self.inputs, c3d_net)
            softmax_logits = tf.nn.softmax(logits)
            int_label = self.labels

            # Only the correct-prediction count is needed for evaluation;
            # the loss, regularizer and optimizer ops that training needs
            # are deliberately not built here.
            right_count = tf.reduce_sum(
                tf.cast(tf.equal(tf.argmax(softmax_logits, axis=1),
                                 int_label),
                        tf.int32))

            total_para = np.sum([np.prod(v.get_shape().as_list())
                                 for v in tf.trainable_variables()])
            print('total_para:', total_para)

            init = tf.global_variables_initializer()
            sess.run(init)

            checkpath = modelpath + 'models'
            ckpt = tf.train.get_checkpoint_state(checkpath)
            if ckpt and ckpt.model_checkpoint_path:
                print('ok!!!!!!!!!!!!!!!')
                saver = tf.train.Saver()
                saver.restore(sess, ckpt.model_checkpoint_path)
                print('resore!!!!!!!!!!!')
            else:
                # No checkpoint: evaluation continues with the freshly
                # initialised weights, as before.
                print(ckpt)
            print("Model Loading Done!")

            next_start_pos = 0
            total_v = 0.0
            test_correct_num = 0
            # Only whole batches are evaluated; a trailing partial batch is
            # skipped.
            for _ in tqdm(range(num_test_clips // self.batch_size)):
                total_v += self.batch_size
                clip_batch, label_batch, next_start_pos, _, _ = \
                    read_clip_and_label(
                        filename=test_list,
                        batch_size=self.batch_size,
                        num_frames_per_clip=self.CLIP_LENGTH,
                        height=self.IMG_HEIGHT,
                        width=self.IMG_WIDTH,
                        start_pos=next_start_pos,
                        shuffle=False)
                assert len(clip_batch) == self.batch_size, \
                    'reader returned an incomplete batch'
                clip_batch = train_aug(clip_batch,
                                       is_train=False,
                                       Crop_heith=self.CROP_HEIGHT,
                                       Crop_width=self.CROP_WIDTH,
                                       norm=True)
                val_feed = {self.inputs: clip_batch,
                            self.labels: label_batch}
                test_correct_num += sess.run(right_count, val_feed)

            # Avoid ZeroDivisionError when batch_size exceeds the clip count.
            test_acc = test_correct_num / total_v if total_v else 0.0
            print('test acc:', test_acc,
                  'test_correct_num:', test_correct_num,
                  'total_v:', total_v)
def build_model(self):
    """Build the reading-comprehension graph: embeddings, encoders, pointer
    network, losses and decoded span outputs.

    Sets ``self.unanswer_bias``, ``self.loss_a``, ``self.loss_c``,
    ``self.loss``, ``self.mask_output1`` and ``self.mask_output2``.

    NOTE(review): the nesting of the variable scopes below was reconstructed
    from a flattened source; confirm it against a checkpoint's variable
    names before relying on it.
    """
    with tf.variable_scope("Input_Embedding_Layer"):
        # Character embeddings; AUTO_REUSE is set on the scope that builds
        # both the context and the question CharCNN.
        with tf.variable_scope('Char_Conv', reuse=tf.AUTO_REUSE):
            ch_emb = tf.nn.dropout(
                tf.nn.embedding_lookup(self.char_mat, self.contc_input),
                1.0 - self.dropout_emb)
            qh_emb = tf.nn.dropout(
                tf.nn.embedding_lookup(self.char_mat, self.quesc_input),
                1.0 - self.dropout_emb)
            ch_emb = CharCNN(ch_emb, self.char_limit, self.char_dim,
                             self.filters, self.c_maxlen)
            qh_emb = CharCNN(qh_emb, self.char_limit, self.char_dim,
                             self.filters, self.q_maxlen)

        # Word embeddings.
        c_emb0 = tf.nn.dropout(
            tf.nn.embedding_lookup(self.word_mat, self.contw_input),
            1.0 - self.dropout_emb)
        q_emb0 = tf.nn.dropout(
            tf.nn.embedding_lookup(self.word_mat, self.quesw_input),
            1.0 - self.dropout_emb)

        # cove features: computed in-graph (gradient stopped) when
        # use_cove == 2, otherwise read from attributes on self.
        if self.use_cove == 2:
            cove_cont_low, cove_cont_high = tf.unstack(
                tf.stop_gradient(self.cove_model(c_emb0)), axis=0)
            cove_ques_low, cove_ques_high = tf.unstack(
                tf.stop_gradient(self.cove_model(q_emb0)), axis=0)
        else:
            cove_cont_low = self.cove_cont_low
            cove_cont_high = self.cove_cont_high
            cove_ques_low = self.cove_ques_low
            cove_ques_high = self.cove_ques_high

        # pre alignment
        c2q_prealign = AttentionLayer(c_emb0, q_emb0, q_emb0, self.q_mask,
                                      self.filters, self.dropout_att)

        c_emb = tf.concat([c_emb0, ch_emb, cove_cont_low], axis=-1)
        q_emb = tf.concat([q_emb0, qh_emb, cove_ques_low], axis=-1)
        c_emb = tf.nn.dropout(c_emb, 1 - self.dropout)
        q_emb = tf.nn.dropout(q_emb, 1 - self.dropout)

        # FeedForward layer
        with tf.variable_scope('FeedForward_Layer'):
            c_emb = FeedForward(c_emb, self.filters, self.dropout,
                                name='cont_ff')
            q_emb = FeedForward(q_emb, self.filters, self.dropout,
                                name='ques_ff')

    with tf.variable_scope('Encoder_Layers'):
        with tf.variable_scope('Contextual_Encoder', reuse=tf.AUTO_REUSE):
            # context encode
            c_emb_low = tf.concat([c_emb, c2q_prealign, cove_cont_low],
                                  axis=-1)
            c_emb_low = BiLSTM(c_emb_low,
                               filters=self.filters,
                               name='cont_lstm_low',
                               dropout=self.dropout_rnn)
            c_emb_high = tf.concat([c_emb_low, cove_cont_high], axis=-1)
            c_emb_high = BiLSTM(c_emb_high,
                                filters=self.filters,
                                name='cont_lstm_high',
                                dropout=self.dropout_rnn)
            c_emb_high = tf.nn.dropout(c_emb_high, 1 - self.dropout)

            # question encode
            q_emb_low = tf.concat([q_emb, cove_ques_low], axis=-1)
            q_emb_low = BiLSTM(q_emb_low,
                               filters=self.filters,
                               name='ques_lstm_low',
                               dropout=self.dropout_rnn)
            q_emb_high = tf.concat([q_emb_low, cove_ques_high], axis=-1)
            q_emb_high = BiLSTM(q_emb_high,
                                filters=self.filters,
                                name='ques_lstm_high',
                                dropout=self.dropout_rnn)
            # NOTE(review): filters/dropout are passed positionally here but
            # by keyword above - confirm they land on the same parameters.
            q_mem_hidden = BiLSTM(
                tf.concat([q_emb_low, q_emb_high], axis=-1),
                self.filters,
                self.dropout_rnn,
                name='ques_lstm_memory')

        # c2q encode
        with tf.variable_scope('C2Q_Attention_Encoder'):
            c_att_input = tf.concat(
                [c_emb0, cove_cont_high, c_emb_low, c_emb_high], axis=-1)
            q_att_input = tf.concat(
                [q_emb0, cove_ques_high, q_emb_low, q_emb_high], axis=-1)
            v_att_input = [q_emb_low, q_emb_high, q_mem_hidden]
            c2q_att_hidden = DeepAttentionLayers(c_att_input,
                                                 q_att_input,
                                                 v_att_input,
                                                 self.q_mask,
                                                 self.filters,
                                                 self.dropout_att,
                                                 name='C2Q_Attention')
            c_mem_hidden = BiLSTM(
                tf.concat([c2q_att_hidden, c_emb_low, c_emb_high], axis=-1),
                self.filters,
                self.dropout_rnn,
                name='cont_lstm_memory')

        # self attention
        with tf.variable_scope('Self_Attention_Encoder'):
            c_mem_input = tf.concat([
                c2q_att_hidden, c_mem_hidden, c_emb_low, c_emb_high,
                cove_cont_high, c_emb0
            ], axis=-1)
            c_self_hidden = AttentionLayer(c_mem_input, c_mem_input,
                                           c_mem_input, self.c_mask,
                                           self.filters, self.dropout_att)
            c_mem = BiLSTM(
                tf.concat([c_self_hidden, c_mem_hidden], axis=-1),
                self.filters,
                self.dropout_rnn,
                name='cont_self_memory')
            q_mem = SumAttention(q_mem_hidden, self.q_mask,
                                 self.dropout_att)

    with tf.variable_scope('Point_Network'):
        start_scores, end_scores = SAN(c_mem,
                                       q_mem,
                                       self.c_mask,
                                       filters=self.filters * 2,
                                       hidden_size=self.filters * 2,
                                       num_turn=5,
                                       name='SAN',
                                       dropout=self.dropout)
        # A single learned bias, tiled per example and prepended as score
        # column 0 (the "no answer" slot).
        self.unanswer_bias = tf.get_variable(
            "unanswer_bias", [1], initializer=tf.zeros_initializer())
        self.unanswer_bias = tf.reshape(
            tf.tile(self.unanswer_bias, [self.un_size]), [-1, 1])
        start_scores = tf.concat((self.unanswer_bias, start_scores),
                                 axis=-1)
        end_scores = tf.concat((self.unanswer_bias, end_scores), axis=-1)

        c_sum = SumAttention(c_mem, self.c_mask, self.dropout_att)
        pred_score = Dense(tf.concat([c_sum, q_mem], axis=-1),
                           1,
                           norm=True,
                           dropout=self.dropout)

    with tf.variable_scope('Loss_Layer'):
        start_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=start_scores, labels=self.y_start)
        end_loss = tf.nn.softmax_cross_entropy_with_logits_v2(
            logits=end_scores, labels=self.y_end)
        self.loss_a = tf.reduce_mean(start_loss + end_loss)

        # Column 0 of y_start serves as the target for pred_score.
        answer_exist_label = tf.squeeze(
            tf.cast(tf.slice(self.y_start, [0, 0], [-1, 1]), tf.float32),
            axis=-1)
        self.loss_c = tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(
                logits=pred_score, labels=answer_exist_label))

        self.loss = self.loss_a + self.gamma * self.loss_c

        # l2 loss
        # NOTE(review): this feeds the REGULARIZATION_LOSSES collection
        # (penalty tensors, if the layers register any) back through the
        # regularizer - verify this is intended rather than a plain sum.
        if self.l2_norm is not None:
            variables = tf.get_collection(
                tf.GraphKeys.REGULARIZATION_LOSSES)
            l2_loss = apply_regularization(regularizer, variables)
            self.loss += self.l2_norm * tf.reduce_sum(l2_loss)

    with tf.variable_scope('Output_Layer'):
        # [bs,] has answer=1 no answer=0
        unanswer_mask = tf.cast(
            tf.less(tf.nn.sigmoid(pred_score), 0.5), tf.int64)
        # [bs,] has answer=0 no answer=-1
        unanswer_move = unanswer_mask - 1

        # Drop the "no answer" column before scoring spans.
        softmax_start_scores = tf.nn.softmax(
            tf.slice(start_scores, [0, 1], [-1, -1]))
        softmax_end_scores = tf.nn.softmax(
            tf.slice(end_scores, [0, 1], [-1, -1]))
        outer = tf.matmul(tf.expand_dims(softmax_start_scores, axis=2),
                          tf.expand_dims(softmax_end_scores, axis=1))
        # Keep spans with start <= end <= start + ans_limit.
        outer = tf.matrix_band_part(outer, 0, self.ans_limit)

        def position_encoding(x):
            # Down-weight spans longer than 5 tokens by 1 / log(length + 1).
            # NOTE(review): this writes into the array handed over by
            # tf.py_func in place.
            n_rows, n_cols = x.shape[0], x.shape[1]
            for i in range(n_rows):
                for j in range(n_cols):
                    if j - i > 5:
                        x[i][j] = float(1.0 / math.log(j - i + 1))
            return x

        mask_mat = tf.ones((self.c_maxlen, self.c_maxlen))
        mask_mat = tf.expand_dims(
            tf.py_func(position_encoding, [mask_mat], tf.float32), axis=0)
        mask_mat = tf.tile(mask_mat, [self.un_size, 1, 1])

        outer_masked = outer * mask_mat
        # Best start / end index per example; mapped to -1 when the model
        # predicts "no answer".
        self.mask_output1 = tf.argmax(
            tf.reduce_max(outer_masked, axis=2),
            axis=1) * unanswer_mask + unanswer_move
        self.mask_output2 = tf.argmax(
            tf.reduce_max(outer_masked, axis=1),
            axis=1) * unanswer_mask + unanswer_move
def build_model(self):
    """Build the tagging graph: embeddings, conv / Bi-RNN / attention
    encoders, dense projection, CRF cost and the train op.

    Optional pieces are switched by ``self.parameter``: pretrained
    embeddings, dropout after embedding, a language-model branch
    (``use_lm``), highway layers, self attention, regularization loss and an
    exponential moving average of the trainable variables.

    Sets ``self._embeddings``, ``self.cost`` and ``self.train_op``; with
    ``use_lm`` it also replaces ``self.lm_loss`` with the loss tensor.
    """
    cfg = self.parameter
    self._build_placeholder()

    # Load word vocab and char vocab if we are using pretrained embedding
    data = None
    if cfg['use_word_pretrained'] or cfg['use_char_pretrained']:
        with open('necessary.pkl', 'rb') as f:
            data = pickle.load(f)
    self._build_word_and_char_embedding(data)

    # Look up each embedding (morphemes first, characters second).
    self._embeddings = [
        tf.nn.embedding_lookup(self._embedding_matrix[0], self.morph),
        tf.nn.embedding_lookup(self._embedding_matrix[1], self.character),
    ]

    # Character-level (syllable) embedding, one row per word.
    character_embedding = tf.reshape(
        self._embeddings[1],
        [-1, cfg["word_length"], cfg["embedding"][1][2]])
    char_len = tf.reshape(self.character_len, [-1])

    # Dropout after embedding, before lstm layer
    if cfg["use_dropout_after_embedding"]:
        character_embedding = tf.nn.dropout(character_embedding,
                                            self.emb_dropout_keep_prob)

    character_emb_rnn = self._build_birnn_model(
        character_embedding,
        char_len,
        cfg["char_lstm_units"],
        self.lstm_dropout_keep_prob,
        last=True,
        scope="char_layer")

    if cfg["use_lm"]:
        # Language-model branch: same embedding tables, encoder inputs.
        lm_word_embedding = tf.nn.embedding_lookup(
            self._embedding_matrix[0], self.encoder_inputs)
        lm_char_embedding = tf.nn.embedding_lookup(
            self._embedding_matrix[1], self.encoder_input_chars)
        lm_char_embedding = tf.reshape(
            lm_char_embedding,
            [-1, cfg["word_length"], cfg["embedding"][1][2]])
        lm_char_len = tf.reshape(self.encoder_char_len, [-1])
        lm_char_rnn = self._build_birnn_model(
            lm_char_embedding,
            lm_char_len,
            cfg["char_lstm_units"],
            self.lstm_dropout_keep_prob,
            last=True,
            scope="char_layer",
            lm=True)
        lm_all_emb = tf.concat(
            [self.lm_ne_dict, lm_word_embedding, lm_char_rnn], axis=2)
        if cfg["use_highway"]:
            lm_all_emb = self._build_highway(lm_all_emb,
                                             cfg["num_layers"],
                                             scope="highway")

    # Concatenate every embedding computed above; the last entry of
    # self._embeddings is replaced by its RNN-encoded form.
    all_data_emb = self.ne_dict
    for embedding in self._embeddings[:-1]:
        all_data_emb = tf.concat([all_data_emb, embedding], axis=2)
    all_data_emb = tf.concat([all_data_emb, character_emb_rnn], axis=2)

    if cfg["use_highway"]:
        all_data_emb = self._build_highway(all_data_emb,
                                           cfg["num_layers"],
                                           scope="highway")

    # Dropout after embedding, before lstm layer
    if cfg["use_dropout_after_embedding"]:
        all_data_emb = tf.nn.dropout(all_data_emb,
                                     self.emb_dropout_keep_prob)

    output_lst = []

    # ------------------------- Add CONV Layer -------------------------- #
    # 1d depthwise-separable convolution
    if cfg["use_lm"]:
        conv_output = self._build_conv(lm_all_emb,
                                       cfg["kernel_sizes"],
                                       cfg["num_filters"],
                                       self.encoder_length,
                                       auto_regressive=True)
        # NOTE(review): this rebinds self.lm_loss from a callable to its
        # result, so build_model cannot be called twice on one instance.
        self.lm_loss = self.lm_loss(conv_output,
                                    self.encoder_targets,
                                    NUM_VOCAB,
                                    self.encoder_length,
                                    scope="lm_loss")
        # remove go token
        conv_output = conv_output[:, 1:, :]
    else:
        conv_output = self._build_conv(all_data_emb,
                                       cfg["kernel_sizes"],
                                       cfg["num_filters"],
                                       self.sequence,
                                       auto_regressive=False)
    output_lst.append(conv_output)
    # ------------------------------------------------------------------- #

    # Run the Bi-RNN over the full concatenated embedding.
    lstm_output = self._build_birnn_model(all_data_emb,
                                          self.sequence,
                                          cfg["lstm_units"],
                                          self.lstm_dropout_keep_prob,
                                          scope="all_data_layer")
    output_lst.append(lstm_output)

    # self attention
    if cfg["use_self_attention"]:
        aligned_outputs = self._attention(lstm_output,
                                          cfg["lstm_units"],
                                          cfg["num_heads"],
                                          self.sequence,
                                          scope="attention_small")
        output_lst.append(aligned_outputs)

    outputs = (tf.concat(output_lst, axis=2)
               if len(output_lst) != 1 else output_lst[0])
    # NOTE(review): the second argument of tf.nn.dropout is a keep
    # probability; confirm self.dropout_rate holds one.
    outputs = tf.nn.dropout(outputs, self.dropout_rate)

    # [b, t, 3*d] -> [b, t, C]
    logits = self._build_dense_layer(outputs)

    # crf layer
    crf_cost = self._build_crf_layer(logits)

    if cfg["use_reg_loss"]:
        # NOTE(review): the REGULARIZATION_LOSSES collection is passed back
        # through the regularizer - verify this is intended rather than a
        # plain sum of the collected penalties.
        reg_vars = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        reg_term = layers.apply_regularization(self.regularizer, reg_vars)
        self.cost = crf_cost + reg_term
    else:
        self.cost = crf_cost

    if cfg["use_lm"]:
        self.cost += self.lm_loss * cfg["gamma"]

    self.train_op = self._build_output_layer(self.cost)

    # Exponential moving average: the update is attached as a control
    # dependency of the (re-bound) cost tensor.
    if cfg["use_ema"]:
        var_ema = tf.train.ExponentialMovingAverage(
            decay=cfg["ema_decay_rate"])
        ema_op = var_ema.apply(tf.trainable_variables())
        with tf.control_dependencies([ema_op]):
            self.cost = tf.identity(self.cost)
def build(self, x, y, batch_size, learning_rate):
    """Build the two-stage model: supervised auto-encoder plus latent
    discriminator.

    Stage 1 ('SAE') trains encoder, generator and classifier on the
    classification loss, the KL divergence, the reconstruction error and an
    L2 weight penalty. Stage 2 ('DIS') trains the discriminator to score
    encoded latents as 1 and prior samples as 0.

    Args:
        x: input batch.
        y: binary targets for the classifier (compared against the rounded
            sigmoid of the classifier logits).
        batch_size: divisor for the summed reconstruction error.
        learning_rate: learning rate for both Adam optimizers.

    Returns:
        Tuple ``(loss, optim, metric, variables)`` of dicts.
    """
    # ==== conditional variational auto-encoder
    mu, sigma = self.encode(x)
    z = mu + sigma * tf.random_normal(tf.shape(mu))
    z_sample = tf.random_normal(tf.shape(mu))
    dis_z = self.discriminate(z)
    dis_z_sample = self.discriminate(z_sample)
    x_hat = self.decode(z)
    y_hat = self.classify(z)
    z_fake = tf.random_normal(tf.shape(mu))
    x_fake_hat = self.decode(z_fake)

    # ==== variables
    variables = dict()
    # Report the total number of scalar parameters in the graph.
    sizes = 0
    for v in tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES):
        size = 1
        for s in v.value().shape:
            size *= int(s)
        sizes += size
    print('total variables:', sizes)

    var_enc = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope=self._encoder_name)
    var_gen = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope=self._generator_name)
    var_cla = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope=self._classifier_name)
    var_dis = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                scope=self._discriminator_name)
    variables['enc'] = var_enc
    variables['gen'] = var_gen
    variables['cla'] = var_cla
    variables['dis'] = var_dis

    # ==== define losses
    loss = dict()
    # Loss stage1: SAE
    loss['d_cla'] = tf.reduce_mean(
        tf.nn.sigmoid_cross_entropy_with_logits(labels=y, logits=y_hat))
    loss['d_KL'] = self.KL_divergence(mu, sigma)
    loss['g_rec'] = tf.reduce_sum(tf.square(x - x_hat)) / batch_size
    loss_reg = tcl.apply_regularization(
        tcl.l2_regularizer(1e-4),
        weights_list=var_enc + var_gen + var_cla)
    loss['SAE'] = loss['d_cla'] + loss['d_KL'] + loss['g_rec'] + loss_reg
    # Loss stage2: Gan
    loss['dis'] = tf.reduce_mean(
        0.5 * tf.nn.sigmoid_cross_entropy_with_logits(
            logits=dis_z, labels=tf.ones_like(dis_z)) +
        0.5 * tf.nn.sigmoid_cross_entropy_with_logits(
            logits=dis_z_sample, labels=tf.zeros_like(dis_z_sample)))

    # ==== define optimizer
    optimizer = tf.train.AdamOptimizer
    optim = dict()
    # ---- Stage1: svae optimizer
    optim['SAE'] = optimizer(learning_rate=learning_rate, beta1=0.5).minimize(
        loss['SAE'], var_list=var_enc + var_gen + var_cla)
    # ---- Stage 2: gan optimizer
    optim['DIS'] = optimizer(learning_rate=learning_rate, beta1=0.5).minimize(
        loss['dis'], var_list=var_dis)

    # ==== define metrics
    metric = dict()
    # ---- svae metric
    metric['acc'] = tf.reduce_mean(
        tf.cast(tf.equal(tf.round(tf.sigmoid(y_hat)), y), tf.float32))
    metric['x_hat'] = x_hat
    metric['latent'] = mu
    metric['x_fake_hat'] = x_fake_hat
    # ---- gan metric
    # The discriminator outputs are logits (they are fed as `logits=` to the
    # losses above), so the 0.5 decision threshold applies to their sigmoid,
    # exactly as done for the classifier accuracy.
    metric['acc_dis_true'] = tf.reduce_mean(
        tf.cast(tf.sigmoid(dis_z) >= 0.5, tf.float32))
    metric['acc_dis_fake'] = tf.reduce_mean(
        tf.cast(tf.sigmoid(dis_z_sample) < 0.5, tf.float32))

    return loss, optim, metric, variables
def model_fn(features, labels, mode, params):
    """Estimator model function for the slot-attention caption model.

    Attends over the images, optionally builds a per-slot vocabulary, and
    either decodes captions (PREDICT) or computes the training/eval loss:
    caption loss, optional weighted KL term (``params.vae_dim > 0``) and the
    optional L2, unity and image-sentinel regularizers.

    Args:
        features: dict with 'images' and, outside PREDICT, 'captions'
            (0 is treated as padding) and 'assignments'.
        labels: unused.
        mode: one of ``tf.estimator.ModeKeys``.
        params: hyper-parameter object.

    Returns:
        A ``tf.estimator.EstimatorSpec``.

    Raises:
        ValueError: if ``params.loss`` is neither 'cross_entropy' nor 'nll',
            or (TRAIN mode) if ``params.optimizer`` is unknown.
    """
    img = features['images']  # (image_n, h, w, c)
    temperature = get_temperature(params)
    img_attn, img_sen = attention_fn(img,
                                     temperature=temperature,
                                     mode=mode,
                                     params=params)
    if params.use_slot_vocab:
        # The sentinel is applied to the slot vocabulary instead of the
        # context in this branch.
        img_ctx = apply_attn(img=img, att=img_attn)  # (image_n, frames, c)
        # (image_n, frames, vocab+1)
        slot_vocab = slot_vocab_fn(img_ctx=img_ctx, params=params)
        if img_sen is not None:
            slot_vocab *= tf.expand_dims(img_sen, axis=2)
    else:
        # (image_n, frames, c)
        img_ctx = apply_attn(img=img, att=img_attn, sen=img_sen)
        slot_vocab = None

    if mode == tf.estimator.ModeKeys.PREDICT:
        n = tf.shape(img_ctx)[0]
        if params.vae_dim > 0:
            # No caption to encode at prediction time: sample from the prior.
            enc = tf.random_normal(shape=(n, params.vae_dim),
                                   mean=0,
                                   stddev=1,
                                   dtype=tf.float32)
        else:
            enc = None
        logits, slot_attn, slot_sentinel, y1 = predict_decoder_fn(
            slot_vocab=slot_vocab,
            img_ctx=img_ctx,
            sen=img_sen,
            enc=enc,
            params=params,
            depth=30,
            temperature=temperature,
            mode=mode)
        predictions = {
            'captions': y1,
            'image_ids':
                tf.get_default_graph().get_tensor_by_name('image_ids:0'),
            'slot_attention': slot_attn,
            'slot_sentinel': slot_sentinel,
            'image_attention': img_attn
        }
        if slot_vocab is not None:
            predictions['slot_vocab'] = slot_vocab
        if img_sen is not None:
            predictions['image_sentinel'] = img_sen
        return tf.estimator.EstimatorSpec(mode=mode, predictions=predictions)

    raw_cap = features['captions']  # (caption_n, depth)
    # Shift token ids down by one; id 0 (padding) is masked out below.
    cap = tf.maximum(raw_cap - 1, 0)  # (caption_n, depth)
    cap_mask = 1. - tf.cast(tf.equal(raw_cap, 0),
                            tf.float32)  # (caption_n, depth)
    ass = features['assignments']  # (caption_n,)

    # Replicate the per-image tensors once per caption.
    if slot_vocab is not None:
        decoder_vocab = tf.gather(slot_vocab, ass, axis=0)
    else:
        decoder_vocab = None
    if img_sen is not None:
        decoder_sen = tf.gather(img_sen, ass, axis=0)  # (caption_n, frames)
    else:
        decoder_sen = None
    decoder_img_ctx = tf.gather(img_ctx, ass, axis=0)

    if params.vae_dim > 0:
        mu, raw_sig = encoder_fn(img_ctx=decoder_img_ctx,
                                 sen=decoder_sen,
                                 slot_vocab=decoder_vocab,
                                 mask=cap_mask,
                                 cap=cap,
                                 temperature=temperature,
                                 params=params,
                                 mode=mode)
        sigma = EPSILON + tf.exp(raw_sig)
        # Reparameterised sample from N(mu, sigma^2).
        enc = mu + (sigma *
                    tf.random_normal(tf.shape(mu), 0, 1, dtype=tf.float32))
        kl_loss = 0.5 * tf.reduce_sum(
            tf.square(mu) + tf.square(sigma) -
            tf.log(EPSILON + tf.square(sigma)) - 1, 1)
        kl_loss = tf.reduce_mean(kl_loss, 0)
        tf.summary.scalar('kl_loss', kl_loss)
    else:
        enc = None

    logits, slot_attn, slot_sentinel = train_decoder_fn(
        slot_vocab=decoder_vocab,
        img_ctx=decoder_img_ctx,
        sen=decoder_sen,
        cap=cap,
        temperature=temperature,
        params=params,
        mode=mode,
        enc=enc)

    # Loss
    if params.loss == 'cross_entropy':
        loss = tf.reduce_mean(
            cross_entropy_loss(labels=cap,
                               mask=cap_mask,
                               logits=logits,
                               smoothing=params.smoothing))
    elif params.loss == 'nll':
        loss = tf.reduce_mean(
            nll_loss(labels=cap, mask=cap_mask, logits=logits, mean=False))
    else:
        raise ValueError("Unknown loss: {}".format(params.loss))

    if params.vae_dim > 0:
        kl_weight = get_kl_weight(params)
        loss += kl_weight * kl_loss

    # Regularization
    # slot_attn: (n, depth, frame_size)
    # slot_sentinel: (n, depth, 1)
    if params.l2 > 0:
        reg = apply_regularization(l2_regularizer(params.l2),
                                   tf.trainable_variables())
        tf.summary.scalar("regularization", reg)
        loss += reg

    if params.unity_reg > 0 and decoder_sen is not None:
        # Penalise the squared gap between the masked slot attention summed
        # over caption positions and the image sentinel.
        slot_sum = tf.reduce_sum(
            tf.expand_dims(cap_mask, 2) * slot_attn * slot_sentinel,
            axis=1)  # (n, frame_size)
        slot_diff = tf.square(slot_sum - decoder_sen)
        unity_regularization = params.unity_reg * tf.reduce_mean(
            tf.reduce_sum(slot_diff, 1))
        tf.summary.scalar("unity_regularization", unity_regularization)
        loss += unity_regularization

    if (params.img_sen_l1 > 0 or params.img_sen_l2 > 0) \
            and img_sen is not None:
        img_sen_reg = 0
        if params.img_sen_l1 > 0:
            img_sen_reg += params.img_sen_l1 * tf.reduce_mean(
                tf.reduce_sum(img_sen, axis=1), axis=0)
        if params.img_sen_l2 > 0:
            img_sen_reg += params.img_sen_l2 * tf.reduce_mean(
                tf.square(tf.reduce_sum(img_sen, axis=1)), axis=0)
        tf.summary.scalar('image_sentinel_regularization', img_sen_reg)
        loss += img_sen_reg

    if mode == tf.estimator.ModeKeys.TRAIN:
        lr = tf.train.exponential_decay(
            params.lr,
            decay_rate=params.decay_rate,
            decay_steps=params.decay_steps,
            global_step=tf.train.get_global_step(),
            name='learning_rate',
            staircase=False)
        tf.summary.scalar('learning_rate', lr)
        if params.optimizer == 'adam':
            optimizer = tf.train.AdamOptimizer(learning_rate=lr)
        elif params.optimizer == 'momentum':
            optimizer = tf.train.MomentumOptimizer(
                learning_rate=lr, momentum=params.momentum)
        elif params.optimizer == 'rmsprop':
            optimizer = tf.train.RMSPropOptimizer(learning_rate=lr,
                                                  momentum=params.momentum)
        else:
            raise ValueError("Unknown optimizer: {}".format(
                params.optimizer))
        print("Trainable: {}".format(
            list(tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES))))
        train_op = optimizer.minimize(
            loss=loss, global_step=tf.train.get_global_step())
        return tf.estimator.EstimatorSpec(mode=mode,
                                          loss=loss,
                                          train_op=train_op)

    # Evaluation: loss only, no extra metrics.
    eval_metric_ops = {}
    return tf.estimator.EstimatorSpec(mode=mode,
                                      loss=loss,
                                      eval_metric_ops=eval_metric_ops)
def buildGraph(input_placeholder_s1, input_placeholder_s2, labels_placeholder,
               mask_placeholder_s1, mask_placeholder_s2, dropout_placeholder,
               embeddings_matrix):
    """Build a two-sentence LSTM classifier, its loss and a clipped Adam step.

    Sentence 1 is run through ``cell1``; the final cell state of that pass
    seeds a second LSTM (``cell2``) that reads sentence 2. The final hidden
    state of the second pass goes through dropout and a linear layer to
    produce the logits.

    The function reads several names that are not defined here and must
    exist at module level: ``max_length``, ``embed_size``, ``hidden_size``,
    ``n_classes``, ``l1_reg``, ``l2_reg``, ``lr``, ``max_grad_norm``,
    ``LSTMCell``, ``LSTMCell2``, ``xavier_weight_init``,
    ``l1_l2_regularizer`` and ``apply_regularization``.

    Args:
        input_placeholder_s1: indices into the embedding matrix for sentence 1.
        input_placeholder_s2: indices into the embedding matrix for sentence 2.
        labels_placeholder: labels passed to
            ``tf.nn.softmax_cross_entropy_with_logits``.
        mask_placeholder_s1: mask for sentence 1; it is cast to int32 and
            summed over axis 1 to obtain the sequence lengths.
        mask_placeholder_s2: same as above, for sentence 2.
        dropout_placeholder: dropout rate; the keep probability is
            ``1 - dropout_placeholder``.
        embeddings_matrix: initial value of the embedding variable.

    Returns:
        Tuple ``(pred, loss, train_op)``: the logits, the scalar loss
        (mean cross-entropy plus the L1/L2 penalty over all trainable
        variables) and the op applying the clipped gradients.
    """
    params = tf.Variable(embeddings_matrix)
    tensor_s1 = tf.nn.embedding_lookup(params, input_placeholder_s1)
    tensor_s2 = tf.nn.embedding_lookup(params, input_placeholder_s2)
    embeddings_s1 = tf.reshape(tensor_s1, [-1, max_length, embed_size])
    embeddings_s2 = tf.reshape(tensor_s2, [-1, max_length, embed_size])

    dropout_rate = dropout_placeholder

    cell1 = LSTMCell(embed_size, hidden_size)
    cell2 = LSTMCell2(embed_size, hidden_size)

    # Both halves of the first initial state are sized from sentence 1, the
    # sequence this state is actually used with.
    batch_size_s1 = tf.shape(embeddings_s1)[0]
    c = tf.zeros([batch_size_s1, hidden_size])
    h = tf.zeros([batch_size_s1, hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(c, h)
    l1 = tf.reduce_sum(tf.cast(mask_placeholder_s1, tf.int32), axis=1)
    _, state1 = tf.nn.dynamic_rnn(cell1,
                                  embeddings_s1,
                                  dtype=tf.float32,
                                  initial_state=initial_state,
                                  sequence_length=l1)

    # The second pass starts from the first pass's cell state and a fresh
    # zero hidden state.
    h = tf.zeros([tf.shape(embeddings_s2)[0], hidden_size])
    initial_state = tf.contrib.rnn.LSTMStateTuple(state1.c, h)
    l2 = tf.reduce_sum(tf.cast(mask_placeholder_s2, tf.int32), axis=1)
    _, state2 = tf.nn.dynamic_rnn(cell2,
                                  embeddings_s2,
                                  dtype=tf.float32,
                                  initial_state=initial_state,
                                  sequence_length=l2)

    # Output projection on the final hidden state of the second pass.
    func = xavier_weight_init()
    U = tf.Variable(func([hidden_size, n_classes]))
    b1 = tf.Variable(tf.zeros([1, n_classes]))
    h_drop = tf.nn.dropout(state2.h, keep_prob=1 - dropout_rate)
    pred = tf.matmul(h_drop, U) + b1
    tf.add_to_collection('ops_to_restore', pred)

    loss = tf.nn.softmax_cross_entropy_with_logits(labels=labels_placeholder,
                                                   logits=pred)
    loss = tf.reduce_mean(loss)
    regularizer = l1_l2_regularizer(l1_reg, l2_reg)
    reg_loss = apply_regularization(regularizer, tf.trainable_variables())
    loss += reg_loss

    # Clip by global norm, then re-pair each clipped gradient with the
    # variable it was computed for.
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    grads_and_vars = optimizer.compute_gradients(loss)
    grads = [grad for grad, _ in grads_and_vars]
    clipped_grads, _ = tf.clip_by_global_norm(grads, max_grad_norm)
    clipped_pairs = [(grad, var)
                     for grad, (_, var) in zip(clipped_grads, grads_and_vars)]
    train_op = optimizer.apply_gradients(clipped_pairs)
    return pred, loss, train_op
def build_graph(self):
    """Create the last-layer graph, its sampler update op, and load weights.

    A fresh ``tf.Graph`` and ``tf.Session`` are stored on ``self``. The model
    is a single dense layer trained with softmax cross-entropy; an L2 penalty
    with scale 1.0 on the layer's variables is added to ``self.n`` times the
    loss to form ``self.bayesian_loss``. Depending on ``self.sampler`` the
    training op is either noisy gradient descent ('sgld' / 'lmc') or plain
    gradient descent ('sgd'). Finally the layer variables are restored from
    ``self.model_dir``.
    """

    def _summary_tag(var):
        # Replace the last ':' of the variable name with '_'.
        cut = var.name.rindex(':')
        return var.name[:cut] + '_' + var.name[cut + 1:]

    self.g = tf.Graph()
    with self.g.as_default():
        # The session is created while self.g is the default graph.
        self.sess = tf.Session()

        # Inputs and targets.
        self.x = tf.placeholder(dtype=tf.float32,
                                shape=[None, self.dim_input])
        self.y = tf.placeholder(dtype=tf.float32,
                                shape=[None, self.num_classes])

        # The model itself: one dense layer producing the logits.
        with tf.variable_scope('last_layer'):
            self.z = tf.layers.dense(inputs=self.x, units=self.num_classes)

        cross_entropy = tf.nn.softmax_cross_entropy_with_logits_v2(
            labels=self.y, logits=self.z)
        self.loss = tf.reduce_mean(cross_entropy)
        self.output_probs = tf.nn.softmax(self.z)

        # Every trainable variable in this graph belongs to the last layer;
        # element 0 and element 1 (with a leading axis added) are stacked
        # into a single tensor for the summaries.
        self.ll_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
        second_expanded = tf.expand_dims(self.ll_vars[1], axis=0)
        self.ll_vars_concat = tf.concat([self.ll_vars[0], second_expanded], 0)
        _variable_summaries(self.ll_vars_concat)

        # L2 penalty with scale 1.0, used as the prior term.
        prior = contrib_layers.apply_regularization(
            contrib_layers.l2_regularizer(1.0), self.ll_vars)
        self.bayesian_loss = self.n * self.loss + prior

        # Saver that keeps up to num_samples checkpoints of the layer.
        self.saver = tf.train.Saver(var_list=self.ll_vars,
                                    max_to_keep=self.num_samples)

        if self.sampler in ('sgld', 'lmc'):
            # Gradient descent on the Bayesian loss with Gaussian noise
            # added to every gradient.
            step = self.step_size / self.n
            gd_opt = tf.train.GradientDescentOptimizer(step)
            noisy_pairs = []
            for grad, var in gd_opt.compute_gradients(self.bayesian_loss):
                if grad is None:
                    continue
                tag = _summary_tag(var)
                gaussian_noise = (np.sqrt(2. / step) *
                                  tf.random_normal(tf.shape(grad)))
                noisy_grad = grad + gaussian_noise
                tf.summary.histogram(tag + '/grad_hist_mcmc',
                                     grad / self.n)
                tf.summary.histogram(tag + '/gaussian_noise_hist_mcmc',
                                     gaussian_noise / self.n)
                tf.summary.histogram(tag + '/grad_total_hist_mcmc',
                                     noisy_grad / self.n)
                noisy_pairs.append((noisy_grad, var))
            self.train_op = gd_opt.apply_gradients(noisy_pairs)

        if self.sampler == 'sgd':
            # Plain gradient descent on the un-scaled loss.
            gd_opt = tf.train.GradientDescentOptimizer(self.step_size)
            sgd_pairs = gd_opt.compute_gradients(self.loss)
            self.train_op = gd_opt.apply_gradients(sgd_pairs)
            for grad, var in sgd_pairs:
                if grad is None:
                    continue
                tf.summary.histogram(_summary_tag(var) + '/grad_hist_sgd',
                                     grad)

        # Summaries are merged and written under <working_dir>/logs.
        self.all_summaries = tf.summary.merge_all()
        self.writer = tf.summary.FileWriter(
            os.path.join(self.working_dir, 'logs'), graph=self.g)

        # The layer variables are restored from the checkpoint rather than
        # initialized.
        restorer = tf.train.Saver(var_list=self.ll_vars)
        print('loading the network ...')
        restorer.restore(self.sess, self.model_dir)
        print('Graph successfully loaded.')
def train():
    """Train the model from the queued binary data with periodic evaluation.

    Builds the graph (softmax cross-entropy plus an L2 penalty with scale
    5e-4 over all trainable variables, momentum optimizer with a staircase
    exponentially decayed learning rate), then runs the training loop in a
    session. Every ``TRAIN_FREQUENCY`` steps the minibatch loss, learning
    rate and training accuracy are printed and the validation and test
    splits are evaluated; a checkpoint is written every ``SAVE_FREQUENCY``
    steps and once more after the last step when ``FLAGS.SAVE_MODEL`` is set.

    Reads ``FLAGS`` and the helpers ``get_size``, ``init_bin_file``,
    ``model``, ``count_trainable_params``, ``get_train_data``,
    ``eval_in_batches`` and ``accuracy``, all defined outside this function.
    """
    start_time_first = time.time()
    WORK_DIRECTORY = FLAGS.VIEW_PATH
    train_size, test_size, val_size = get_size()
    fqbt, rbt = init_bin_file('data/train.bin')
    fqbv, rbv = init_bin_file('data/val.bin')
    fqbe, rbe = init_bin_file('data/test.bin')

    data_node = tf.placeholder(tf.float32,
                               shape=(None, FLAGS.NUM_SPEC,
                                      FLAGS.CHANNEL_NUMBER))
    labels_node = tf.placeholder(tf.int64, shape=(None, FLAGS.LABEL_NUMBER))
    keep_hidden = tf.placeholder(tf.float32)

    logits = model(data_node, keep_hidden)
    preds = tf.nn.softmax(logits, name='Output')
    count_trainable_params(list(tf.trainable_variables()))

    loss = tf.reduce_mean(
        tf.nn.softmax_cross_entropy_with_logits(logits=logits,
                                                labels=labels_node))
    loss += apply_regularization(l2_regularizer(5e-4),
                                 tf.trainable_variables())

    # `batch` counts optimizer steps; the decay is driven by examples seen.
    batch = tf.Variable(0, trainable=False)
    learning_rate = tf.train.exponential_decay(0.01,
                                               batch * FLAGS.BATCH_SIZE,
                                               train_size,
                                               0.95,
                                               staircase=True)
    optimizer = tf.train.MomentumOptimizer(learning_rate, 0.9).minimize(
        loss, global_step=batch)

    # Second copy of the model with reuse=True, used for evaluation.
    eval_predictions = tf.nn.softmax(model(data_node, keep_hidden, reuse=True))

    train_label_node, train_data_node = get_train_data(fqbt, rbt)
    val_label_node, val_data_node = get_train_data(fqbv, rbv)
    test_label_node, test_data_node = get_train_data(fqbe, rbe)

    saver = tf.train.Saver(tf.global_variables())

    TRAIN_FREQUENCY = train_size // FLAGS.BATCH_SIZE * 2
    TEST_FREQUENCY = TRAIN_FREQUENCY
    VAL_FREQUENCY = TRAIN_FREQUENCY
    SAVE_FREQUENCY = 10 * train_size // FLAGS.BATCH_SIZE

    def _evaluate_split(sess, split_data_node, split_label_node, split_size):
        """Return the accuracy over all full batches of one split."""
        num_batches = split_size // FLAGS.BATCH_SIZE
        usable = num_batches * FLAGS.BATCH_SIZE
        label_total = np.zeros((usable, FLAGS.LABEL_NUMBER))
        prediction_total = np.zeros((usable, FLAGS.LABEL_NUMBER))
        for ti in xrange(num_batches):
            offset = ti * FLAGS.BATCH_SIZE
            batch_data, batch_label = sess.run(
                [split_data_node, split_label_node])
            predictions = eval_in_batches(batch_data, sess, eval_predictions,
                                          data_node, keep_hidden)
            prediction_total[offset:offset + FLAGS.BATCH_SIZE, :] = predictions
            label_total[offset:offset + FLAGS.BATCH_SIZE] = batch_label
        return accuracy(prediction_total, label_total)

    def _save_checkpoint(sess, step):
        """Write a checkpoint tagged with `step` into WORK_DIRECTORY."""
        checkpoint_path = os.path.join(WORK_DIRECTORY, 'model.ckpt')
        saver.save(sess, checkpoint_path, global_step=step)

    with tf.Session() as sess:
        sess.run(tf.local_variables_initializer())
        sess.run(tf.global_variables_initializer())
        # Not used afterwards; kept so the graph is still handed to a
        # FileWriter rooted at WORK_DIRECTORY.
        summary_writer = tf.summary.FileWriter(WORK_DIRECTORY, sess.graph)
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)
        try:
            while not coord.should_stop():
                start_time = time.time()
                # NOTE(review): xrange is kept from the original; under
                # Python 3 it has to come from a compatibility import that
                # is not visible here - confirm.
                for step in xrange(
                        int(FLAGS.NUM_EPOCHS * train_size) //
                        FLAGS.BATCH_SIZE):
                    train_data, train_label = sess.run(
                        [train_data_node, train_label_node])
                    feed_dict = {
                        data_node: train_data,
                        labels_node: train_label,
                        keep_hidden: 0.5
                    }
                    _, loss_val, lr_val, pred = sess.run(
                        [optimizer, loss, learning_rate, preds],
                        feed_dict=feed_dict)

                    if step != 0 and step % TRAIN_FREQUENCY == 0:
                        et = time.time() - start_time
                        print('Step %d (epoch %.2f), %.1f ms' %
                              (step,
                               float(step) * FLAGS.BATCH_SIZE / train_size,
                               1000 * et / TRAIN_FREQUENCY))
                        print('Minibatch loss: %.3f, learning rate: %.6f' %
                              (loss_val, lr_val))
                        print('Train accuracy: {:.3f}'.format(
                            accuracy(pred, train_label)))
                        start_time = time.time()

                    if step != 0 and step % VAL_FREQUENCY == 0:
                        acc = _evaluate_split(sess, val_data_node,
                                              val_label_node, val_size)
                        print('Accuracy of validation: {:.3f}'.format(acc))
                        # Evaluation time is excluded from the step timing.
                        start_time = time.time()

                    if step != 0 and step % TEST_FREQUENCY == 0:
                        acc = _evaluate_split(sess, test_data_node,
                                              test_label_node, test_size)
                        print('Accuracy of test: {:.3f}'.format(acc))
                        start_time = time.time()

                    if step % SAVE_FREQUENCY == 0 and step != 0:
                        if FLAGS.SAVE_MODEL:
                            _save_checkpoint(sess, step)

                # The step loop has no `break`, so this always runs once the
                # loop is exhausted: final checkpoint, then ask the outer
                # `while` to stop.
                if FLAGS.SAVE_MODEL:
                    _save_checkpoint(sess, step)
                coord.request_stop()
        except tf.errors.OutOfRangeError:
            print('Done training -- epoch limit reached')
        finally:
            # Always signal the queue-runner threads, including when an
            # unexpected exception is propagating.
            coord.request_stop()
        coord.join(threads)
    print('All training process costs {:.2f} seconds...'.format(
        time.time() - start_time_first))
def train(self, batch_xs, batch_ys, iterations):
    '''Build the training ops and run the training loop.

    The loss comes from ``self._init_loss`` on the squeezed ``self.features``
    (plus an optional L2 penalty over all trainable variables). Each step
    runs the optimizer together with an exponential-moving-average update of
    the trainable variables, and summaries are written every
    ``self.config.LOG_INTERVAL`` iterations.

    :param batch_xs: value fed to ``self._batch_xs`` on every step
    :param batch_ys: labels, shape [batch_size, 1]; reshaped to
        [self.config.BATCH_SIZE, -1] before use
    :param iterations: Iterations per epoch; the loop runs
        ``iterations * self.config.TRAINING_EPOCH`` steps
    :return: None
    :raises AssertionError: if ``iterations`` is not a positive int, the
        model is not in 'train' mode, or weights were not initialized
    :raises NotImplementedError: if ``self.config.OPTIMIZER`` is neither
        'sgd' nor 'adam'
    '''
    assert isinstance(iterations, int) and iterations > 0
    # Compare by value: identity (`is`) on a string literal only works when
    # both strings happen to be the same interned object.
    assert self.mode == 'train', 'current mode is %s, not training mode' % self.mode
    assert self.initialized, 'initialize_weights() or load_weights() must be called before call train()'

    logits = tf.squeeze(self.features, name='probability')
    prediction = tf.argmax(logits,
                           axis=-1,
                           output_type=tf.int32,
                           name='prediction')

    # Loss and summaries.
    batch_ys = tf.reshape(batch_ys, [self.config.BATCH_SIZE, -1])
    loss_t = self._init_loss(logits, batch_ys)
    if self.config.USE_REGULARIZER:
        reg = layers.l2_regularizer(self.config.REGULARIZE_SCALE)
        loss_t += layers.apply_regularization(reg, tf.trainable_variables())
    # Fraction of predictions equal to the labels, logged under 'mAP'.
    mAP_t = tf.reduce_mean(
        tf.cast(tf.equal(prediction, batch_ys), dtype=tf.float32))
    mAP_log = tf.summary.scalar('mAP', mAP_t)
    loss_log = tf.summary.scalar('loss', loss_t)
    summary_t = tf.summary.merge([mAP_log, loss_log])

    global_step = tf.Variable(self.global_step_init_value, trainable=False)

    # Moving averages of the trainable variables, updated with every step.
    variable_averages = tf.train.ExponentialMovingAverage(
        decay=0.99, num_updates=global_step)
    variable_averages_op = variable_averages.apply(tf.trainable_variables())

    # Staircase exponential learning-rate decay and the optimizer.
    learning_rate = tf.train.exponential_decay(self.config.LEARNING_RATE,
                                               global_step,
                                               decay_steps=100,
                                               decay_rate=0.96,
                                               staircase=True,
                                               name='learning_rate')
    if self.config.OPTIMIZER == 'sgd':
        optim = tf.train.GradientDescentOptimizer(learning_rate)
    elif self.config.OPTIMIZER == 'adam':
        optim = tf.train.AdamOptimizer(learning_rate)
    else:
        print('[!]Unidentifiable optimizer: ' + self.config.OPTIMIZER)
        raise NotImplementedError
    train_step = optim.minimize(loss_t,
                                global_step=global_step,
                                name=self.config.OPTIMIZER)
    train_op = tf.group(train_step, variable_averages_op)

    # NOTE(review): only global_step is initialized here; the optimizer
    # slots and moving-average shadow variables created above presumably
    # need initializing as well - confirm against initialize_weights().
    self.sess.run(global_step.initializer)

    time_begin = datetime.now()
    suffix = str(int(time()))
    writer = tf.summary.FileWriter(self.ckpt_dir,
                                   self.sess.graph,
                                   filename_suffix=suffix)
    logging.basicConfig(filename=os.path.join(
        self.ckpt_dir, 'train.output-{}.txt'.format(suffix)),
                        level=logging.DEBUG)

    loops = iterations * self.config.TRAINING_EPOCH
    # Pre-bind the per-step results so the OutOfRangeError handler below can
    # run even when the very first step raises.
    loss_val = sum_str = step_val = None
    for it in range(loops):
        try:
            _, loss_val, sum_str, step_val = self.sess.run(
                [train_op, loss_t, summary_t, global_step],
                feed_dict={self._batch_xs: batch_xs})
            if it % self.config.LOG_INTERVAL == 0:
                # global_step was already incremented by train_op, so the
                # summary is attributed to the previous step.
                step_val -= 1
                writer.add_summary(sum_str, step_val)
                time_elapse = datetime.now() - time_begin
                time_remain = time_elapse / (it + 1) * (loops - it - 1)
                msg = 'elapsed time:{} remaining time:{} step:{} loss:{}'.format(
                    time_elapse, time_remain, step_val, loss_val)
                logmsg = processBar(it, loops, msg, 50)
                logging.info(logmsg)
        except tf.errors.InvalidArgumentError as e:
            # Deliberate best-effort: report the failed step and carry on.
            print(
                'An error of type tf.errors.InvalidArgumentError has been ignored...'
            )
            print(e.message)
            logging.error('tf.errors.InvalidArgumentError:\r\n' + e.message)
            continue
        except tf.errors.OutOfRangeError:
            # Flush the last summary obtained, if any step succeeded.
            if sum_str is not None:
                writer.add_summary(sum_str, step_val)
            msg = 'Epoch reach the end, final loss value is {}'.format(
                loss_val)
            logmsg = processBar(it, loops, msg, 50)
            logging.info(logmsg)
            break
    time_elapse = datetime.now() - time_begin
    print('Training finish, elapsed time %s' % time_elapse)
def build_model(self):
    """Build the full graph: inputs, encoders, attention, losses and EMA.

    Creates the input placeholders and summary placeholders, embeds
    context / question / sentences, encodes them with ``residual_block``,
    runs the sentence-level (hierarchical) attention that yields the
    auxiliary losses, applies context-question co-attention, stacks three
    model-encoder passes, and predicts start/end logits with
    ``pointer_network``. ``self.loss`` is the span cross-entropy plus the
    weighted auxiliary losses (and an optional regularization term), wrapped
    so that evaluating it also updates the exponential moving averages.
    Ends by calling ``self.add_train_op()`` and ``self.init_session()``.
    """
    # Placeholders. Attribute names (including the "legnths" spelling) are
    # kept unchanged because code outside this method may refer to them.
    self.contexts = tf.placeholder(shape=[None, None],
                                   dtype=tf.int32,
                                   name="context")
    self.context_legnths = tf.placeholder(shape=[None],
                                          dtype=tf.int32,
                                          name="c_length")
    self.questions = tf.placeholder(shape=[None, None],
                                    dtype=tf.int32,
                                    name="q")
    self.question_legnths = tf.placeholder(shape=[None],
                                           dtype=tf.int32,
                                           name="q_len")
    # [batch, num_sentences, num_words]
    self.sentences = tf.placeholder(shape=[None, None, None],
                                    dtype=tf.int32,
                                    name="sentences")
    # [num_sentences, num_words]
    self.sequence_lengths = tf.placeholder(shape=[None, None],
                                           dtype=tf.int32,
                                           name="seq_len")
    # [num_sentences]
    self.sentence_lengths = tf.placeholder(shape=[None],
                                           dtype=tf.int32,
                                           name="sent_len")
    self.sentence_idx = tf.placeholder(shape=[None],
                                       dtype=tf.int32,
                                       name="sent_idx")
    self.answerable = tf.placeholder(shape=[None],
                                     dtype=tf.int32,
                                     name="answ")
    self.answer_span = tf.placeholder(shape=[None, 2],
                                      dtype=tf.int32,
                                      name="answer_span")
    self.dropout = tf.placeholder(dtype=tf.float32, name="dropout")

    # Externally computed averages, fed in only to be written as summaries.
    self.avg_loss = tf.placeholder(dtype=tf.float32, name="avg_loss")
    self.avg_em = tf.placeholder(dtype=tf.float32, name="avg_em")
    self.avg_acc = tf.placeholder(dtype=tf.float32, name="avg_acc")
    # The "loss" summary reports avg_loss (not avg_em, which has its own
    # "em" summary below).
    loss_summary = tf.summary.scalar("loss", self.avg_loss)
    acc_summary = tf.summary.scalar("accuracy", self.avg_acc)
    em_summary = tf.summary.scalar("em", self.avg_em)
    self.merged = tf.summary.merge([loss_summary, acc_summary, em_summary])

    self.document_size, self.sentence_size, self.word_size = tf.unstack(
        tf.shape(self.sentences))

    # Embedding table: one all-zero row, two trainable special-token rows,
    # then the frozen pre-trained embeddings from the config.
    zeros = tf.constant([[0.0] * self.config.embedding_size])
    unk_dummy = tf.get_variable(shape=[2, self.config.embedding_size],
                                initializer=layers.xavier_initializer(),
                                name="special_token")
    embedding_matrix = tf.Variable(initial_value=self.config.embeddings,
                                   trainable=False,
                                   dtype=tf.float32,
                                   name="embedding")
    self.embedding_matrix = tf.concat([zeros, unk_dummy, embedding_matrix],
                                      axis=0)

    self.embedded_sentences = tf.nn.embedding_lookup(self.embedding_matrix,
                                                     self.sentences)
    self.embedded_sentences = tf.layers.dropout(self.embedded_sentences,
                                                self.dropout)
    self.embedded_context = tf.nn.embedding_lookup(self.embedding_matrix,
                                                   self.contexts)
    self.embedded_context = tf.layers.dropout(self.embedded_context,
                                              self.dropout)
    self.embedded_questions = tf.nn.embedding_lookup(self.embedding_matrix,
                                                     self.questions)
    self.embedded_questions = tf.layers.dropout(self.embedded_questions,
                                                self.dropout)

    # Conv + self-attention encoder; the same "Embedding_Encoder" scope is
    # reused for the context, the question and the flattened sentences.
    with tf.variable_scope("Embedding_Encoder_Layer"):
        contexts = self.residual_block(self.embedded_context,
                                       self.context_legnths,
                                       num_blocks=1,
                                       num_conv_blocks=4,
                                       kernel_size=7,
                                       num_filters=128,
                                       scope="Embedding_Encoder",
                                       reuse=False)
        questions = self.residual_block(self.embedded_questions,
                                        self.question_legnths,
                                        num_blocks=1,
                                        num_conv_blocks=4,
                                        kernel_size=7,
                                        num_filters=128,
                                        scope="Embedding_Encoder",
                                        reuse=True)
        # Flatten [batch, sentences, words, dim] to [batch * sentences, ...].
        reshaped_sentences = tf.reshape(
            self.embedded_sentences,
            [-1, self.word_size, self.config.embedding_size])
        sentence_len = tf.reshape(self.sequence_lengths, [-1])
        encoded_sentence = self.residual_block(reshaped_sentences,
                                               sentence_len,
                                               num_blocks=1,
                                               num_conv_blocks=1,
                                               kernel_size=7,
                                               num_filters=128,
                                               scope="Embedding_Encoder",
                                               reuse=True)

    # NOTE(review): this block, and the two device-only blocks further down,
    # were written as `with tf.variable_scope(NAME) and tf.device(DEV):`.
    # `and` evaluates to the device context only, so the variable scope
    # ("hierarchical_attention" here) was never entered. That effective
    # behaviour is preserved, because entering the scope would rename the
    # variables and break existing checkpoints - confirm before changing.
    with tf.device("/device:GPU:0"):
        # [b * s, w, d]
        cnn_inputs = tf.layers.dense(
            encoded_sentence,
            self.config.filter_size,
            kernel_regularizer=self.regularizer,
            kernel_initializer=layers.xavier_initializer(),
            activation=tf.nn.relu)
        sentence_cnn = self.conv_encoder(cnn_inputs,
                                         self.config.filter_size,
                                         scope="word_encoder",
                                         reuse=False)
        encoded_question = self.question_encoding(questions,
                                                  self.question_legnths)
        # [b, s, d]
        sentence_vectors = self.word_level_attention(
            encoded_question, sentence_cnn, self.document_size,
            self.sentence_size, self.word_size, self.sequence_lengths)
        sentence_cnn = self.conv_encoder(sentence_vectors,
                                         self.config.filter_size,
                                         scope="sentence_encoder",
                                         reuse=False)
        document_vector, sentence_score = self.sentence_level_attention(
            encoded_question, sentence_cnn, self.sentence_size,
            self.sentence_lengths)
        self.attention_loss, self.binary_loss = self.auxiliary_loss(
            sentence_score, document_vector, encoded_question)

    # Originally labelled "Context_Query_Attention_Layer".
    with tf.device("/device:GPU:0"):
        A, B = self.co_attention(questions, contexts, self.question_legnths,
                                 self.context_legnths)
        attention_outputs = [contexts, A, contexts * A, contexts * B]

    with tf.variable_scope("Model_Encoder_Layer"):
        inputs = tf.concat(attention_outputs, axis=2)
        inputs = tf.layers.dense(
            inputs,
            self.config.attention_size,
            kernel_regularizer=self.regularizer,
            kernel_initializer=layers.variance_scaling_initializer(),
            activation=tf.nn.relu)
        # Three passes through the same (weight-shared) encoder stack; each
        # pass consumes the previous pass's output.
        memories = []
        for i in range(3):
            outputs = self.residual_block(inputs,
                                          self.context_legnths,
                                          num_blocks=7,
                                          num_conv_blocks=2,
                                          num_filters=128,
                                          kernel_size=5,
                                          scope="Model_Encoder",
                                          reuse=i > 0)
            if i == 2:
                outputs = tf.layers.dropout(outputs, self.dropout)
            memories.append(outputs)
            inputs = outputs

    # Originally labelled "Output_Layer".
    with tf.device("/device:GPU:0"):
        logits_inputs = tf.concat([memories[0], memories[1]], axis=2)
        start_logits = self.pointer_network(document_vector,
                                            logits_inputs,
                                            self.context_legnths,
                                            scope="start_logits")
        logits_inputs = tf.concat([memories[0], memories[2]], axis=2)
        end_logits = self.pointer_network(document_vector,
                                          logits_inputs,
                                          self.context_legnths,
                                          scope="end_logits")

        start_label, end_label = tf.split(self.answer_span, 2, axis=1)
        start_label = tf.squeeze(start_label, axis=-1)
        end_label = tf.squeeze(end_label, axis=-1)
        losses1 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=start_logits, labels=start_label)
        losses2 = tf.nn.sparse_softmax_cross_entropy_with_logits(
            logits=end_logits, labels=end_label)
        cross_entropy_loss = tf.reduce_mean(losses1 + losses2)
        self.loss = cross_entropy_loss \
            + self.config.alpha * self.attention_loss \
            + self.config.beta * self.binary_loss

        # Inference: the outer product of the start and end probabilities,
        # band-limited so that start <= end <= start + ans_limit.
        logits1 = tf.nn.softmax(start_logits)
        logits2 = tf.nn.softmax(end_logits)
        outer_product = tf.matmul(tf.expand_dims(logits1, axis=2),
                                  tf.expand_dims(logits2, axis=1))
        outer = tf.matrix_band_part(outer_product, 0, self.config.ans_limit)
        self.start = tf.argmax(tf.reduce_max(outer, axis=2),
                               axis=1,
                               output_type=tf.int32)
        self.end = tf.argmax(tf.reduce_max(outer, axis=1),
                             axis=1,
                             output_type=tf.int32)
        # self.unans_prob is not set in this method; presumably one of the
        # helper calls above assigns it - verify.
        self.em = self.evaluate_em(self.start, self.end, self.answer_span,
                                   self.unans_prob)

    if self.config.l2_lambda > 0:
        # NOTE(review): REGULARIZATION_LOSSES holds the penalty tensors that
        # the kernel_regularizer arguments already produced, so this applies
        # the regularizer to the penalties rather than summing them. Kept
        # as-is because changing it alters the trained loss - confirm intent.
        reg_losses = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
        l2_loss = layers.apply_regularization(self.regularizer, reg_losses)
        self.loss += l2_loss

    # Exponential moving average of the trainable variables; evaluating
    # self.loss triggers the EMA update through the control dependency.
    self.var_ema = tf.train.ExponentialMovingAverage(0.9999)
    ema_op = self.var_ema.apply(tf.trainable_variables())
    with tf.control_dependencies([ema_op]):
        self.loss = tf.identity(self.loss)
        # Ops that copy each averaged value back onto its variable.
        self.assign_vars = []
        for var in tf.global_variables():
            averaged = self.var_ema.average(var)
            if averaged is not None:
                self.assign_vars.append(tf.assign(var, averaged))

    self.add_train_op()
    self.init_session()
def build_graph(actor,
                critic,
                value,
                obs_dim,
                num_actions,
                batch_size,
                gamma,
                tau,
                actor_lr,
                critic_lr,
                value_lr,
                reg_factor,
                scope='sac',
                reuse=None):
    """Assemble actor, critic and value networks and return run helpers.

    Args:
        actor: callable building the policy; called as
            ``actor(obs, num_actions, reg_factor=..., scope='actor')`` and
            expected to return ``(policy, greedy_policy, log_pi, reg)``.
        critic: callable building Q, called as ``critic(obs, action, scope=...)``.
        value: callable building V, called as ``value(obs, scope=...)``.
        obs_dim: width of the observation placeholders.
        num_actions: width of the action placeholder.
        batch_size: accepted but not used in this function.
        gamma: discount applied to the target value in the critic target.
        tau: interpolation weight for the target-value update.
        actor_lr: Adam learning rate for the actor.
        critic_lr: Adam learning rate for the critic.
        value_lr: Adam learning rate for the value network.
        reg_factor: forwarded to ``actor``.
        scope: outer variable scope name.
        reuse: forwarded to the outer ``tf.variable_scope``.

    Returns:
        Tuple ``(act, train_actor, train_critic, train_value,
        update_target)`` of functions that run the corresponding ops in the
        default session.
    """
    trainable = tf.GraphKeys.TRAINABLE_VARIABLES

    with tf.variable_scope(scope, reuse=reuse):
        # Inputs.
        obs_t_input = tf.placeholder(tf.float32, [None, obs_dim],
                                     name='obs_t')
        act_t_ph = tf.placeholder(tf.float32, [None, num_actions],
                                  name='action')
        rew_t_ph = tf.placeholder(tf.float32, [None], name='reward')
        obs_tp1_input = tf.placeholder(tf.float32, [None, obs_dim],
                                       name='obs_tp1')
        done_mask_ph = tf.placeholder(tf.float32, [None], name='done')

        # Policy.
        policy_t, greedy_policy_t, log_pi_t, reg = actor(
            obs_t_input, num_actions, reg_factor=reg_factor, scope='actor')
        actor_func_vars = tf.get_collection(trainable,
                                            '{}/actor'.format(scope))

        # Q-function: once on the fed action, once on the policy's action.
        q_t = critic(obs_t_input, act_t_ph, scope='critic')
        q_t_with_actor = critic(obs_t_input,
                                policy_t,
                                scope='critic',
                                reuse=True)
        critic_func_vars = tf.get_collection(trainable,
                                             '{}/critic'.format(scope))

        # State value and its target copy (evaluated on the next state).
        v_t = value(obs_t_input, scope='value')
        value_func_vars = tf.get_collection(trainable,
                                            '{}/value'.format(scope))
        v_tp1 = value(obs_tp1_input, scope='target_value')
        target_func_vars = tf.get_collection(
            trainable, '{}/target_value'.format(scope))

        with tf.variable_scope('value_loss'):
            value_target = q_t - log_pi_t
            value_error = v_t - tf.stop_gradient(value_target)
            value_loss = tf.reduce_mean(0.5 * tf.square(value_error))

        with tf.variable_scope('critic_loss'):
            # NOTE(review): reward and done are rank-1 placeholders; this
            # presumably relies on v_tp1 broadcasting compatibly - confirm.
            critic_target = rew_t_ph + gamma * v_tp1 * (1.0 - done_mask_ph)
            critic_error = q_t - tf.stop_gradient(critic_target)
            critic_loss = tf.reduce_mean(0.5 * tf.square(critic_error))

        with tf.variable_scope('policy_loss'):
            policy_target = q_t_with_actor - v_t
            policy_term = log_pi_t * tf.stop_gradient(log_pi_t - policy_target)
            actor_loss = 0.5 * tf.reduce_mean(policy_term)
            # The regularizer returned by `actor` is applied to whatever is
            # in the REGULARIZATION_LOSSES collection and added to the loss.
            l2_loss = layers.apply_regularization(
                reg, tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES))
            actor_loss = actor_loss + l2_loss

        # One Adam optimizer per network, each restricted to its variables.
        critic_optimize_expr = tf.train.AdamOptimizer(critic_lr).minimize(
            critic_loss, var_list=critic_func_vars)
        actor_optimize_expr = tf.train.AdamOptimizer(actor_lr).minimize(
            actor_loss, var_list=actor_func_vars)
        value_optimize_expr = tf.train.AdamOptimizer(value_lr).minimize(
            value_loss, var_list=value_func_vars)

        # Soft update: target <- tau * value + (1 - tau) * target, pairing
        # the variables after sorting both lists by name.
        with tf.variable_scope('update_value_target'):
            online_sorted = sorted(value_func_vars, key=lambda v: v.name)
            target_sorted = sorted(target_func_vars, key=lambda v: v.name)
            soft_updates = [
                dst.assign(tau * src + (1 - tau) * dst)
                for src, dst in zip(online_sorted, target_sorted)
            ]
            update_target_expr = tf.group(*soft_updates)

        def act(obs):
            # Returns [sampled policy output, greedy policy output].
            return tf.get_default_session().run(
                [policy_t, greedy_policy_t], feed_dict={obs_t_input: obs})

        def train_actor(obs, action):
            # One actor step; returns the actor loss.
            fetched = tf.get_default_session().run(
                [actor_loss, actor_optimize_expr],
                feed_dict={obs_t_input: obs, act_t_ph: action})
            return fetched[0]

        def train_critic(obs_t, action, rew, obs_tp1, done):
            # One critic step; returns the critic loss.
            fetched = tf.get_default_session().run(
                [critic_loss, critic_optimize_expr],
                feed_dict={
                    obs_t_input: obs_t,
                    act_t_ph: action,
                    rew_t_ph: rew,
                    obs_tp1_input: obs_tp1,
                    done_mask_ph: done
                })
            return fetched[0]

        def train_value(obs_t, action):
            # One value-network step; returns the value loss.
            fetched = tf.get_default_session().run(
                [value_loss, value_optimize_expr],
                feed_dict={obs_t_input: obs_t, act_t_ph: action})
            return fetched[0]

        def update_target():
            # Runs the grouped soft-update op.
            tf.get_default_session().run(update_target_expr)

        return act, train_actor, train_critic, train_value, update_target
def _rpc_pooling(net,
                 seq_lens,
                 channel,
                 target_len,
                 unit,
                 regularizer=None,
                 conservative=True,
                 is_training=True):
    """Repeatedly halve a sequence with pooling + residual units, then pool.

    While the time dimension (axis 1) of ``net`` is longer than
    ``target_len``, a stride-2 max pool followed by a residual ``unit`` is
    applied inside a ``tf.while_loop``. Rows whose ``seq_lens`` entry is not
    greater than the pooled length keep their (trimmed) previous values and
    length. The result goes through ``temporal_batch_norm`` and
    ``temporal_mean_pooling``.

    Args:
        net: rank-3 tensor; its static shape is unpacked as
            (batch, length, channels).
        seq_lens: per-row sequence lengths.
        channel: number of channels used inside the loop; if the input has
            a different channel count it is first normalized and projected
            with a convolution.
        target_len: loop stops once axis 1 is no longer than this.
        unit: one of 'biconv', 'bottleneck', 'gated', 'lstm'.
        regularizer: optional weights regularizer; when ``None`` no
            regularization is applied.
        conservative: accepted but not used in this function.
        is_training: forwarded to the normalization layers and units.

    Returns:
        The tensor produced by ``nn_ops.temporal_mean_pooling``.

    Raises:
        NotImplementedError: if ``unit`` is not one of the supported names.
    """

    def cond(net, seq_lens):
        # Keep looping while the time dimension exceeds the target length.
        return tf.greater(tf.shape(net)[1], target_len)

    def rpc(net, seq_lens):
        # Stride-2 max pool halves the time axis (lengths rounded up),
        # followed by a residual unit of the requested kind.
        net = nn_ops.pool1d('max', net, pool_size=3, strides=2)
        seq_lens = tf.to_int32(tf.ceil(tf.to_float(seq_lens) / 2.0))
        shortcut = net
        if unit == 'biconv':
            unit_fn = nn_ops.biconv_unit
        elif unit == 'bottleneck':
            unit_fn = nn_ops.bottleneck_unit
        elif unit == 'gated':
            unit_fn = nn_ops.gated_unit
        elif unit == 'lstm':
            unit_fn = nn_ops.lstm_unit
        else:
            raise NotImplementedError(unit)
        # ['rpc'] is forwarded to the unit; the 'rpc' collection is read
        # back below to find the weights to regularize.
        net = unit_fn(net, seq_lens, channel, ['rpc'], is_training)
        return shortcut + net, seq_lens

    def body(net, seq_lens):
        net_rpc, seq_lens_rpc = rpc(net, seq_lens)
        length_rpc = tf.shape(net_rpc)[1]
        # Rows not selected below keep their old values, cut to the pooled
        # length so both candidates have the same shape.
        net_trimmed = tf.slice(net,
                               begin=[0, 0, 0],
                               size=[-1, length_rpc, -1])
        still_long = tf.greater(seq_lens, length_rpc)
        net = tf.where(still_long, net_rpc, net_trimmed)
        seq_lens = tf.where(still_long, seq_lens_rpc, seq_lens)
        return net, seq_lens

    with tf.variable_scope('rpc_pooling'):
        batch, _, channel_in = net.get_shape().as_list()
        if channel_in != channel:
            # Project the input to the working channel count first.
            net = nn_ops.temporal_batch_norm(net,
                                             seq_lens,
                                             center=True,
                                             scale=True,
                                             activation_fn=tf.nn.relu,
                                             is_training=is_training)
            net = slim.conv2d(
                net,
                channel,
                3,
                1,
                padding='SAME',
                activation_fn=None,
                biases_initializer=None,
                weights_initializer=layers.variance_scaling_initializer(),
                weights_regularizer=regularizer)
        # The time axis shrinks on every iteration, so it is left unknown in
        # the loop's shape invariant.
        net, seq_lens = tf.while_loop(cond,
                                      body, [net, seq_lens],
                                      shape_invariants=[
                                          tf.TensorShape(
                                              [batch, None, channel]),
                                          seq_lens.get_shape()
                                      ])
        # apply_regularization calls the regularizer on every weight, so the
        # default `regularizer=None` must skip it instead of failing.
        if regularizer is not None:
            rpc_weights = [
                var for var in tf.get_collection('rpc')
                if 'weights' in var.name
            ]
            # Return value is intentionally discarded, as before.
            layers.apply_regularization(regularizer, rpc_weights)
        net = nn_ops.temporal_batch_norm(net,
                                         seq_lens,
                                         center=True,
                                         scale=True,
                                         activation_fn=tf.nn.relu,
                                         is_training=is_training)
        net = nn_ops.temporal_mean_pooling(net, seq_lens)
    return net
def build_graph(self):
    """Build the training graph: likelihood, KL, regularization, two updates.

    Calls ``self._construct_weights()`` and ``self.forward_pass()``, forms
    an annealed objective from the multinomial log-likelihood and the KL
    term plus an L2 penalty on ``self.weights_q + self.weights_p``, and
    creates two Adam updates: one on the p/q weights and biases, and one
    that applies the value returned by ``self.forward_pass_ARM()`` to
    ``self.keep_prob_ph``.

    Returns:
        Tuple ``(saver, log_softmax_var, neg_ELBO, train_op, merged)``:
        the saver returned by ``forward_pass``, the log-softmax output, the
        training objective, an op that runs both updates, and the merged
        summaries.
    """
    self._construct_weights()
    saver, logits, KL = self.forward_pass()
    log_softmax_var = tf.nn.log_softmax(logits, axis=-1)
    # apply regularization to weights
    reg = l2_regularizer(self.lam)
    reg_var = apply_regularization(reg, self.weights_q + self.weights_p)
    # Despite its name, neg_ll_all holds sum(log_softmax * input), i.e. the
    # un-negated log-likelihood; the sign is flipped where it is used below.
    if self.test_mean_z == 1:
        # When is_training_ph != 1 the input is multiplied in directly;
        # otherwise an axis is inserted at position 1 of the input first.
        # Presumably that axis matches a per-sample dimension of the
        # logits - TODO confirm against forward_pass.
        neg_ll_all = tf.cond(
            tf.logical_not(tf.equal(self.is_training_ph, 1)),
            lambda: tf.reduce_sum(log_softmax_var * self.input_ph, axis=-1),
            lambda: tf.reduce_sum(log_softmax_var * tf.expand_dims(
                self.input_ph, axis=1),
                                  axis=-1))
    else:
        neg_ll_all = tf.reduce_sum(log_softmax_var *
                                   tf.expand_dims(self.input_ph, axis=1),
                                   axis=-1)
    neg_ll_scalar = -tf.reduce_mean(neg_ll_all)
    KL_scalar = tf.reduce_mean(KL)
    # Scalar objective; note the regularization term enters with factor 2.
    neg_ELBO_scalar = neg_ll_scalar + self.anneal_ph * KL_scalar + 2 * reg_var
    if self.test_mean_z == 1:
        # Outside training the scalar objective is used; in training the
        # log-sum-exp over axis 1 plus log(K) is averaged instead.
        loss_iw = tf.cond(
            tf.logical_not(tf.equal(self.is_training_ph, 1)),
            lambda: neg_ELBO_scalar, lambda: tf.reduce_mean(
                tf.reduce_logsumexp(-neg_ll_all + self.anneal_ph * KL, 1) + tf.
                log(tf.cast(self.K, tf.float32))
            ))  # Only change this line to train without importance sampling.
    else:
        loss_iw = tf.reduce_mean(
            tf.reduce_logsumexp(-neg_ll_all + self.anneal_ph * KL, 1) +
            tf.log(tf.cast(self.K, tf.float32)))
        # Outside training, collapse axis 1 of the log-softmax output with a
        # log-sum-exp so the returned tensor has one fewer axis.
        # NOTE(review): nesting inferred - in the test_mean_z == 1 branch the
        # non-training path already multiplies the input in without an extra
        # axis, so this reduction is taken to apply only here; confirm.
        log_softmax_var = tf.cond(
            tf.logical_not(tf.equal(self.is_training_ph, 1)),
            lambda: tf.reduce_logsumexp(log_softmax_var, axis=1),
            lambda: log_softmax_var)
    neg_ELBO = loss_iw + 2 * reg_var
    # First update: Adam on the p/q weights and biases.
    trainer1 = tf.train.AdamOptimizer(self.lr)
    gradvars_1 = trainer1.compute_gradients(neg_ELBO,
                                            var_list=[
                                                self.weights_p, self.biases_p,
                                                self.weights_q, self.biases_q
                                            ])
    train_op1 = trainer1.apply_gradients(gradvars_1)
    # add summary statistics
    tf.summary.scalar('negative_multi_ll', neg_ll_scalar)
    tf.summary.scalar('KL', KL_scalar)
    tf.summary.scalar('neg_ELBO_train', neg_ELBO_scalar)
    merged = tf.summary.merge_all()
    # Second update: a separate Adam optimizer applies the value returned by
    # forward_pass_ARM as the gradient for self.keep_prob_ph.
    trainer2 = tf.train.AdamOptimizer(self.lr)
    gradvars_2 = self.forward_pass_ARM()
    train_op2 = trainer2.apply_gradients([(gradvars_2, self.keep_prob_ph)])
    # A no-op that depends on both updates, so running it runs both.
    with tf.control_dependencies([train_op1, train_op2]):
        train_op = tf.no_op()
    return saver, log_softmax_var, neg_ELBO, train_op, merged
-1.0 / math.sqrt(hiddenLayer3), 1.0 / math.sqrt(hiddenLayer3)), trainable=True) pi1 = tf.Variable(tf.zeros([inputSize]), trainable=True) pi2 = tf.Variable(tf.zeros([inputSize]), trainable=True) pi3 = tf.Variable(tf.zeros([inputSize]), trainable=True) E1 = tf.nn.sigmoid(tf.matmul(X, V1) + mu1) E2 = tf.nn.sigmoid(tf.add(tf.matmul(E1, V2), mu2)) E3 = tf.nn.sigmoid(tf.add(tf.matmul(E2, V3), mu3)) YS1 = tf.multiply(tf.identity(tf.add(tf.matmul(E1, S1), pi1)), mapping) YS2 = tf.multiply(tf.identity(tf.add(tf.matmul(E2, S2), pi2)), mapping) YS3 = tf.multiply(tf.identity(tf.add(tf.matmul(E3, S3), pi3)), mapping) Ypool = (YS1 + YS2 + YS3) / 3 regularize = layers.apply_regularization(layers.l2_regularizer(scale=lambdaR), weights_list=[V1, V2, V3, S1, S2, S3]) difference1NM = X - YS1 difference2NM = X - YS2 difference3NM = X - YS3 differencePool = X - Ypool Loss1NM = tf.reduce_sum(tf.square(difference1NM)) Loss2NM = tf.reduce_sum(tf.square(difference2NM)) Loss3NM = tf.reduce_sum(tf.square(difference3NM)) LossPool = tf.reduce_sum(tf.square(differencePool)) loss = Loss1NM + Loss2NM + Loss3NM + LossPool + regularize optimizer = layers.optimize_loss(loss=loss, global_step=tf.train.get_global_step(),
def train(train_record_file, train_log_step, train_param, val_record_file,
          val_log_step, labels_nums, data_shape, snapshot, snapshot_prefix):
    '''
    Train the classifier from TFRecord files, logging and checkpointing as it goes.

    :param train_record_file: TFRecord file with the training set
    :param train_log_step: interval (in steps) between training log lines
    :param train_param: ``[base_lr, max_steps]``
    :param val_record_file: TFRecord file with the validation set
    :param val_log_step: interval (in steps) between validation runs
    :param labels_nums: number of labels (classes)
    :param data_shape: ``[batch_size, resize_height, resize_width, depths]``
    :param snapshot: interval (in steps) between regular checkpoints
    :param snapshot_prefix: path prefix used for checkpoint files
    :return: None

    NOTE(review): ``input_images``, ``input_labels``, ``keep_prob``,
    ``is_training``, ``nets``, ``read_records``, ``get_batch_images``,
    ``get_example_nums`` and ``net_evaluation`` are not defined here; they are
    assumed to exist at module level.
    '''
    [base_lr, max_steps] = train_param
    [batch_size, resize_height, resize_width, depths] = data_shape

    # Number of training and validation examples.
    train_nums = get_example_nums(train_record_file)
    val_nums = get_example_nums(val_record_file)
    print('train nums:%d,val nums:%d' % (train_nums, val_nums))

    # Read images and labels from the records.
    # Training data should be presented in random order, hence shuffle=True.
    train_images, train_labels = read_records(
        train_record_file, resize_height, resize_width, type='normalization')
    train_images_batch, train_labels_batch = get_batch_images(
        train_images, train_labels,
        batch_size=batch_size, labels_nums=labels_nums,
        one_hot=True, shuffle=True)
    # Validation data does not need to be shuffled.
    val_images, val_labels = read_records(
        val_record_file, resize_height, resize_width, type='normalization')
    val_images_batch, val_labels_batch = get_batch_images(
        val_images, val_labels,
        batch_size=batch_size, labels_nums=labels_nums,
        one_hot=True, shuffle=False)

    reg = layers.l2_regularizer(scale=0.1)
    out = nets(inputs=input_images, num_classes=labels_nums,
               dropout_keep_prob=keep_prob, is_training=is_training, reg=reg)
    print("combine.shape={}".format(out.get_shape()))

    weight = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES)
    reg_ws = layers.apply_regularization(reg, weights_list=weight)
    loss1 = tf.reduce_sum(
        tf.nn.softmax_cross_entropy_with_logits(labels=input_labels,
                                                logits=out))
    # Observed by the original author: without the regularisation term the
    # loss stays below 100, with it the loss exceeds 10000.
    loss = loss1 + tf.reduce_sum(reg_ws)
    tf.summary.scalar("loss", loss)
    train_op = tf.train.AdamOptimizer(base_lr).minimize(loss)

    accuracy = tf.reduce_mean(
        tf.cast(tf.equal(tf.argmax(out, 1), tf.argmax(input_labels, 1)),
                tf.float32))
    tf.summary.scalar("accuracy", accuracy)
    merged = tf.summary.merge_all()
    train_writer = tf.summary.FileWriter('./log', tf.get_default_graph())

    saver = tf.train.Saver()
    max_acc = 0.0
    try:
        with tf.Session() as sess:
            sess.run(tf.global_variables_initializer())
            sess.run(tf.local_variables_initializer())
            coord = tf.train.Coordinator()
            threads = tf.train.start_queue_runners(sess=sess, coord=coord)
            try:
                for i in range(max_steps + 1):
                    batch_input_images, batch_input_labels = sess.run(
                        [train_images_batch, train_labels_batch])
                    _, train_loss = sess.run(
                        [train_op, loss],
                        feed_dict={
                            input_images: batch_input_images,
                            input_labels: batch_input_labels,
                            keep_prob: 0.8,
                            is_training: True
                        })

                    # Training check (only on the batch just trained on).
                    if i % train_log_step == 0:
                        train_acc, train_summary = sess.run(
                            [accuracy, merged],
                            feed_dict={
                                input_images: batch_input_images,
                                input_labels: batch_input_labels,
                                keep_prob: 1.0,
                                is_training: False
                            })
                        train_writer.add_summary(train_summary, i)
                        print(
                            "%s: Step [%d] train Loss : %f, training accuracy : %g"
                            % (datetime.now(), i, train_loss, train_acc))

                    # Validation check (over the whole validation set).
                    validated = i % val_log_step == 0
                    if validated:
                        mean_loss, mean_acc = net_evaluation(
                            sess, loss, accuracy, val_images_batch,
                            val_labels_batch, val_nums)
                        print("%s: Step [%d] val Loss : %f, val accuracy : %g"
                              % (datetime.now(), i, mean_loss, mean_acc))

                    # Regular checkpoint: every `snapshot` steps and at the end.
                    if (i % snapshot == 0 and i > 0) or i == max_steps:
                        print('-----save:{}-{}'.format(snapshot_prefix, i))
                        saver.save(sess, snapshot_prefix, global_step=i)

                    # Keep the model with the best validation accuracy. Only
                    # a fresh validation result can beat the running maximum,
                    # so the check is tied to `validated` rather than to a
                    # stale `mean_acc` from an earlier step.
                    if validated and mean_acc > max_acc and mean_acc > 0.5:
                        max_acc = mean_acc
                        path = os.path.dirname(snapshot_prefix)
                        best_models = os.path.join(
                            path,
                            'best_models_{}_{:.4f}.ckpt'.format(i, max_acc))
                        print('------save:{}'.format(best_models))
                        saver.save(sess, best_models)
            finally:
                # Always stop the input threads, even if a step raised.
                coord.request_stop()
                coord.join(threads)
    finally:
        train_writer.close()
def __init__(self):
    """Build the GAN graph: inputs, generator, paired discriminators, losses
    and one RMSProp train op per network."""
    # The batch size is the same for noise and real data; the noise vector
    # uses the same width.
    self.batch_size = 196
    self.n_noise = 196

    # Placeholders for real images and for generator noise.
    self.X_in = tf.placeholder(dtype=tf.float32, shape=[None, 28, 28],
                               name='X')
    self.noise = tf.placeholder(dtype=tf.float32,
                                shape=[None, self.n_noise])

    # Generator producing fake data from noise.
    # NOTE(review): `keep_prob` and `is_training` are not defined in this
    # method - presumably module-level placeholders; confirm.
    self.g = Generator(self.noise, keep_prob, is_training)

    # Two discriminator applications: one on real data, one on generated
    # data. The second is built with reuse=True.
    self.d_real = Discriminator(self.X_in)
    self.d_fake = Discriminator(self.g, reuse=True)

    # Split trainable variables by name prefix.
    trainable = tf.trainable_variables()
    self.vars_g = [v for v in trainable if v.name.startswith("generator")]
    self.vars_d = [v for v in trainable
                   if v.name.startswith("discriminator")]

    # Light L2 penalty on each network's variables.
    self.d_reg = apply_regularization(l2_regularizer(1e-6), self.vars_d)
    self.g_reg = apply_regularization(l2_regularizer(1e-6), self.vars_g)

    # Per-example binary cross entropy of the discriminator outputs against
    # all-ones (real) and all-zeros (fake) targets.
    real_targets = tf.ones_like(self.d_real)
    self.loss_d_real = binary_cross_entropy(real_targets, self.d_real)
    fake_targets = tf.zeros_like(self.d_fake)
    self.loss_d_fake = binary_cross_entropy(fake_targets, self.d_fake)

    # Generator wants fakes scored as ones; discriminator loss is the mean
    # of the averaged real/fake terms.
    self.loss_g = tf.reduce_mean(
        binary_cross_entropy(tf.ones_like(self.d_fake), self.d_fake))
    self.loss_d = tf.reduce_mean(
        0.5 * (self.loss_d_real + self.loss_d_fake))

    # Ops registered under UPDATE_OPS must run before each training step.
    self.update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)

    with tf.control_dependencies(self.update_ops):
        # Total losses including regularisation.
        regularized_d = self.loss_d + self.d_reg
        regularized_g = self.loss_g + self.g_reg

        learning_rate = 0.00015

        # One RMSProp optimiser per network, each restricted to its own
        # variable list.
        d_solver = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
        self.optimizer_d = d_solver.minimize(regularized_d,
                                             var_list=self.vars_d)
        g_solver = tf.train.RMSPropOptimizer(learning_rate=learning_rate)
        self.optimizer_g = g_solver.minimize(regularized_g,
                                             var_list=self.vars_g)
def __init__(self, user_num, item_num, f, user_pos_length, user_neg_length,
             item_pos_length, item_neg_length, user_pos_vocab_size,
             user_neg_vocab_size, item_pos_vocab_size, item_neg_vocab_size,
             embedding_size, filter_sizes, num_filters, n_pos_aspect,
             n_neg_aspect):
    """Build the aspect-based rating-prediction graph.

    Args:
        user_num, item_num: number of rows of the user / item latent tables.
        f: latent-factor dimension.
        user_pos_length, user_neg_length, item_pos_length, item_neg_length:
            token lengths of the four text inputs.
        user_pos_vocab_size, user_neg_vocab_size, item_pos_vocab_size,
        item_neg_vocab_size: vocabulary sizes of the four embedding tables.
        embedding_size: word-embedding width.
        filter_sizes: iterable of convolution filter heights.
        num_filters: number of filters per filter size.
        n_pos_aspect, n_neg_aspect: number of positive / negative aspects.

    Exposes ``self.predictions`` (shape ``[batch]``), ``self.loss``,
    ``self.mae`` and ``self.accuracy`` (RMSE).
    """
    # ---- inputs -------------------------------------------------------
    self.input_u_pos = tf.placeholder(tf.int32, [None, user_pos_length], name='input_u_pos')
    self.input_u_neg = tf.placeholder(tf.int32, [None, user_neg_length], name='input_u_neg')
    self.input_i_pos = tf.placeholder(tf.int32, [None, item_pos_length], name='input_i_pos')
    self.input_i_neg = tf.placeholder(tf.int32, [None, item_neg_length], name='input_i_neg')
    self.input_y = tf.placeholder(tf.float32, [None, 1], name="input_y")
    self.input_uid = tf.placeholder(tf.int32, [None, 1], name="input_uid")
    self.input_iid = tf.placeholder(tf.int32, [None, 1], name="input_iid")
    self.dropout_keep_prob = tf.placeholder(tf.float32, name="dropout_keep_prob")

    # ---- fixed (non-trainable) word embeddings --------------------------
    with tf.name_scope("user_pos_embedding"):
        self.Wu_pos = tf.Variable(tf.random_uniform([user_pos_vocab_size, embedding_size], -1.0, 1.0),
                                  trainable=False, name='Wu_pos')
        self.embedded_user_pos = tf.nn.embedding_lookup(self.Wu_pos, self.input_u_pos)
        self.embedded_users_pos = tf.expand_dims(self.embedded_user_pos, -1)
    with tf.name_scope("user_neg_embedding"):
        self.Wu_neg = tf.Variable(tf.random_uniform([user_neg_vocab_size, embedding_size], -1.0, 1.0),
                                  trainable=False, name='Wu_neg')
        self.embedded_user_neg = tf.nn.embedding_lookup(self.Wu_neg, self.input_u_neg)
        self.embedded_users_neg = tf.expand_dims(self.embedded_user_neg, -1)
    # NOTE(review): the item embeddings below are built but not consumed
    # anywhere else in this constructor.
    with tf.name_scope("item_pos_embedding"):
        self.Wi_pos = tf.Variable(tf.random_uniform([item_pos_vocab_size, embedding_size], -1.0, 1.0),
                                  trainable=False, name='Wi_pos')
        self.embedded_item_pos = tf.nn.embedding_lookup(self.Wi_pos, self.input_i_pos)
        self.embedded_items_pos = tf.expand_dims(self.embedded_item_pos, -1)
    with tf.name_scope("item_neg_embedding"):
        self.Wi_neg = tf.Variable(tf.random_uniform([item_neg_vocab_size, embedding_size], -1.0, 1.0),
                                  trainable=False, name='Wi_neg')
        self.embedded_item_neg = tf.nn.embedding_lookup(self.Wi_neg, self.input_i_neg)
        self.embedded_items_neg = tf.expand_dims(self.embedded_item_neg, -1)

    # ---- latent factors and aspect projections --------------------------
    with tf.name_scope("user_latent_factors"):
        self.user_Matrix = tf.Variable(tf.random_uniform([user_num, f], -1.0, 1.0), name='user_Matrix')
        self.user_latent = tf.nn.embedding_lookup(self.user_Matrix, self.input_uid)
        self.user_latent = tf.reshape(self.user_latent, [-1, f])
    with tf.name_scope("item_latent_factors"):
        self.item_Matrix = tf.Variable(tf.random_uniform([item_num, f], -1.0, 1.0), name='item_Matrix')
        self.item_latent = tf.nn.embedding_lookup(self.item_Matrix, self.input_iid)
        self.item_latent = tf.reshape(self.item_latent, [-1, f])
    with tf.name_scope("pos_aspect_weight"):
        self.pos_W = tf.Variable(tf.random_uniform([n_pos_aspect, f], -1.0, 1.0), name='pos_W')
    with tf.name_scope("neg_aspect_weight"):
        self.neg_W = tf.Variable(tf.random_uniform([n_neg_aspect, f], -1.0, 1.0), name='neg_W')

    num_filters_total = num_filters * len(filter_sizes)

    def aspect_tower(embedded, seq_length, n_aspect, conv_scope_fmt, dense_name):
        """Conv over the token axis, per-token aspect softmax, then
        per-example aspect importance. Returns every intermediate tensor
        plus the dense layer's weight and bias."""
        feature_maps = []
        for filter_size in filter_sizes:
            with tf.name_scope(conv_scope_fmt % filter_size):
                # [filter_height, filter_width, in_channels, out_channels]
                filter_shape = [filter_size, embedding_size, 1, num_filters]
                W = tf.Variable(tf.truncated_normal(filter_shape, stddev=0.1), name="W")
                b = tf.Variable(tf.constant(0.1, shape=[num_filters]), name="b")
                # Striding by embedding_size along the width collapses it
                # to 1: batch_size * seq_length * 1 * num_filters.
                conv = tf.nn.conv2d(embedded, W,
                                    strides=[1, 1, embedding_size, 1],
                                    padding="SAME", name="conv")
                h = tf.nn.relu(tf.nn.bias_add(conv, b), name="relu")
                feature_maps.append(tf.reshape(h, [-1, seq_length, num_filters]))
        concatenated = tf.concat(feature_maps, 2)
        flattened = tf.reshape(concatenated, [-1, num_filters_total])
        # Layer 1: token features -> aspect distribution.
        dense_W = tf.get_variable(dense_name, shape=[num_filters_total, n_aspect],
                                  initializer=tf.contrib.layers.xavier_initializer())
        dense_b = tf.Variable(tf.constant(0.1, shape=[n_aspect]))
        token_aspects = tf.nn.softmax(tf.nn.relu(tf.matmul(flattened, dense_W) + dense_b))
        aspect_sum = tf.reduce_sum(tf.reshape(token_aspects, [-1, seq_length, n_aspect]), axis=1)
        importance = tf.nn.softmax(aspect_sum)  # batch_size * n_aspect
        return concatenated, flattened, token_aspects, aspect_sum, importance, dense_W, dense_b

    (self.output_u_pos_con, self.output_u_pos_res, self.u_pos_l1,
     self.pos_asp, self.pos_asp_imp, Wu_pos_1, bu_pos_1) = aspect_tower(
         self.embedded_users_pos, user_pos_length, n_pos_aspect,
         "user_pos_conv-%s", "Wu_pos_1")
    (self.output_u_neg_con, self.output_u_neg_res, self.u_neg_l1,
     self.neg_asp, self.neg_asp_imp, Wu_neg_1, bu_neg_1) = aspect_tower(
         self.embedded_users_neg, user_neg_length, n_neg_aspect,
         "user_neg_conv-%s", "Wu_neg_1")

    def cross_importance(scope, target_W, n_target, source_W, source_imp):
        """For each target aspect, attend over the source aspects and
        return the attended source importance (one [batch, 1] tensor per
        target aspect)."""
        contributions = []
        with tf.name_scope(scope):
            W = tf.Variable(tf.truncated_normal(shape=[f, f], stddev=0.1), name="W")
            b = tf.Variable(tf.constant(0.1, shape=[f]), name='b')
            h = tf.Variable(tf.truncated_normal(shape=[f, 1], stddev=0.1), name="h")
            for idx in range(n_target):
                mul = tf.multiply(source_W, target_W[idx])
                rel = tf.nn.relu(tf.matmul(mul, W) + b)
                attn = tf.nn.softmax(tf.matmul(rel, h), dim=0)  # n_source_aspect * 1
                contributions.append(tf.matmul(source_imp, attn))  # batch_size * 1
        return contributions

    neg_asp_imp_add = cross_importance("pos2neg_imp", self.neg_W, n_neg_aspect,
                                       self.pos_W, self.pos_asp_imp)
    pos_asp_imp_add = cross_importance("neg2pos_imp", self.pos_W, n_pos_aspect,
                                       self.neg_W, self.neg_asp_imp)

    with tf.name_scope("prediction"):
        self.interaction = tf.multiply(self.user_latent, self.item_latent)
        self.pos_asp_r = tf.matmul(self.interaction, tf.transpose(self.pos_W))  # batch_size * n_pos_asp
        self.pos_imp = self.pos_asp_imp + tf.concat(pos_asp_imp_add, -1)
        self.pos_r = tf.reduce_sum(tf.multiply(self.pos_asp_r, self.pos_imp), axis=-1)
        self.neg_asp_r = tf.matmul(self.interaction, tf.transpose(self.neg_W))
        self.neg_imp = self.neg_asp_imp + tf.concat(neg_asp_imp_add, -1)
        self.neg_r = tf.reduce_sum(tf.multiply(self.neg_asp_r, self.neg_imp), axis=-1)
        self.predictions = self.pos_r - self.neg_r

    # ---- regularisation -------------------------------------------------
    regularizer = layers.l2_regularizer(scale=1.0)
    Var_list_1 = [Wu_pos_1, bu_pos_1, Wu_neg_1, bu_neg_1]
    for filter_size in filter_sizes:
        Var_list_1 += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope="user_pos_conv-%s" % filter_size)
        Var_list_1 += tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
                                        scope="user_neg_conv-%s" % filter_size)
    reg_1 = layers.apply_regularization(regularizer, weights_list=Var_list_1)
    Var_list_3 = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="pos2neg_imp") \
        + tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES, scope="neg2pos_imp")
    reg_3 = layers.apply_regularization(regularizer, weights_list=Var_list_3)
    self.variables = Var_list_1 + Var_list_3
    reg_4 = layers.apply_regularization(
        regularizer,
        weights_list=[self.user_Matrix, self.item_Matrix, self.pos_W, self.neg_W])

    # `self.predictions` is [batch] while `self.input_y` is [batch, 1].
    # Subtracting them directly broadcasts to [batch, batch] and compares
    # every prediction with every target, so the targets are flattened to
    # [batch] first.
    targets = tf.reshape(self.input_y, [-1])

    with tf.name_scope("loss"):
        beta_2 = 0.001
        losses = tf.reduce_mean(tf.square(tf.subtract(self.predictions, targets)))
        self.loss = losses + beta_2 * (reg_1 + reg_3 + reg_4)
    with tf.name_scope("accuracy"):
        self.mae = tf.reduce_mean(tf.abs(tf.subtract(self.predictions, targets)))
        self.accuracy = tf.sqrt(tf.reduce_mean(tf.square(tf.subtract(self.predictions, targets))))
import tensorflow as tf
from tensorflow.contrib import layers

# Scratch script: resolve the training-image glob to a concrete file list.
data_dir = '/home/pzp/PycharmProjects/pzp_vgg16_project/data/train/*.JPEG'

# `match_filenames_once` keeps the glob result in a *local* variable, so the
# local-variable initializer has to run before the result can be read; the
# global initializer alone leaves it uninitialized. The result is also no
# longer bound to the name `list`, which shadowed the builtin.
matched_files = tf.train.match_filenames_once(data_dir)

with tf.Session() as sess:
    sess.run([tf.global_variables_initializer(),
              tf.local_variables_initializer()])
    dir_list = sess.run(matched_files)

# NOTE(review): looks like a debugger breakpoint anchor - confirm before removing.
stop = 1

# NOTE: a bare `layers.apply_regularization()` call was dropped from the end of
# this script; the function requires a regularizer argument, so the call
# always raised TypeError.
def test(self, modelpath):
    """Evaluate three C3D checkpoints on their test lists and print the
    per-network and ensemble accuracy.

    Args:
        modelpath: path prefix; each entry of ``network_models`` is appended
            to it to form the checkpoint path to restore.

    The fusion rule is chosen by ``self.ensemble_type``: 1 averages the
    softmax outputs, 2 takes the element-wise maximum, anything else is a
    majority vote that falls back to the max rule on ties.
    """
    with self.graph.as_default():
        c3d_net = [
            ["conv", "conv1", [3, 3, 3, 3, 64], 'wc1', 'bc1'],
            ["maxpool", "pool1", [1, 1, 2, 2, 1]],
            ["conv", "conv2", [3, 3, 3, 64, 128], 'wc2', 'bc2'],
            ["maxpool", "pool2", [1, 2, 2, 2, 1]],
            ["conv", "conv3a", [3, 3, 3, 128, 256], 'wc3a', 'bc3a'],
            ["conv", "conv3b", [3, 3, 3, 256, 256], 'wc3b', 'bc3b'],
            ["maxpool", "pool3", [1, 2, 2, 2, 1]],
            ["conv", "conv4a", [3, 3, 3, 256, 512], 'wc4a', 'bc4a'],
            ["conv", "conv4b", [3, 3, 3, 512, 512], 'wc4b', 'bc4b'],
            ["maxpool", "pool4", [1, 2, 2, 2, 1]],
            ["conv", "conv5a", [3, 3, 3, 512, 512], 'wc5a', 'bc5a'],
            ["conv", "conv5b", [3, 3, 3, 512, 512], 'wc5b', 'bc5b'],
            ["maxpool", "pool5", [1, 2, 2, 2, 1]],
            # Only keep this transpose when restoring
            # sports1m_finetuning_ucf101.model; otherwise comment it out
            # (e.g. for conv3d_deepnetA_sport1m_iter_1900000_TF.model).
            ["transpose", [0, 1, 4, 2, 3]],
            ["reshape", [-1, 8192]],
            ["fc", "fc1", [8192, 4096], 'wd1', 'bd1', True],
            ["dropout", "dropout1", self.keep_prob],
            ["fc", "fc2", [4096, 4096], 'wd2', 'bd2', True],
            ["dropout", "dropout2", self.keep_prob],
            ["fc", "fc3", [4096, self.num_class], 'wout', 'bout', False],
        ]
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.9

        with tf.Session(config=config, graph=self.graph) as sess:
            logits = self.parseNet(self.inputs, c3d_net)
            softmax_logits = tf.nn.softmax(logits)
            int_label = self.labels

            # NOTE(review): the loss / accuracy / train ops below are never
            # run during testing. They are kept so the graph matches what
            # the original built - confirm whether they can be removed.
            task_loss = tf.reduce_sum(
                tf.nn.sparse_softmax_cross_entropy_with_logits(
                    logits=logits, labels=int_label))
            acc = tf.reduce_mean(
                tf.cast(
                    tf.equal(tf.argmax(softmax_logits, axis=-1), int_label),
                    tf.float32))
            right_count = tf.reduce_sum(
                tf.cast(
                    tf.equal(tf.argmax(softmax_logits, axis=1), int_label),
                    tf.int32))
            ensemble_logist = softmax_logits
            reg_loss = layers.apply_regularization(
                layers.l2_regularizer(5e-4),
                tf.get_collection(tf.GraphKeys.WEIGHTS))
            total_loss = task_loss + reg_loss
            train_op = tf.train.GradientDescentOptimizer(self.lr).minimize(
                total_loss, global_step=self.global_step)

            total_para = np.sum([
                np.prod(v.get_shape().as_list())
                for v in tf.trainable_variables()
            ])
            print('total_para:', total_para)

            init = tf.global_variables_initializer()
            sess.run(init)
            saver = tf.train.Saver(tf.trainable_variables())

            # One test list and one checkpoint name per network (same order).
            test_list = [
                "./hmdb51_rgb_test.list", "./hmdb51_fsaf_test.list",
                './hmdb51_of_test2.list'
            ]
            network_models = [
                'c3d_hmdb51_rgb', 'c3d_hmdb51_fsaf', 'c3d_hmdb51_of'
            ]

            list_accuracy = []
            pred_labels = []
            true_labels = []
            num_networks = len(network_models)

            for m in range(num_networks):
                softmax_one_networks = []
                saver.restore(sess, modelpath + network_models[m])
                print("Model {:2d} loading finished!".format(m))

                # Count the non-blank entries; the file is closed right away.
                with open(test_list[m], 'r') as list_file:
                    number_of_line = sum(
                        1 for line in list_file if line.strip())
                self.test_size = number_of_line

                next_start_pos = 0
                total_v = 0.0
                test_correct_num = 0
                for _batch_idx in tqdm(
                        range(self.test_size // self.batch_size)):
                    total_v += self.batch_size
                    train_batch, label_batch, next_start_pos, _, _ = read_clip_and_label(
                        filename=test_list[m],
                        batch_size=self.batch_size,
                        start_pos=next_start_pos,
                        num_frames_per_clip=self.CLIP_LENGTH,
                        height=self.IMG_HEIGHT,
                        width=self.IMG_WIDTH,
                        shuffle=False)
                    assert len(train_batch) == self.batch_size
                    train_batch = train_aug(
                        train_batch,
                        is_train=False,
                        Crop_heith=self.CROP_HEIGHT,
                        Crop_width=self.CROP_WIDTH,
                        norm=True)
                    val_feed = {
                        self.inputs: train_batch,
                        self.labels: label_batch
                    }
                    # One forward pass yields both the correct count and the
                    # softmax used for the ensemble.
                    batch_correct, softmax = sess.run(
                        [right_count, ensemble_logist], val_feed)
                    test_correct_num += batch_correct
                    if m == 0:
                        # Ground truth is collected from the first network only.
                        true_labels.extend(label_batch)
                    softmax_one_networks.extend(softmax)

                print('test acc:', test_correct_num / total_v,
                      'test_correct_num:', test_correct_num, 'total_v:',
                      total_v)
                list_accuracy.append(test_correct_num / total_v)
                pred_labels.append(softmax_one_networks)

            print(list_accuracy)
            # pred_labels shape = (num_networks, num_label, num_class)
            print(np.shape(true_labels), np.shape(pred_labels))

            # ---- ensemble ------------------------------------------------
            number_of_test = len(true_labels)
            if self.ensemble_type == 1:
                # average fusion
                ensemble_pred_labels = np.mean(pred_labels, axis=0)
                ensemble_cls_pred = np.argmax(ensemble_pred_labels, axis=1)
            elif self.ensemble_type == 2:
                # max fusion
                ensemble_pred_labels = np.amax(pred_labels, axis=0)
                ensemble_cls_pred = np.argmax(ensemble_pred_labels, axis=1)
            else:
                # vote fusion
                vote_softmax = np.zeros(number_of_test, dtype=int)
                print(number_of_test, np.shape(pred_labels))
                max_fusion_pred = None  # computed once, on the first tie
                for i in range(number_of_test):
                    votes = Counter(
                        np.argmax(pred_labels[net][i], axis=0)
                        for net in range(num_networks))
                    ranked = votes.most_common()
                    tied = len(ranked) > 1 and ranked[1][1] == ranked[0][1]
                    if tied:
                        # Several labels share the top vote count: fall back
                        # to the max-fusion prediction for this sample.
                        if max_fusion_pred is None:
                            max_fusion_pred = np.argmax(
                                np.amax(pred_labels, axis=0), axis=1)
                        vote_softmax[i] = max_fusion_pred[i]
                    else:
                        vote_softmax[i] = ranked[0][0]
                ensemble_cls_pred = vote_softmax

            ensemble_correct = (ensemble_cls_pred == true_labels)
            print('ensemble accuracy:',
                  np.sum(ensemble_correct / number_of_test))
def buildGraph(input_placeholder_s1, input_placeholder_s2, labels_placeholder,
               mask_placeholder_s1, mask_placeholder_s2, dropout_placeholder,
               embeddings_matrix):
    """Build a two-sentence LSTM classifier with attention over sentence 1.

    Sentence 1 is encoded first; its final cell state seeds the encoder of
    sentence 2. The last hidden state of sentence 2 attends over all outputs
    of sentence 1, and the attended vector plus that hidden state feed a
    softmax classifier trained with Adam and global-norm gradient clipping.

    NOTE(review): ``max_length``, ``embed_size``, ``hidden_size``,
    ``n_classes``, ``l1_reg``, ``l2_reg``, ``lr`` and ``max_grad_norm`` are
    read from the enclosing scope.

    Returns:
        tuple: ``(pred, loss, train_op)`` - logits, regularised scalar loss,
        and the clipped-gradient update op.
    """
    # Embedding table initialised from the supplied matrix.
    embedding_table = tf.Variable(embeddings_matrix)
    looked_up_s1 = tf.nn.embedding_lookup(embedding_table, input_placeholder_s1)
    looked_up_s2 = tf.nn.embedding_lookup(embedding_table, input_placeholder_s2)
    seq_s1 = tf.reshape(looked_up_s1, [-1, max_length, embed_size])
    seq_s2 = tf.reshape(looked_up_s2, [-1, max_length, embed_size])

    encoder_s1 = LSTMCell(embed_size, hidden_size)
    encoder_s2 = LSTMCell2(embed_size, hidden_size)

    # --- encode sentence 1 from an all-zero state -------------------------
    zero_c = tf.zeros([tf.shape(seq_s1)[0], hidden_size])
    zero_h = tf.zeros([tf.shape(seq_s2)[0], hidden_size])
    start_state_s1 = tf.contrib.rnn.LSTMStateTuple(zero_c, zero_h)
    lengths_s1 = tf.reduce_sum(tf.cast(mask_placeholder_s1, tf.int32), axis=1)
    outputs1, state1 = tf.nn.dynamic_rnn(encoder_s1, seq_s1,
                                         dtype=tf.float32,
                                         initial_state=start_state_s1,
                                         sequence_length=lengths_s1)

    # --- encode sentence 2, carrying over sentence 1's cell state ----------
    zero_h = tf.zeros([tf.shape(seq_s2)[0], hidden_size])
    start_state_s2 = tf.contrib.rnn.LSTMStateTuple(state1.c, zero_h)
    lengths_s2 = tf.reduce_sum(tf.cast(mask_placeholder_s2, tf.int32), axis=1)
    outputs2, state2 = tf.nn.dynamic_rnn(encoder_s2, seq_s2,
                                         dtype=tf.float32,
                                         initial_state=start_state_s2,
                                         sequence_length=lengths_s2)

    xavier = xavier_weight_init()

    # --- attention of the final hidden state over sentence-1 outputs -------
    # Y: sentence-1 outputs with the last two axes swapped.
    Y = tf.transpose(outputs1, perm=[0, 2, 1])
    W_y = tf.Variable(xavier([hidden_size, hidden_size]))
    W_h = tf.Variable(xavier([hidden_size, hidden_size]))
    ones_row = tf.constant(1.0, shape=[1, max_length])

    projected_Y = tf.tensordot(W_y, Y, axes=[[0], [1]])
    projected_Y = tf.transpose(projected_Y, perm=[1, 0, 2])

    # Tile the final hidden state along the time axis via an outer product
    # with a row of ones, then project it.
    last_hidden = tf.reshape(state2.h, shape=[-1, hidden_size, 1])
    tiled_hidden = tf.tensordot(last_hidden, ones_row, axes=[[2], [0]])
    tiled_hidden = tf.tensordot(W_h, tiled_hidden, axes=[[0], [1]])
    tiled_hidden = tf.transpose(tiled_hidden, perm=[1, 0, 2])

    M = tf.tanh(projected_Y + tiled_hidden)
    w_alpha = tf.Variable(xavier([1, hidden_size]))
    attention = tf.nn.softmax(tf.tensordot(w_alpha, M, axes=[[1], [1]]))
    attention = tf.transpose(attention, perm=[1, 2, 0])
    attention = tf.reshape(attention, shape=[-1, max_length, 1])

    # Attention-weighted summary of sentence 1.
    summary = tf.matmul(Y, attention)
    summary = tf.reshape(summary, shape=[-1, hidden_size])

    # --- classifier -----------------------------------------------------
    U = tf.Variable(xavier([hidden_size, n_classes]))
    b1 = tf.Variable(tf.zeros([1, n_classes]))
    W_p = tf.Variable(xavier([hidden_size, hidden_size]))
    W_x = tf.Variable(xavier([hidden_size, hidden_size]))

    combined = tf.tanh(tf.matmul(summary, W_p) + tf.matmul(state2.h, W_x))
    # The placeholder carries a drop rate; tf.nn.dropout wants a keep prob.
    combined_dropped = tf.nn.dropout(combined,
                                     keep_prob=1 - dropout_placeholder)
    pred = tf.matmul(combined_dropped, U) + b1

    # --- loss: mean cross entropy + L1/L2 on all trainable variables -------
    xent = tf.nn.softmax_cross_entropy_with_logits(labels=labels_placeholder,
                                                   logits=pred)
    loss = tf.reduce_mean(xent)
    penalty = apply_regularization(l1_l2_regularizer(l1_reg, l2_reg),
                                   tf.trainable_variables())
    loss += penalty

    # --- optimiser with global-norm gradient clipping ---------------------
    optimizer = tf.train.AdamOptimizer(learning_rate=lr)
    grads_and_vars = optimizer.compute_gradients(loss)
    raw_grads = [grad for grad, _ in grads_and_vars]
    clipped_grads, global_norm = tf.clip_by_global_norm(raw_grads,
                                                        max_grad_norm)
    clipped_pairs = [(grad, var)
                     for grad, (_, var) in zip(clipped_grads, grads_and_vars)]
    train_op = optimizer.apply_gradients(clipped_pairs)

    return pred, loss, train_op
def build_model(self):
    """Build the tagging graph: embeddings, char Bi-RNN, sentence Bi-RNN,
    optional self-attention, dense projection, CRF loss and train op.

    Sets ``self.cost`` and ``self.train_op`` (and ``self._embeddings``).
    """
    self._build_placeholder()

    data = None
    # Load word vocab and char vocab if we are using pretrained embedding
    if self.parameter['use_word_pretrained'] or self.parameter[
            'use_char_pretrained']:
        # NOTE(review): pickle.load can execute arbitrary code; only use a
        # 'necessary.pkl' that this project produced itself.
        with open('necessary.pkl', 'rb') as f:
            data = pickle.load(f)
    self._build_word_and_char_embedding(data)

    # Look up each embedding (index 0: morphemes, index 1: characters).
    self._embeddings = []
    self._embeddings.append(
        tf.nn.embedding_lookup(self._embedding_matrix[0], self.morph))
    self._embeddings.append(
        tf.nn.embedding_lookup(self._embedding_matrix[1], self.character))

    # Build the character-level (syllable) representation of each word.
    character_embedding = tf.reshape(self._embeddings[1], [
        -1, self.parameter["word_length"],
        self.parameter["embedding"][1][2]
    ])
    char_len = tf.reshape(self.character_len, [-1])

    # Dropout after embedding, before lstm layer
    if self.parameter["use_dropout_after_embedding"]:
        character_embedding = tf.nn.dropout(character_embedding,
                                            self.emb_dropout_keep_prob)

    character_emb_rnn = self._build_birnn_model(
        character_embedding,
        char_len,
        self.parameter["char_lstm_units"],
        self.lstm_dropout_keep_prob,
        last=True,
        scope="char_layer")

    # Concatenate everything obtained above. The raw character embedding
    # (last entry of self._embeddings) is skipped on purpose: the char
    # Bi-RNN output stands in for it.
    all_data_emb = self.ne_dict
    for i in range(0, len(self._embeddings) - 1):
        all_data_emb = tf.concat([all_data_emb, self._embeddings[i]],
                                 axis=2)
    all_data_emb = tf.concat([all_data_emb, character_emb_rnn], axis=2)

    if self.parameter["use_highway"]:
        all_data_emb = self._build_highway(all_data_emb,
                                           self.parameter["num_layers"],
                                           scope="highway")

    # Dropout after embedding, before lstm layer
    if self.parameter["use_dropout_after_embedding"]:
        all_data_emb = tf.nn.dropout(all_data_emb,
                                     self.emb_dropout_keep_prob)

    # Run the sentence-level Bi-RNN over the combined features.
    sentence_output = self._build_birnn_model(
        all_data_emb,
        self.sequence,
        self.parameter["lstm_units"],
        self.lstm_dropout_keep_prob,
        scope="all_data_layer")

    if self.parameter["use_self_attention"]:
        aligned_output = self._attention(sentence_output,
                                         self.parameter["lstm_units"],
                                         self.parameter["num_heads"],
                                         self.sequence,
                                         scope="attention")
        outputs = tf.concat([sentence_output, aligned_output], axis=2)
    else:
        outputs = sentence_output

    # NOTE(review): tf.nn.dropout's second positional argument is keep_prob;
    # confirm that `self.dropout_rate` really holds a keep probability.
    outputs = tf.nn.dropout(outputs, self.dropout_rate)

    # [b, t, 3*d] -> [b, t, C]
    logits = self._build_dense_layer(outputs)

    # crf layer
    crf_cost = self._build_crf_layer(logits)

    if self.parameter["use_reg_loss"]:
        # REGULARIZATION_LOSSES already holds the penalty tensors produced by
        # the regularizers attached to the variables. They only need to be
        # summed; feeding them through the regularizer again would penalise
        # the penalties instead of the weights.
        reg_term = tf.losses.get_regularization_loss()
        self.cost = crf_cost + reg_term
    else:
        self.cost = crf_cost

    self.train_op = self._build_output_layer(self.cost)

    # Exponential moving average: fetching self.cost also updates the
    # shadow variables.
    if self.parameter["use_ema"]:
        var_ema = tf.train.ExponentialMovingAverage(
            decay=self.parameter["ema_decay_rate"])
        ema_op = var_ema.apply(tf.trainable_variables())
        with tf.control_dependencies([ema_op]):
            self.cost = tf.identity(self.cost)
def setup_model(self, input):
    '''
    Wire up the GAN: networks, losses, optimizers and summaries.

    Args:
        input: a dictionary containing 'z', 'im_gt' and 'sample_z'

    Returns:
        The generator output for input['z'] (also stored as self.output).
    '''
    flags = self.F

    # ---- (1) main model ----------------------------------------------
    generated = self.G(input['z'])
    d_out_real, d_logit_real = self.D(input['im_gt'])
    d_out_fake, d_logit_fake = self.D(generated, reuse=True)

    self.output = generated
    self.sample_img = self.G(input['sample_z'], reuse=True)

    # ---- (2) losses ----------------------------------------------------
    def mean_sigmoid_xent(labels, logits):
        # Batch-mean sigmoid cross entropy.
        return tf.reduce_mean(
            tf.nn.sigmoid_cross_entropy_with_logits(labels=labels,
                                                    logits=logits))

    # Discriminator: real -> 1, fake -> 0. Generator: fake -> 1.
    d_loss_real = mean_sigmoid_xent(labels=tf.ones_like(d_out_real),
                                    logits=d_logit_real)
    d_loss_fake = mean_sigmoid_xent(labels=tf.zeros_like(d_out_fake),
                                    logits=d_logit_fake)
    g_loss_fake = mean_sigmoid_xent(labels=tf.ones_like(d_out_fake),
                                    logits=d_logit_fake)

    d_reg_loss = tcl.apply_regularization(self.regularizer,
                                          weights_list=self.D.vars_train)
    g_reg_loss = tcl.apply_regularization(self.regularizer,
                                          weights_list=self.G.vars_train)

    self.d_loss = d_loss_real + d_loss_fake + d_reg_loss
    self.g_loss = g_loss_fake + g_reg_loss

    # ---- (3) optimizers ------------------------------------------------
    global_step = tf.train.get_global_step()

    def make_adam():
        return tf.train.AdamOptimizer(learning_rate=flags.learning_rate,
                                      beta1=flags.beta1,
                                      beta2=flags.beta2)

    d_optimizer = make_adam()
    g_optimizer = make_adam()

    # Each train op only touches its own network's variables.
    # NOTE(review): both ops receive the same global_step - confirm how the
    # step is meant to advance when both run in one iteration.
    self.d_train_op = tcl.optimize_loss(loss=self.d_loss,
                                        optimizer=d_optimizer,
                                        learning_rate=flags.learning_rate,
                                        variables=self.D.vars_train,
                                        global_step=global_step,
                                        name='d_optim')
    self.g_train_op = tcl.optimize_loss(loss=self.g_loss,
                                        optimizer=g_optimizer,
                                        learning_rate=flags.learning_rate,
                                        variables=self.G.vars_train,
                                        global_step=global_step,
                                        name='g_optim')

    # ---- (4) summaries -------------------------------------------------
    scalar_summaries = (
        ('d_loss_real', d_loss_real),
        ('d_loss_fake', d_loss_fake),
        ('g_loss_fake', g_loss_fake),
    )
    for tag, tensor in scalar_summaries:
        tf.summary.scalar(tag, tensor)

    histogram_summaries = (
        ('z', input['z']),
        ('d_out_real', d_out_real),
        ('d_out_fake', d_out_fake),
    )
    for tag, tensor in histogram_summaries:
        tf.summary.histogram(tag, tensor)

    image_summaries = (
        ('generated', generated),
        ('real', input['im_gt']),
    )
    for tag, tensor in image_summaries:
        tf.summary.image(tag, tensor, max_outputs=3)

    # Merge all summary operations into a single op.
    self.summary_all = tf.summary.merge_all()

    return self.output
def test(self, test_list, modelpath, num_test_clips=3783):
    """Evaluate the fine-tuned C3D checkpoint on the clips in ``test_list``.

    Builds the C3D network on ``self.inputs``, restores the trainable
    variables from ``modelpath + "sports1m_finetuning_ucf101.model"``, runs
    ``int(num_test_clips / self.batch_size)`` full batches through it and
    prints the fraction of correctly classified clips.

    Only the ops needed for evaluation are created; no loss, regularization
    or optimizer ops are built.

    Args:
        test_list: forwarded as ``filename`` to ``read_clip_and_label``.
        modelpath: prefix the checkpoint file name is appended to. It is
            concatenated as-is, so no path separator is inserted.
        num_test_clips: number of clips to evaluate. Defaults to 3783, the
            value that used to be hard-coded (presumably the size of the
            test list -- confirm). Clips that do not fill a whole batch are
            not evaluated.

    Returns:
        The accuracy over the evaluated clips, or 0.0 when not even one full
        batch fits into ``num_test_clips``.
    """
    with self.graph.as_default():
        c3d_net = [
            ["conv", "conv1", [3, 3, 3, 3, 64], 'wc1', 'bc1'],
            ["maxpool", "pool1", [1, 1, 2, 2, 1]],
            ["conv", "conv2", [3, 3, 3, 64, 128], 'wc2', 'bc2'],
            ["maxpool", "pool2", [1, 2, 2, 2, 1]],
            ["conv", "conv3a", [3, 3, 3, 128, 256], 'wc3a', 'bc3a'],
            ["conv", "conv3b", [3, 3, 3, 256, 256], 'wc3b', 'bc3b'],
            ["maxpool", "pool3", [1, 2, 2, 2, 1]],
            ["conv", "conv4a", [3, 3, 3, 256, 512], 'wc4a', 'bc4a'],
            ["conv", "conv4b", [3, 3, 3, 512, 512], 'wc4b', 'bc4b'],
            ["maxpool", "pool4", [1, 2, 2, 2, 1]],
            ["conv", "conv5a", [3, 3, 3, 512, 512], 'wc5a', 'bc5a'],
            ["conv", "conv5b", [3, 3, 3, 512, 512], 'wc5b', 'bc5b'],
            ["maxpool", "pool5", [1, 2, 2, 2, 1]],
            # Only keep this transpose when restoring
            # sports1m_finetuning_ucf101.model; otherwise comment it out
            # (e.g. for conv3d_deepnetA_sport1m_iter_1900000_TF.model).
            ["transpose", [0, 1, 4, 2, 3]],
            ["reshape", [-1, 8192]],
            ["fc", "fc1", [8192, 4096], 'wd1', 'bd1', True],
            # NOTE(review): self.keep_prob is not fed below; if it is a
            # value < 1, dropout stays active during testing -- confirm.
            ["dropout", "dropout1", self.keep_prob],
            ["fc", "fc2", [4096, 4096], 'wd2', 'bd2', True],
            ["dropout", "dropout2", self.keep_prob],
            ["fc", "fc3", [4096, self.num_class], 'wout', 'bout', False],
        ]

        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        config.gpu_options.per_process_gpu_memory_fraction = 0.9

        with tf.Session(config=config, graph=self.graph) as sess:
            logits = self.parseNet(self.inputs, c3d_net)
            softmax_logits = tf.nn.softmax(logits)
            int_label = self.labels

            # Number of clips in the batch whose top-scoring class equals
            # the label.
            right_count = tf.reduce_sum(
                tf.cast(
                    tf.equal(tf.argmax(softmax_logits, axis=1), int_label),
                    tf.int32))

            total_para = np.sum([np.prod(v.get_shape().as_list())
                                 for v in tf.trainable_variables()])
            # Reference counts noted by the original author:
            # all CDC9: 28613120, pool5: 27655936.
            print('total_para:', total_para)

            # Initialize everything first, then overwrite the trainable
            # variables with the checkpoint values.
            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(tf.trainable_variables())
            saver.restore(sess, modelpath + "sports1m_finetuning_ucf101.model")
            print("Model Loading Done!")

            num_batches = int(num_test_clips / self.batch_size)
            total_v = 0.0
            test_correct_num = 0
            next_start_pos = 0
            for _ in tqdm(range(num_batches)):
                total_v += self.batch_size
                clip_batch, label_batch, next_start_pos, _, _ = read_clip_and_label(
                    filename=test_list,
                    batch_size=self.batch_size,
                    num_frames_per_clip=self.CLIP_LENGTH,
                    height=self.IMG_HEIGHT,
                    width=self.IMG_WIDTH,
                    start_pos=next_start_pos,
                    shuffle=False
                )
                assert len(clip_batch) == self.batch_size
                clip_batch = train_aug(clip_batch, is_train=False,
                                       Crop_heith=self.CROP_HEIGHT,
                                       Crop_width=self.CROP_WIDTH, norm=True)
                val_feed = {self.inputs: clip_batch, self.labels: label_batch}
                test_correct_num += sess.run(right_count, val_feed)

            # Guard against a division by zero when no full batch was run.
            accuracy = test_correct_num / total_v if total_v else 0.0
            # NOTE(review): reported once, after all batches -- confirm this
            # matches the intended placement.
            print('test acc:', accuracy, 'test_correct_num:', test_correct_num,
                  'total_v:', total_v)
            return accuracy