def run(self, sess, train, dev, tag2label, epoch, saver):
    """Train for one epoch, then evaluate on the dev set."""
    num_batches = (len(train) + self.batch_size - 1) // self.batch_size
    # Split the training data into batches and feed them to the network
    # one batch at a time.
    batches = gen_batch(train, self.batch_size, self.vocab, self.tag2label,
                        shuffle=self.shuffle)
    for step, (seqs, labels) in enumerate(batches):
        sys.stdout.write('total batches: {}, current batch: {}\r'.format(
            num_batches, step + 1))
        step_num = epoch * num_batches + step + 1
        feed_dict, _ = self.get_feed(seqs, labels, self.lr,
                                     self.dropout_keep_prob)
        _, loss_train, summary, step_num_ = sess.run(
            [self.train_op, self.loss, self.merged, self.global_step],
            feed_dict=feed_dict)
        self.file_writer.add_summary(summary, step_num)
        # Save a checkpoint at the end of the epoch.
        if step + 1 == num_batches:
            saver.save(sess, self.model_path, global_step=step_num)

    print('validating model')
    # Evaluate the model on the dev set after this epoch of training.
    label_list_dev, seq_len_list_dev = self.dev(sess, dev)
    # Compute the evaluation metrics.
    self.evaluate(label_list_dev, seq_len_list_dev, dev, epoch)
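
# run() depends on a gen_batch helper defined elsewhere in the repository.
# Below is a minimal sketch of what such a generator could look like,
# assuming each training item is a (sentence, tags) pair and that vocab
# maps tokens to integer ids (neither assumption is confirmed by this file):
import random

def gen_batch(data, batch_size, vocab, tag2label, shuffle=False):
    if shuffle:
        random.shuffle(data)
    seqs, labels = [], []
    for sent, tags in data:
        if len(seqs) == batch_size:
            yield seqs, labels
            seqs, labels = [], []
        # Map tokens and tags to their integer ids.
        seqs.append([vocab.get(tok, vocab.get('<UNK>', 0)) for tok in sent])
        labels.append([tag2label[t] for t in tags])
    if seqs:
        yield seqs, labels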
def train(self, current_epoch):
    """Process one training epoch."""
    sum_loss = 0.0
    processed_batch = 0
    processed_before_batch = 0
    begin = time.time()
    log_begin = begin
    for instance_info in utils.gen_batch(self._options.train_data,
                                         self._word_freq, self._word_id,
                                         self._phrase_ids,
                                         self._options.batch_size,
                                         self._options.window,
                                         self._options.subsample):
        embed_id, context_id, processed_num = instance_info
        phrase_length = len(embed_id[0])
        # When not_embedding_train is set, skip single-word instances.
        if self._options.not_embedding_train and phrase_length == 1:
            continue
        loss, _, lr = self._session.run(
            [self.loss[phrase_length], self.optimize_op[phrase_length], self.lr],
            {self.holder[phrase_length]: np.array(embed_id, dtype=np.int32),
             self.context_id: np.array(context_id, dtype=np.int32),
             self.processed_num: np.array(processed_num, dtype=np.float32)})
        sum_loss += loss * self._options.batch_size
        processed_batch += 1
        # Log progress every 10,000 batches.
        if processed_batch % 10000 == 0:
            end = time.time()
            print('Epoch: %d\tTrained: %s\tLr: %.6f\tLoss: %.4f\tword/sec: %d'
                  % (current_epoch, processed_num, lr,
                     sum_loss / (processed_num - processed_before_batch),
                     (processed_num - processed_before_batch) / (end - log_begin)))
            log_begin = time.time()
            processed_before_batch = processed_num
            sum_loss = 0.0
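
# train() fetches a learning-rate tensor while feeding the running word
# count, which suggests a progress-based schedule. A minimal sketch of the
# classic word2vec-style linear decay such a graph might use; initial_lr,
# total_words, and the floor value are assumptions, not taken from this file:
import tensorflow as tf

def build_lr(processed_num, initial_lr, total_words, floor=1e-4):
    # Anneal linearly with the fraction of words processed, never letting
    # the rate drop below floor * initial_lr (as in the original word2vec tool).
    fraction = tf.minimum(processed_num / total_words, 1.0 - floor)
    return initial_lr * (1.0 - fraction)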
def dev(self, sess, dev):
    """Run inference over the dev set and collect predictions."""
    label_list, seq_len_list = [], []
    # Generate batches (no shuffling during evaluation).
    for seqs, labels in gen_batch(dev, self.batch_size, self.vocab,
                                  self.tag2label, shuffle=False):
        # Predict the tag sequence for each batch.
        label_list_, seq_len_list_ = self.predict(sess, seqs)
        label_list.extend(label_list_)
        seq_len_list.extend(seq_len_list_)
    return label_list, seq_len_list
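
# dev() (and test() below) call self.predict, which is not shown in this
# file. A minimal sketch assuming a linear-chain CRF output layer, with
# self.logits as the unary scores and self.transition_params as the learned
# transition matrix; those attribute names, and get_feed accepting a
# label-less inference call, are all assumptions:
from tensorflow.contrib.crf import viterbi_decode

def predict(self, sess, seqs):
    feed_dict, seq_len_list = self.get_feed(seqs, dropout=1.0)
    logits, transition_params = sess.run(
        [self.logits, self.transition_params], feed_dict=feed_dict)
    label_list = []
    for logit, seq_len in zip(logits, seq_len_list):
        # Viterbi-decode the best tag path for each sequence.
        viterbi_seq, _ = viterbi_decode(logit[:seq_len], transition_params)
        label_list.append(viterbi_seq)
    return label_list, seq_len_list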
def test(self, sess, sent):
    """Tag one input sentence and map numeric labels back to tag strings."""
    label_list = []
    # Generate batches (no shuffling).
    for seqs, labels in gen_batch(sent, self.batch_size, self.vocab,
                                  self.tag2label, shuffle=False):
        label_list_, _ = self.predict(sess, seqs)
        label_list.extend(label_list_)
    # Build the inverse mapping from numeric labels back to tags.
    # Note: label 0 is deliberately kept as the number 0 rather than
    # mapped back to its tag string.
    label2tag = {}
    for tag, label in self.tag2label.items():
        label2tag[label] = tag if label != 0 else label
    # Only a single sentence is expected, so convert the first result.
    tag = [label2tag[label] for label in label_list[0]]
    return tag
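
# Hypothetical usage of test() on one sentence; the session setup,
# checkpoint restoring, and model object are assumed to exist elsewhere,
# and 'O' is assumed to be a valid key in tag2label:
def demo_sentence(model, sess, text):
    chars = list(text)
    # Tags are required by gen_batch but ignored at test time, so pass dummies.
    sent = [(chars, ['O'] * len(chars))]
    tags = model.test(sess, sent)
    return list(zip(chars, tags))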
def main():
    with open('../data/ex_train_list.pkl', 'rb') as f1:
        train_list = pickle.load(f1)
    with open('../data/ex_landmark.pkl', 'rb') as f2:
        landmark = pickle.load(f2)
    print(len(train_list))

    print('Loading data...')
    x_train, label, shape = load_data(train_list, landmark)
    x_train = np.asarray(x_train)
    shape = np.asarray(shape)
    label = np.asarray(label)
    (im_train, lm_train, gt_train), (x_val, lm_val, gt_val) = split_data(
        x_train, shape, label, split_ratio=0.1)

    img_ph = tf.placeholder(tf.float32, [None, 224, 224, 3])
    lm_ph = tf.placeholder(tf.float32, [None, 51 * 2])
    label_ph = tf.placeholder(tf.float32, [None, 8])
    keep_prob = tf.placeholder(tf.float32)
    lr_ph = tf.placeholder(tf.float32)

    with tf.Session() as sess:
        # Appearance stream (DAN) on images; geometry stream (DGN) on landmarks.
        dan = vgg_face.Vgg_face()
        dan.build(img_ph, keep_prob)
        dgn = vgg_face.DGN()
        dgn.build(lm_ph, keep_prob)

        with tf.name_scope('dan'):
            dan_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=dan.fc8, labels=label_ph)
            dan_loss = tf.reduce_mean(dan_cross_entropy)
        with tf.name_scope('dgn'):
            dgn_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=dgn.fc3, labels=label_ph)
            dgn_loss = tf.reduce_mean(dgn_cross_entropy)
        with tf.name_scope('dagn'):
            # Joint branch: fuse the two streams by summing their logits.
            dagn_cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
                logits=dan.fc8 + dgn.fc3, labels=label_ph)
            dagn_loss = tf.reduce_mean(dagn_cross_entropy)
        with tf.name_scope('loss'):
            loss = dan_loss + dgn_loss + 0.1 * dagn_loss
            train_step = tf.train.AdamOptimizer(lr_ph).minimize(loss)
        with tf.name_scope('acc'):
            pred = tf.nn.softmax(dan.fc8 + dgn.fc3)
            correct_prediction = tf.equal(tf.argmax(pred, 1),
                                          tf.argmax(label_ph, 1))
            # Count of correct predictions; divided by the set size below.
            accuracy = tf.reduce_sum(tf.cast(correct_prediction, tf.float32))

        sess.run(tf.global_variables_initializer())
        saver = tf.train.Saver()
        best_acc = 0.0
        best_loss = 1000
        lr = 1e-4
        for i in range(epoch):
            # Reset the learning rate every 50 epochs; otherwise decay it
            # by 2% per epoch.
            if i % 50 == 0 and i != 0:
                lr = 1e-4
                print('\nlearning rate has been reset to', lr)
            lr = 0.98 * lr

            cnt = 0
            for im, lm, gt in gen_batch(im_train, lm_train, gt_train, batch_size):
                tStart = time.time()
                sess.run(train_step, feed_dict={img_ph: im, lm_ph: lm,
                                                label_ph: gt, keep_prob: 1.0,
                                                lr_ph: lr})
                tEnd = time.time()
                print_process(cnt, im_train.shape[0] // batch_size, tEnd - tStart)
                # Stop after roughly one pass over the training set.
                if cnt == im_train.shape[0] // batch_size:
                    break
                cnt += 1

            train_acc = 0.0
            train_loss = 0.0
            for im, lm, gt in gen_batch(im_train, lm_train, gt_train, batch_size):
                acc, l = sess.run((accuracy, loss),
                                  feed_dict={img_ph: im, lm_ph: lm,
                                             label_ph: gt, keep_prob: 1.0})
                train_acc += acc
                train_loss += l
            val_acc = 0.0
            val_loss = 0.0
            for im, lm, gt in gen_batch(x_val, lm_val, gt_val, batch_size):
                acc, l = sess.run((accuracy, loss),
                                  feed_dict={img_ph: im, lm_ph: lm,
                                             label_ph: gt, keep_prob: 1.0})
                val_acc += acc
                val_loss += l

            # Save a checkpoint whenever validation accuracy improves, or ties
            # the best accuracy with a lower validation loss.
            if (best_acc == val_acc / x_val.shape[0] and best_loss > val_loss) \
                    or best_acc < val_acc / x_val.shape[0]:
                print('Epoch: %d, training accuracy %.4f, loss: %.4f, '
                      'val_acc: %.4f, val_loss: %.4f, val_acc improved '
                      'from %.4f to %.4f, saving model.'
                      % (i + 1, train_acc / im_train.shape[0], train_loss,
                         val_acc / x_val.shape[0], val_loss, best_acc,
                         val_acc / x_val.shape[0]))
                best_acc = val_acc / x_val.shape[0]
                best_loss = val_loss
                saver.save(sess, '../model/dgan.ckpt')
            else:
                print('Epoch: %d, training accuracy %.4f, loss: %.4f, '
                      'val_acc: %.4f, val_loss: %.4f, val_acc did not improve.'
                      % (i + 1, train_acc / im_train.shape[0], train_loss,
                         val_acc / x_val.shape[0], val_loss))
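
# main() uses a gen_batch with a different signature from the NER helpers
# above: it slices parallel image/landmark/label arrays. A minimal sketch of
# such a batcher, assuming one finite shuffled pass per call; the repo's
# actual helper may differ (e.g. it may cycle indefinitely, which would
# explain the manual break in the training loop):
import numpy as np

def gen_batch(images, landmarks, labels, batch_size):
    idx = np.random.permutation(images.shape[0])
    for start in range(0, len(idx), batch_size):
        batch = idx[start:start + batch_size]
        yield images[batch], landmarks[batch], labels[batch]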