def run_epoch(self, sess, src_train, src_dev, tags, target_train, target_dev, n_epoch_noimprove):
    nbatches = (len(target_train) + self.target_batch_size - 1) // self.target_batch_size
    prog = Progbar(target=nbatches)
    total_loss = 0
    src = minibatches(src_train, self.src_batch_size, circle=True)
    target = minibatches(target_train, self.target_batch_size, circle=True)
    for i in range(nbatches):
        src_words, src_tags, _ = next(src)
        target_words, target_tags, _ = next(target)
        labels = src_tags + target_tags
        feed_dict, _ = self.get_feed_dict(src_words, labels, target_words,
                                          self.args.learning_rate, self.args.dropout,
                                          self.src_batch_size, is_training=True)
        if self.args.penalty_ratio > 0:
            _, src_crf_loss, target_crf_loss, penalty_loss, loss = sess.run(
                [self.train_op, self.src_crf_loss, self.target_crf_loss,
                 self.penalty_loss, self.loss],
                feed_dict=feed_dict)
            # self.loss may come back as a scalar or a length-1 array
            loss_val = loss[0] if hasattr(loss, "__len__") else loss
            prog.update(i + 1, [("train loss", loss_val),
                                ("src crf", src_crf_loss),
                                ("target crf", target_crf_loss),
                                ("{} loss".format(self.args.penalty), penalty_loss)])
        else:
            _, src_crf_loss, target_crf_loss, loss = sess.run(
                [self.train_op, self.src_crf_loss, self.target_crf_loss, self.loss],
                feed_dict=feed_dict)
            loss_val = loss[0] if hasattr(loss, "__len__") else loss
            prog.update(i + 1, [("train loss", loss_val),
                                ("src crf", src_crf_loss),
                                ("target crf", target_crf_loss)])
        total_loss += loss
    self.info['loss'] += [total_loss / nbatches]
    acc, p, r, f1 = self.run_evaluate(sess, target_train, tags, target='target')
    self.info['dev'].append((acc, p, r, f1))
    self.logger.critical(
        "target train acc {:04.2f} f1 {:04.2f} p {:04.2f} r {:04.2f}".format(
            100 * acc, 100 * f1, 100 * p, 100 * r))
    acc, p, r, f1 = self.run_evaluate(sess, target_dev, tags, target='target')
    self.info['dev'].append((acc, p, r, f1))
    self.logger.info(
        "dev acc {:04.2f} f1 {:04.2f} p {:04.2f} r {:04.2f}".format(
            100 * acc, 100 * f1, 100 * p, 100 * r))
    return acc, p, r, f1
def evaluate(self, test):
    accuracy = []
    correct_prediction = 0.
    total_correct = 0.
    total_prediction = 0.
    for word, label in minibatches(test, self.config.batch_size):
        label_predict, seq_len = self.predict_batch(word)
        for lb, lb_pred, length in zip(label, label_predict, seq_len):
            lb = lb[:length]
            lb_pred = lb_pred[:length]
            accuracy += [a == b for (a, b) in zip(lb, lb_pred)]
            lb_chunks = set(get_chunks(lb, self.config.vocab_tag))
            lb_pred_chunks = set(get_chunks(lb_pred, self.config.vocab_tag))
            correct_prediction += len(lb_chunks & lb_pred_chunks)
            total_prediction += len(lb_pred_chunks)
            total_correct += len(lb_chunks)
    precision = correct_prediction / total_prediction if correct_prediction > 0 else 0
    recall = correct_prediction / total_correct if correct_prediction > 0 else 0
    f1 = 2 * precision * recall / (precision + recall) if correct_prediction > 0 else 0
    acc = np.mean(accuracy)
    return {"accuracy": 100 * acc, "f1-score": 100 * f1}
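# The functions in this collection all rely on a `minibatches` helper that is not
# shown here. Below is a minimal sketch of what it is assumed to do (group the
# dataset into lists of up to batch_size examples); the real implementations may
# take extra arguments such as vocabularies or a `circle` flag.
def minibatches(data, batch_size):
    """Yield (words, labels) lists containing up to batch_size examples each."""
    x_batch, y_batch = [], []
    for x, y in data:
        if len(x_batch) == batch_size:
            yield x_batch, y_batch
            x_batch, y_batch = [], []
        x_batch.append(x)
        y_batch.append(y)
    if x_batch:  # last, possibly smaller, batch
        yield x_batch, y_batch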
def run_epoch(self, sess, train, dev, epoch):
    nbatches = (len(train) + self.config.batch_size - 1) // self.config.batch_size
    total_loss = 0.0
    batch_cnt = 0
    for i, (words, labels) in enumerate(minibatches(train, self.config.batch_size)):
        fd, _ = self.get_feed_dict(words, labels=labels,
                                   lr=self.config.learning_rate,
                                   dropout=self.config.dropout)
        _, loss, summary = sess.run([self.train_op, self.loss, self.merged],
                                    feed_dict=fd)
        total_loss += loss
        if i % 100000 == 0:
            acc = self.run_eval(sess, dev)
            self.file_writer.add_summary(
                tf.Summary(value=[tf.Summary.Value(tag='eval_acc', simple_value=acc)]),
                epoch)
        batch_cnt += 1
        if i % 20 == 0:
            self.file_writer.add_summary(summary, epoch * nbatches + i)
        if i % 1000 == 0:
            print("batch {}, loss {:04.2f}.".format(i, float(total_loss) / batch_cnt))
    acc = self.run_eval(sess, dev)
    # self.file_writer.add_summary(tf.Summary(value=[tf.Summary.Value(tag='eval_acc', simple_value=acc)]), epoch)
    print("- dev acc {:04.2f}".format(100 * acc))
    return acc
def run_epoch(self, sess, train, dev, tags, epoch): """ Performs one complete pass over the train set and evaluate on dev Args: sess: tensorflow session train: dataset that yields tuple of sentences, tags dev: dataset tags: {tag: index} dictionary epoch: (int) number of the epoch """ nbatches = (len(train) + self.config.batch_size - 1) // self.config.batch_size prog = Progbar(target=nbatches) total_loss = 0.0 count = 0 for i, (words, labels, iob, mention_type, mentions, _) in enumerate(minibatches(train, self.config.batch_size)): mention_sizes = [] for m in mentions: mention_sizes.append(len(m)) fd, _ = self.get_feed_dict(words, self.config.lr, self.config.dropout, iob, mention_type, mentions, mention_sizes) logits, _, a, b, train_loss= sess.run([self.boundry_logits, self.train_op, self.loss_a, self.loss_b, self.loss], feed_dict=fd) total_loss += train_loss count += 1 print total_loss/count acc, f1 = self.run_evaluate(sess, dev, tags) self.logger.info("- dev acc {:04.2f} - f1 {:04.2f}".format(100*acc, 100*f1)) return acc, f1
def run_evaluate(self, sess, test, tags): """ Evaluates performance on test set Args: sess: tensorflow session test: dataset that yields tuple of sentences, tags tags: {tag: index} dictionary Returns: accuracy f1 score """ accs = [] correct_preds, total_correct, total_preds = 0., 0., 0. for words, labels in minibatches(test, self.config.batch_size): labels_pred, sequence_lengths = self.predict_batch(sess, words) for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths): lab = lab[:length] lab_pred = lab_pred[:length] accs += [a == b for (a, b) in zip(lab, lab_pred)] lab_chunks = set(get_chunks(lab, tags)) lab_pred_chunks = set(get_chunks(lab_pred, tags)) correct_preds += len(lab_chunks & lab_pred_chunks) total_preds += len(lab_pred_chunks) total_correct += len(lab_chunks) p = correct_preds / total_preds if correct_preds > 0 else 0 r = correct_preds / total_correct if correct_preds > 0 else 0 f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0 acc = np.mean(accs) return acc, f1
def run_evaluate(self, sess, test): """ Evaluates performance on test set Args: sess: tensorflow session test: dataset that yields tuple of sentences, tags Returns: accuracy phone error rate """ accs = [] group_accuracy = [] for framelist, phones in minibatches(test, self.config.batch_size): phones_pred, sequence_lengths = self.predict_batch(sess, framelist) for lab, lab_pred, length in zip(phones, phones_pred, sequence_lengths): lab = lab[:length] lab_pred = lab_pred[:length] accs += map(lambda x: x[0] == x[1], zip(lab, lab_pred)) group = [self.phn2group[self.idx2phn[x]] for x in lab] group_pred = [ self.phn2group[self.idx2phn[x]] for x in lab_pred ] group_accuracy += map(lambda x: x[0] == x[1], zip(group, group_pred)) acc = np.mean(accs) per = 1 - np.mean(group_accuracy) return acc, per
def run_epoch(self, sess, train, dev, epoch): """ Performs one complete pass over the train set and evaluate on dev Args: sess: tensorflow session train: dataset that yields tuple of sentences, tags dev: dataset epoch: (int) number of the epoch """ nbatches = (len(train) + self.config.batch_size - 1) / self.config.batch_size prog = Progbar(target=nbatches, verbose=False) for i, (framelist, phones) in enumerate(minibatches(train, self.config.batch_size)): fd, _ = self.get_feed_dict(framelist, phones, self.config.lr, self.config.keep_prob) _, train_loss, summary = sess.run( [self.train_op, self.loss, self.merged], feed_dict=fd) prog.update(i + 1, [("train loss", train_loss)]) # tensorboard if i % 10 == 0: self.file_writer.add_summary(summary, epoch * nbatches + i) acc, per = self.run_evaluate(sess, dev) self.logger.info(" - dev accuracy {:04.2f} - PER {:04.2f}".format( 100 * acc, 100 * per)) return acc, per
def run_evaluate(self, sess, test, vocab_tags, vocab_words):
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    for sentences, labels in minibatches(test, vocab_tags, vocab_words,
                                         self.config.batch_size):
        labels_pred, sequence_lengths = self.predict_batch(sess, sentences)
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, vocab_tags))
            lab_pred_chunks = set(get_chunks(lab_pred, vocab_tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    return {"acc": 100 * acc, "f1": 100 * f1, "precision": 100 * p, "recall": 100 * r}
def run_epoch(self, sess, train, dev, tags, epoch): """ Performs one complete pass over the train set and evaluate on dev Args: sess: tensorflow session train: dataset that yields tuple of sentences, tags dev: dataset tags: {tag: index} dictionary epoch: (int) number of the epoch """ nbatches = (len(train) + self.config.batch_size - 1) // self.config.batch_size prog = Progbar(target=nbatches) for i, (words, labels) in enumerate(minibatches(train, self.config.batch_size)): fd, _ = self.get_feed_dict(words, labels, self.config.lr, self.config.dropout) _, train_loss, summary = sess.run( [self.train_op, self.loss, self.merged], feed_dict=fd) prog.update(i + 1, [("train loss", train_loss)]) # tensorboard if i % 10 == 0: self.file_writer.add_summary(summary, epoch * nbatches + i) acc, f1 = self.run_evaluate(sess, dev, tags) self.logger.info("- dev acc {:04.2f} - f1 {:04.2f}".format( 100 * acc, 100 * f1)) return acc, f1
def run_evaluate(self, sess, test, tags, target='src'):
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    nbatches = (len(test) + self.args.batch_size - 1) // self.args.batch_size
    prog = Progbar(target=nbatches)
    for i, (words, labels, target_words) in enumerate(
            minibatches(test, self.args.batch_size)):
        if target == 'src':
            labels_pred, sequence_lengths = self.predict_batch(
                sess, words, mode=target, is_training=False)
        else:
            labels_pred, sequence_lengths = self.predict_batch(
                sess, None, words, mode=target, is_training=False)
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, tags))
            lab_pred_chunks = set(get_chunks(lab_pred, tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
        prog.update(i + 1)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    return acc, p, r, f1
def run_predict(self, sess, test, test_deps, vocab_words, vocab_tags,
                print_test_results=False):
    """Evaluates performance on test set"""
    idx_to_words = {}
    if print_test_results:
        idx_to_words = {idx: word for word, idx in vocab_words.items()}
    test_accs = []
    self.config.istrain = False  # set to test first, for batch normalization
    for words, poss, chunks, labels, \
        btup_idx_list, btup_words_list, btup_depwords_list, btup_deprels_list, \
        btup_depwords_length_list, \
        upbt_idx_list, upbt_words_list, upbt_depwords_list, upbt_deprels_list, \
        upbt_depwords_length_list, \
        btup_formidx_list, upbt_formidx_list in minibatches(
            test, test_deps, self.config.batch_size):
        labels_pred, sequence_lengths = self.predict_batch(
            sess, words, poss, chunks,
            btup_idx_list, btup_words_list, btup_depwords_list, btup_deprels_list,
            btup_depwords_length_list,
            upbt_idx_list, upbt_words_list, upbt_depwords_list, upbt_deprels_list,
            upbt_depwords_length_list,
            btup_formidx_list, upbt_formidx_list)
    return labels_pred, sequence_lengths
def run_epoch(self, sess, train, dev, tags, epoch): """ Performs one complete pass over the train set and evaluate on dev Args: sess: tensorflow session train: dataset that yields tuple of sentences, tags dev: dataset tags: {tag: index} dictionary epoch: (int) number of the epoch """ nbatches = (len(train) + self.config.batch_size - 1) // self.config.batch_size prog = Progbar(target=nbatches) for i, (words, labels) in enumerate(minibatches(train, self.config.batch_size)): fd, _ = self.get_feed_dict(words, labels, self.config.lr, self.config.dropout) _, train_loss, summary = sess.run([self.train_op, self.loss, self.merged], feed_dict=fd) prog.update(i + 1, [("train loss", train_loss)]) # tensorboard if i % 10 == 0: self.file_writer.add_summary(summary, epoch*nbatches + i) acc, f1 = self.run_evaluate(sess, dev, tags) self.logger.info("- dev acc {:04.2f} - f1 {:04.2f}".format(100*acc, 100*f1)) return acc, f1
def run_evaluate(self, sess, test, tags): """ Evaluates performance on test set Args: sess: tensorflow session test: dataset that yields tuple of sentences, tags tags: {tag: index} dictionary Returns: accuracy f1 score """ accs = [] correct_preds, total_correct, total_preds = 0., 0., 0. for words, labels in minibatches(test, self.config.batch_size): labels_pred, sequence_lengths = self.predict_batch(sess, words) for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths): lab = lab[:length] lab_pred = lab_pred[:length] accs += [a==b for (a, b) in zip(lab, lab_pred)] lab_chunks = set(get_chunks(lab, tags)) lab_pred_chunks = set(get_chunks(lab_pred, tags)) correct_preds += len(lab_chunks & lab_pred_chunks) total_preds += len(lab_pred_chunks) total_correct += len(lab_chunks) p = correct_preds / total_preds if correct_preds > 0 else 0 r = correct_preds / total_correct if correct_preds > 0 else 0 f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0 acc = np.mean(accs) return acc, f1
def run_epoch(self, sess, train, train_deps, dev, dev_deps, vocab_words,
              vocab_tags, epoch):
    """Performs one complete pass over the train set and evaluate on dev"""
    self.config.istrain = True  # set to train first, for batch normalization
    nbatches = (len(train_deps) + self.config.batch_size - 1) // self.config.batch_size
    prog = Progbar(target=nbatches)
    for i, (words, poss, chunks, labels,
            btup_idx_list, btup_words_list, btup_depwords_list, btup_deprels_list,
            btup_depwords_length_list,
            upbt_idx_list, upbt_words_list, upbt_depwords_list, upbt_deprels_list,
            upbt_depwords_length_list,
            btup_formidx_list, upbt_formidx_list) in enumerate(
                minibatches(train, train_deps, self.config.batch_size)):
        fd, sequence_lengths = self.get_feed_dict(
            words, poss, chunks, labels,
            btup_idx_list, btup_words_list, btup_depwords_list, btup_deprels_list,
            btup_depwords_length_list,
            upbt_idx_list, upbt_words_list, upbt_depwords_list, upbt_deprels_list,
            upbt_depwords_length_list,
            btup_formidx_list, upbt_formidx_list,
            self.config.lr, self.config.dropout)
        _, train_loss, logits = sess.run(
            [self.train_op, self.loss, self.logits], feed_dict=fd)
        prog.update(i + 1, [("train loss", train_loss)])
    acc, recall, f1, test_acc = self.run_evaluate(sess, dev, dev_deps,
                                                  vocab_words, vocab_tags)
    self.logger.info(
        "- dev acc {:04.2f} - dev recall {:04.2f} - f1 {:04.2f} - test acc {:04.2f}"
        .format(100 * acc, 100 * recall, 100 * f1, 100 * test_acc))
    return acc, recall, f1, train_loss
def run_evaluate(self, test, log_step=None, mode='train'):
    """Evaluates performance on test set

    Args:
        test: dataset that yields tuple of (sentences, tags)
        get_loss: True, if you want to calculate validation loss
    Returns:
        metrics: (dict) metrics["acc"] = 98.4, ...
    """
    accs = []
    correct_preds, total_correct, total_preds = 0., 0., 0.
    get_loss = self.config.early_stopping_metric == 'loss'
    if get_loss:
        loss = 0.0
        weight = 0.0
    for words, labels, pred_flags in minibatches(test, self.config.batch_size):
        if get_loss:
            labels_pred, sequence_lengths, batch_loss = self.predict_batch(
                words, labels=labels, pred_flags=pred_flags, get_loss=get_loss)
            _weight = len(sequence_lengths) / float(self.config.batch_size)
            weight += _weight
            loss += _weight * batch_loss
        else:
            labels_pred, sequence_lengths = self.predict_batch(
                words, get_loss, pred_flags=pred_flags)
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, self.config.vocab_tags))
            lab_pred_chunks = set(get_chunks(lab_pred, self.config.vocab_tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    if get_loss and mode == 'train':
        loss = loss / weight
        dev_summary = self.sess.run(self.dev_merged,
                                    feed_dict={self.eval_loss: loss,
                                               self.eval_f1: f1})
        self.dev_file_writer.add_summary(dev_summary, log_step)
        return {"acc": 100 * acc, "f1": 100 * f1, "loss": loss}
    if mode == 'evaluate':
        dataset_name = basename(normpath(test.filename))
        self.save_evaluation_results(dataset_name, f1)
    return {"acc": 100 * acc, "f1": 100 * f1}
def run_eval(self, sess, test):
    accs = []
    correct_preds, total_correct, total_preds = 0, 0, 0
    for i, (words, labels) in enumerate(minibatches(test, self.config.batch_size)):
        pred, _acc = self.predict_batch(sess, words, labels)
        accs.append(_acc)
    acc = np.mean(np.array(accs))
    return acc
def run_epoch(self, sess, train, dev, tags, epoch): """ Performs one complete pass over the train set and evaluate on dev Args: sess: tensorflow session train: dataset that yields tuple of sentences, tags dev: dataset tags: {tag: index} dictionary epoch: (int) number of the epoch """ #trie setting self.lis1 = [] self.lis2 = [] self.lis3 = [] self.lis4 = [] self.lis5 = [] trie.gazette(self.lis1, "data/dic/gazette.txt") trie.gazette(self.lis2, "data/dic/thres3.txt") trie.gazette_DTTI(self.lis3, "data/dic/DT_analysis.txt") trie.gazette_DTTI(self.lis4, "data/dic/TI_analysis.txt") trie.gazette(self.lis5, "data/dic/wiki_PS.txt") nbatches = (len(train) + self.config.batch_size - 1) // self.config.batch_size prog = Progbar(target=nbatches) for i, (words, fw_words, bw_words, labels, postags, sentences, _) in enumerate(minibatches(train, self.config.batch_size)): dict_labels = self.dict_trie(sentences) fd, _ = self.get_feed_dict(words, fw_words, bw_words, dict_labels, labels, self.config.lr, self.config.dropout, test_flag=0) _, train_loss, summary = sess.run( [self.train_op, self.loss, self.merged], feed_dict=fd) prog.update(i + 1, [("train loss", train_loss)]) # tensorboard if i % 10 == 0: self.file_writer.add_summary(summary, epoch * nbatches + i) acc, f1, p, r = self.run_evaluate(sess, dev, tags, test_flag=0) self.logger.info( "- dev acc {:04.2f} - f1 {:04.2f} - p {:04.2f} - r {:04.2f}". format(100 * acc, 100 * f1, 100 * p, 100 * r)) return acc, f1
def run_evaluate(self, sess, test, tags): """ Evaluates performance on test set Args: sess: tensorflow session test: dataset that yields tuple of sentences, tags tags: {tag: index} dictionary Returns: accuracy f1 score """ accs = [] correct_preds, total_correct, total_preds = 0., 0., 0. for words, labels, iob_gold, mention_type_gold, mentions_gold, _ in minibatches(test, self.config.batch_size): iob_labels_pred, sequence_lengths= self.predict_iob_batch(sess, words) mentions = [] mention_sizes = [] count = 0 for i in range(self.config.batch_size): length = sequence_lengths[i] mention = find_mentions(iob_labels_pred[i][:length]) mentions.append(mention) mention_sizes.append(len(mention)) if len(mention) == 0: count += 1 if count != self.config.batch_size: mentions_pred = self.predict_type_batch(sess, words, mentions, mention_sizes) else: mentions_pred = [[]]*self.config.batch_size for lab, iob_pred, length, mention, mention_pred, mention_size in zip(labels, iob_labels_pred, sequence_lengths, mentions, mentions_pred, mention_sizes): lab = lab[:length] iob_pred = iob_pred[:length] mention_pred = mention_pred[:mention_size] ''' lab_pred = find_labels(iob_pred, mention_pred, tags) #print iob_pred #print lab accs += [a==b for (a, b) in zip(lab, lab_pred)] lab_chunks = set(get_chunks(lab, tags)) lab_pred_chunks = set(get_chunks(lab_pred, tags)) correct_preds += len(lab_chunks & lab_pred_chunks) total_preds += len(lab_pred_chunks) total_correct += len(lab_chunks) ''' ''' p = correct_preds / total_preds if correct_preds > 0 else 0 r = correct_preds / total_correct if correct_preds > 0 else 0 f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0 acc = np.mean(accs) return acc, f1 ''' return 0.0, 0.0
def evaluate(self, valid):
    acc_total, loss_total, cnt = 0, 0, 0
    for i, (data, y) in enumerate(data_utils.minibatches(valid, self.batch_size)):
        cnt += 1
        acc = self.sess.run(self.accuracy,
                            feed_dict={
                                self.input_x: data,
                                self.input_y: y,
                                self.dropout: 1.0
                            })
        acc_total += self.batch_size * acc
    acc_valid = round(acc_total * 1.0 / len(valid), 3)
    return acc_valid
def evaluate(self, sess, test, tags):
    accuracy = []
    f1 = []
    for words, labels in minibatches(test, self.config.batch_size):
        labels_pred, sequence_lengths = self.predict_batch(sess, words)
        for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths):
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accuracy += [a == b for (a, b) in zip(lab, lab_pred)]
            f1.append(f1_score(lab, lab_pred, average='macro'))
    acc = np.mean(accuracy)
    f1 = sum(f1) / float(len(f1))
    return acc, f1
def run_evaluate(self, sess, test, tags): """ Evaluates performance on test set Args: sess: tensorflow session test: dataset that yields tuple of sentences, tags tags: {tag: index} dictionary Returns: accuracy f1 score """ accs = [] global Globepoch Globepoch += 1 if Globepoch >= 8: OutFile = open("Res1/AWS_GPU_BEST_" + str(Globepoch), 'w') correct_preds, total_correct, total_preds = 0., 0., 0. for words, labels in minibatches( test, self.config.batch_size ): ## here raw words and tags from main.py is starting to get converted into word to id's and tag to id's labels_pred, sequence_lengths = self.predict_batch(sess, words) for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths): lab = lab[:length] lab_pred = lab_pred[:length] accs += [a == b for (a, b) in zip(lab, lab_pred)] lab_chunks = set(get_chunks(lab, tags)) lab_pred_chunks = set(get_chunks(lab_pred, tags)) test2lab = label2ind_ret() # print (test2lab) if Globepoch >= 8: for lab1 in lab_pred: OutFile.write(test2lab[lab1] + "\n") OutFile.write("\n") correct_preds += len(lab_chunks & lab_pred_chunks) total_preds += len(lab_pred_chunks) total_correct += len(lab_chunks) p = correct_preds / total_preds if correct_preds > 0 else 0 r = correct_preds / total_correct if correct_preds > 0 else 0 f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0 acc = np.mean(accs) return acc, f1
def run_evaluate(self, sess, test, save=False):
    accs, rms = [], []
    labs, labs_ = [], []
    for words, imp_labels in minibatches(test, self.config.batch_size):
        imp_labels_, sequence_lengths = self.predict_batch(sess, words)
        for lab, lab_, length in zip(imp_labels, imp_labels_, sequence_lengths):
            lab = lab[:length]
            lab_ = lab_[:length]
            if self.config.model == "lstm_sig":
                # materialize the mapped sequences so they can be reused below
                d_lab = list(map(self.config.ann2class, lab))
                d_lab_ = list(map(self.config.ann2class, lab_))
            else:
                d_lab = list(map(self.config.ann2class, lab))
                d_lab_ = lab_[:]
                lab_ = list(map(self.config.class2ann, d_lab_))
            rms += [pow((float(a) - float(b)), 2) for (a, b) in zip(lab, lab_)]
            accs += [a == b for (a, b) in zip(d_lab, d_lab_)]
            labs.extend(d_lab)
            labs_.extend(d_lab_)
    if save:
        with open(self.config.compare_predictions, 'w') as f:
            csv_writer = csv.writer(f)
            csv_writer.writerow(['truth', 'predictions'])
            for y, pred_y in zip(labs, labs_):
                csv_writer.writerow([y, pred_y])
            print("'compare.csv' file saved!")
    p, r, f, s = score(labs, labs_, average="macro")
    cnf_mat = confusion_matrix(labs, labs_)
    acc = np.mean(accs)
    rms_ = np.sqrt(np.mean(rms))
    return {
        'accuracy': acc,
        'precision': p,
        'recall': r,
        'f-score': f,
        'cnf': cnf_mat,
        'rms': -1 * rms_
    }
def train(self, train, valid):
    accuracy = 0.0
    for e in range(self.epoch):
        for i, (data, y) in enumerate(data_utils.minibatches(train, self.batch_size)):
            logits, _, lr, loss = self.sess.run(
                [self.logits, self.train_step, self.learning_rate, self.loss],
                feed_dict={
                    self.input_x: data,
                    self.input_y: y,
                    self.dropout: self.dropout_prob
                })
            if i % 100 == 0:
                acc_test = self.evaluate(valid)
                if acc_test > accuracy:
                    self.save_session(self.model_path)
                    accuracy = acc_test  # remember the best score so far
                print('This is the ' + str(e) + ' epoch training, the ' + str(i) +
                      ' batch data, learning rate = ' + str(round(lr, 5)) +
                      ', loss = ' + str(round(loss, 2)) +
                      ', accuracy = ' + str(acc_test))
def train(self, train, dev):
    best_score = 0
    nepoch_no_imprv = 0  # for early stopping
    self.add_summary()  # tensorboard
    for epoch in range(self.config.nepochs):
        self.logger.info("Epoch {:} out of {:}".format(epoch + 1, self.config.nepochs))
        batch_size = self.config.batch_size
        nbatches = (len(train) + batch_size - 1) // batch_size
        prog = Progbar(target=nbatches)
        # self.config.lr *= self.config.lr_decay
        for i, (words, labels, intent, all_tags) in enumerate(minibatches(train, batch_size)):
            fd, _ = self.get_feed_dict(words, all_tags, labels, intent,
                                       self.config.lr, self.config.dropout)
            _, train_loss, summary, intent_loss, slot_loss = self.sess.run(
                [self.train_op, self.loss, self.merged,
                 self.intent_loss, self.slot_loss],
                feed_dict=fd)
            prog.update(i + 1, [("train loss", train_loss),
                                ("intent_loss", intent_loss),
                                ("slot_loss", slot_loss)])
            if i % 10 == 0:
                self.file_writer.add_summary(summary, epoch * nbatches + i)
        metrics = self.run_evaluate(dev)
        msg = " - ".join(["{} {:04.2f}".format(k, v) for k, v in metrics.items()])
        self.logger.info(msg)
        score = metrics["f1"] + metrics["intent_acc"]
        self.config.lr *= self.config.lr_decay
        if score >= best_score:
            nepoch_no_imprv = 0
            self.save_session()
            best_score = score
            self.logger.info("- new best score!")
        else:
            nepoch_no_imprv += 1
            if nepoch_no_imprv >= self.config.nepoch_no_imprv:
                if not self.embedding_trainable:
                    self.logger.info("fine tuning word embedding")
                    for i in range(10):
                        self.logger.info("######################")
                    self.set_word_embeddings_trainable()
                    self.config.lr = 0.001
                    nepoch_no_imprv = 0
                else:
                    self.logger.info("- early stopping {} epochs without "
                                     "improvement".format(nepoch_no_imprv))
                    break
def run_epoch(self, sess, train, dev, epoch):
    nbatches = (len(train) + self.config.batch_size - 1) // self.config.batch_size
    total_loss = 0.0
    batch_cnt = 0
    # acc, f1 = self.run_eval(sess, dev)
    # print('In valid data: ')
    # print('Accuracy: ', acc, '\n', 'Mean Accuracy: ', np.mean(acc))
    # print('F1 Score: ', f1, '\n', 'Macro F1 Score: ', np.mean(f1))
    for i, (words, labels) in enumerate(minibatches(train, self.config.batch_size)):
        fd = self.get_feed_dict(i, words, labels=labels,
                                lr=self.config.learning_rate,
                                dropout=self.config.dropout)
        _, loss, mean_acc, summary = sess.run(
            [self.train_op, self.loss, self.mean_acc, self.merged], feed_dict=fd)
        total_loss += loss
        if i % 300 == 0 and i != 0:
            print('In valid data: ')
            acc, f1 = self.run_eval(sess, dev)
            print('Accuracy: ', acc, '\n', 'Mean Accuracy: ', np.mean(acc))
            print('F1 Score: ', f1, '\n', 'Macro F1 Score: ', np.mean(f1))
        batch_cnt += 1
        if i % 20 == 0:
            self.file_writer.add_summary(summary, epoch * nbatches + i)
        if i % 100 == 0:
            print('epoch: %d, batch: %d, mean_acc: %.2f' % (epoch, i, mean_acc))
            print("batch {}, loss {:04.2f}.".format(i, float(total_loss) / batch_cnt))
    acc, f1 = self.run_eval(sess, dev)
    print('In valid data: ')
    print('Accuracy: ', acc, '\n', 'Mean Accuracy: ', np.mean(acc))
    print('F1 Score: ', f1, '\n', 'Macro F1 Score: ', np.mean(f1))
    return acc
def run_evaluate(self, test, print_or_not=False):
    accs = []
    intent_correct = 0
    intent_total = 0
    correct_preds, total_correct, total_preds = 0., 0., 0.
    for words, labels, intents, all_tags in minibatches(test, self.config.batch_size):
        labels_pred, sequence_lengths, pred_intents, score = self.predict_batch(words, all_tags)
        for word_ins, lab, lab_pred, length, intent, pred_intent in zip(
                words, labels, labels_pred, sequence_lengths, intents, pred_intents):
            if print_or_not:
                words_list = [self.config.idx2vocab[a] for a in word_ins]
                lab_list = [self.config.idx2tag[a] for a in lab]
                lab_pred_list = [self.config.idx2tag[a] for a in lab_pred]
                print("||".join(words_list) + "\t" + "||".join(lab_list)
                      + "\t" + "||".join(lab_pred_list) + "\t"
                      + str(self.config.idx2intent[intent]) + "\t"
                      + str(self.config.idx2intent[pred_intent]))
            lab = lab[:length]
            lab_pred = lab_pred[:length]
            accs += [a == b for (a, b) in zip(lab, lab_pred)]
            lab_chunks = set(get_chunks(lab, self.config.vocab_tags))
            lab_pred_chunks = set(get_chunks(lab_pred, self.config.vocab_tags))
            correct_preds += len(lab_chunks & lab_pred_chunks)
            total_preds += len(lab_pred_chunks)
            total_correct += len(lab_chunks)
            intent_total += 1
            if pred_intent == intent:
                intent_correct += 1
    p = correct_preds / total_preds if correct_preds > 0 else 0
    r = correct_preds / total_correct if correct_preds > 0 else 0
    f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0
    acc = np.mean(accs)
    if intent_total != 0:
        intent_acc = intent_correct / float(intent_total)
    else:
        intent_acc = 0
    return {"acc": 100 * acc, "f1": 100 * f1, "intent_acc": 100 * intent_acc,
            "intent_correct": intent_correct, "intent_total": intent_total}
def run_epoch(self, train, dev, epoch):
    batch_size = self.config.batch_size
    num_batch = (len(train) + batch_size - 1) // batch_size
    prog = Progress(target=num_batch)
    for i, (word, label) in enumerate(minibatches(train, batch_size)):
        fd, _ = self.feed_dict(word, label, self.config.lr_rate, self.config.drop_out)
        _, train_loss, summary = self.session.run(
            [self.train_op, self.loss, self.merged], feed_dict=fd)
        prog.update(i + 1, [("train loss", train_loss)])
        if i % 10 == 0:
            self.file_writer.add_summary(summary, epoch * num_batch + i)
    metrics = self.evaluate(dev)
    msg = " - ".join(["{} {:04.2f}".format(k, v) for k, v in metrics.items()])
    self.log.info(msg)
    return metrics["f1"]
def run_epoch(self, sess, train, dev, epoch):
    nbatches = (len(train) + self.config.batch_size - 1) // self.config.batch_size
    prog = Progbar(target=nbatches)
    for i, (words, imp_labels) in enumerate(minibatches(train, self.config.batch_size)):
        if self.config.model == "lstm_crf":
            imp_labels = list(map(self.config.digitize_labels, imp_labels))
        fd, _ = self.get_feed_dict(words, imp_labels, self.config.lr, self.config.dropout)
        _, train_loss = sess.run([self.optimize_, self.loss], feed_dict=fd)
        prog.update(i + 1, [("train loss", train_loss)])
    result = self.run_evaluate(sess, dev)
    self.logger.info(
        "- dev acc {:04.4f} - f {:04.4f} - rms {:04.4f}".format(
            100 * result['accuracy'], 100 * result['f-score'], -1 * result['rms']))
    return result
def run_eval(self, sess, test):
    accs = []
    # TP, FP, FN = [np.zeros((self.config.nlabels, self.config.dim_per_label)) for i in range(3)]
    label_list = [[] for i in range(self.config.nlabels)]
    pred_list = [[] for i in range(self.config.nlabels)]
    correct_preds, total_correct, total_preds = 0, 0, 0
    for i, (words, labels) in enumerate(minibatches(test, self.config.batch_size)):
        pred, _acc = self.predict_batch(sess, words, labels)
        accs.append(_acc)
        # TP, FP, FN = self.f1_update(pred, labels, TP, FP, FN)
        pred_list, label_list = self.update_f1(pred, labels, pred_list, label_list)
    acc = np.mean(np.array(accs), axis=0)
    # f1 = self.compute_f1(TP, FP, FN)
    # print(label_list, pred_list)
    f1 = [f1_score(label_list[i], pred_list[i], average='macro')
          for i in range(self.config.nlabels)]
    return acc, f1
def run_evaluate(self, sess, test, tags): """ Evaluates performance on test set Args: sess: tensorflow session test: dataset that yields tuple of sentences, tags tags: {tag: index} dictionary Returns: accuracy f1 score """ accs = [] correct_preds, total_correct, total_preds = 0., 0., 0. output_file = codecs.open("output", 'w', 'UTF-8') idx_to_tag = {idx: tag for tag, idx in tags.items()} for words, labels in minibatches(test, self.config.batch_size): labels_pred, sequence_lengths = self.predict_batch(sess, words) for lab, lab_pred, length in zip(labels, labels_pred, sequence_lengths): lab = lab[:length] lab_pred = lab_pred[:length] accs += [a == b for (a, b) in zip(lab, lab_pred)] lab_chunks = set(get_chunks(lab, tags)) lab_pred_chunks = set(get_chunks(lab_pred, tags)) correct_preds += len(lab_chunks & lab_pred_chunks) total_preds += len(lab_pred_chunks) total_correct += len(lab_chunks) output_string = "" for b, c in zip(lab, lab_pred): split_line = [idx_to_tag[b], idx_to_tag[c]] output_string += ' '.join(split_line) + '\n' output_file.write(output_string + '\n') p = correct_preds / total_preds if correct_preds > 0 else 0 r = correct_preds / total_correct if correct_preds > 0 else 0 f1 = 2 * p * r / (p + r) if correct_preds > 0 else 0 acc = np.mean(accs) output_file.close() return acc, f1
def run_epoch(self, train, dev, epoch): """Performs one complete pass over the train set and evaluate on dev Args: train: dataset that yields tuple of sentences, tags dev: dataset epoch: (int) index of the current epoch Returns: f1: (python float), score to select model on, higher is better """ # progbar stuff for logging batch_size = self.config.batch_size nbatches = (len(train) + batch_size - 1) // batch_size prog = Progbar(target=nbatches) # iterate over dataset for i, (words, labels, pred_flags) in enumerate(minibatches(train, batch_size)): fd, _ = self.get_feed_dict(words, labels, pred_flags, self.config.lr, self.config.dropout) _, train_loss, train_summary = self.sess.run( [self.train_op, self.loss, self.train_merged], feed_dict=fd) prog.update(i + 1, [("{}_train loss".format(self.config.name), train_loss)]) # tensorboard if i % 10 == 0: self.train_file_writer.add_summary(train_summary, epoch*nbatches + i) yield None metrics = self.run_evaluate(dev, log_step=epoch*nbatches+i) msg = " - ".join(["{} {:04.2f}".format(k, v) for k, v in metrics.items()]) self.logger.info(msg) # Score for early stopping return self.config.early_stop_metric_sign * metrics[self.config.early_stopping_metric]
def run_epoch(self, sess, train, dev, vocab_tags, vocab_words, epoch, saver):
    """Performs one complete pass over the train set and evaluate on dev

    Args:
        sess: tensorflow session
        train: dataset that yields tuple of sentences, tags
        dev: dataset
        vocab_tags: {tag: index} dictionary
        epoch: (int)
        saver: tf saver instance
    """
    num_batches = (len(train) + self.config.batch_size - 1) // self.config.batch_size
    start_time = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    for step, (sentences, labels) in enumerate(
            minibatches(train, vocab_tags, vocab_words, self.config.batch_size)):
        sys.stdout.write(" processing: {} batch / {} batches.".format(
            step + 1, num_batches) + "\r")
        step_num = self.config.n_epoch * num_batches + step + 1
        fd, _ = self.get_feed_dict(sentences, labels,
                                   self.config.learning_rate, self.config.dropout)
        _, train_loss, summary, step_num_ = sess.run(
            [self.train_op, self.loss, self.merged, self.global_step], feed_dict=fd)
        if step + 1 == 1 or (step + 1) % 100 == 0 or step + 1 == num_batches:
            self.logger.info(
                "{} epoch {}, step {}, loss: {:.4}, metrics: {}, global_step: {}".format(
                    start_time, epoch + 1, step + 1, train_loss,
                    self.run_evaluate(sess, dev, vocab_tags, vocab_words), step_num_))
            self.file_writer.add_summary(summary, step_num_)
        if step + 1 == num_batches:
            saver.save(sess, self.config.model_path, global_step=step_num_)
    metrics = self.run_evaluate(sess, dev, vocab_tags, vocab_words)
    msg = " - ".join(["{} {:04.4f}".format(k, v) for k, v in metrics.items()])
    self.logger.info(msg)
    return metrics["acc"]