def crf(X, y, X_test, y_test):
    """Train a CRF tagger on (X, y), predict on X_test, and score with conlleval.

    X / X_test are lists of sentences; y / y_test the matching label
    sequences.  Predictions are also written to "crf_results.testb".
    """
    train_feats = [sentence_features(sent) for sent in X]
    test_feats = [sentence_features(sent) for sent in X_test]
    model = sklearn_crfsuite.CRF(
        algorithm="lbfgs",
        c1=0.1,
        c2=0.1,
        max_iterations=100,
        all_possible_transitions=True,
    )
    # Grid search over c1/c2 regularization (currently disabled):
    # choose_model = GridSearchCV(model, cv=3, param_grid={
    #     "c1": 0.2 * np.arange(0, 1) + 0.1,
    #     "c2": 0.2 * np.arange(0, 1) + 0.1,
    # })
    # choose_model.fit(train_feats, y)
    # best_model = choose_model.best_estimator_
    # print(choose_model.best_params_)
    model.fit(train_feats, y)
    labels = list(model.classes_)
    labels.remove('O')  # score entity labels only, not the outside tag
    y_pred = model.predict(test_feats)
    # print(metrics.flat_f1_score(y_test, y_pred, average='weighted', labels=labels))
    new_conll_sents = data_converter.add_column(X_test, y_pred)
    conlleval.evaluate(
        data_converter.conll_to_data_stream(new_conll_sents,
                                            write_to_file="crf_results.testb"))
def main():
    """Sanity check: re-attach the gold labels as predictions and evaluate.

    Evaluating gold-vs-gold should produce a perfect conlleval score, which
    validates the data conversion round trip.
    """
    sents = read_data('../data/onto.testa')
    print(sents[1])
    tags = extract_columns(sents, [-1])
    print(tags[1])
    # copy ground-truth label as the predicted label
    new_sents = extend_columns(sents, tags)
    print(new_sents[1])
    data_to_output(new_sents, write_to_file="out.testa")
    conlleval.evaluate(data_to_output(new_sents))
def main():
    """Round-trip CoNLL tags through bioes/bio scheme conversion and evaluate."""
    print(tags_from_conll(test_tags, scheme='bioes'))
    print(tags_to_conll(tags_from_conll(test_tags, scheme='bioes')))
    conll_sents = read_conll('../data/eng.testa')
    print(conll_sents[1])
    conll_tags = get_column(conll_sents, -1)
    tags = tags_from_conll(conll_tags, scheme='bio')
    print(tags[1])
    # Re-attach the (converted-back) gold tags as the prediction column.
    new_conll_sents = add_column(conll_sents, tags_to_conll(conll_tags))
    print(new_conll_sents[1])
    conll_to_data_stream(new_conll_sents, write_to_file="tmp.testa")
    conlleval.evaluate(conll_to_data_stream(new_conll_sents))
def evaluate(mode, data_instance, label_alphabet, data, model):
    """Batch `data_instance` through `model` and report conlleval P/R/F1.

    Returns the (precision, recall, f1) triple after printing it.
    """
    model.eval()
    pred_results = []
    gold_results = []
    batch_size = data.HP_batch_size
    total = len(data_instance)
    for batch_id in range(total // batch_size + 1):
        start = batch_id * batch_size
        end = min((batch_id + 1) * batch_size, total)
        instance = data_instance[start:end]
        if not instance:
            continue  # final slice is empty when total divides evenly
        batch = batchify_with_label(instance, data.HP_gpu, True)
        tag_seq = model(mode, batch[0], batch[1], batch[3], batch[4],
                        batch[5], batch[8])
        pred_label, gold_label = recover_label(tag_seq, batch[6], batch[8],
                                               label_alphabet, batch[2])
        pred_results += pred_label
        gold_results += gold_label
    p, r, f = conlleval.evaluate(gold_results, pred_results, verbose=False)
    print("precision {0}, recall {1}, f1 {2}".format(p, r, f))
    return p, r, f
def evaluate(self, predictions, groundTruths, *args, **kwargs):
    """Compute [precision, recall, F1] via conlleval.

    When both `predictions` and `groundTruths` are None, label pairs are
    read from the predictions file (default '../results/predictions.txt'):
    the first two lines are skipped, and each remaining non-empty line is
    expected to be 'token gold pred' (space separated).
    """
    self.log.debug("Invoked evaluate method")
    self.log.debug("With parameters : ")
    self.log.debug(predictions)
    self.log.debug(groundTruths)
    self.log.debug(args)
    self.log.debug(kwargs)
    true_vals = list()
    pred_vals = list()
    if predictions is None and groundTruths is None:
        preds_path = kwargs.get("predsPath", '../results/predictions.txt')
        with open(preds_path, mode='r', encoding='utf-8') as f:
            next(f)  # skip the two header lines
            next(f)
            raw_preds = f.read().splitlines()
        for line in raw_preds:
            if line != "" or len(line) != 0:  # skips blank separator lines
                cols = line.split(" ")
                true_vals.append(cols[1])
                pred_vals.append(cols[2])
    else:
        true_vals = groundTruths
        pred_vals = predictions
    # Module-level conlleval evaluate (not this method).
    eval_metrics = evaluate(true_vals, pred_vals, False)
    self.log.debug("Returning evaluation metrics [P, R, F1] :")
    self.log.debug(eval_metrics)
    return eval_metrics
def cross_evaluation(multi_iter, model, task_vocab, domain_vocab, multi_labels):
    """Evaluate `model` on each (task, domain) dev iterator and log P/R/F1."""
    logger = logging.getLogger("ner")
    with torch.no_grad():
        for item_key, item_iter in multi_iter.items():
            golden_label = []
            predict_label = []
            for dev_batch in item_iter:
                (tokens_idx, chars_idx, tags_idx, tokens_mask,
                 sentence_len, sentence_text, sentence_tags) = dev_batch
                task_id = torch.LongTensor([task_vocab[item_key[0]]])
                domain_id = torch.LongTensor([domain_vocab[item_key[1]]])
                token_score, tags_idx, label_hat, _ = model(
                    tokens_idx, chars_idx, sentence_len, tags_idx, None, None,
                    tokens_mask, task_id, domain_id, item_key)
                pred_seqs = label_hat.cpu().numpy().tolist()
                gold_seqs = tags_idx.cpu().numpy().tolist()
                mask_seqs = tokens_mask.cpu().numpy().tolist()
                label_map = multi_labels[item_key]
                for p_seq, g_seq, m_seq in zip(pred_seqs, gold_seqs, mask_seqs):
                    for p_idx, g_idx, m in zip(p_seq, g_seq, m_seq):
                        if m == 1:  # skip padded positions
                            predict_label.append(label_map.get_label(p_idx))
                            golden_label.append(label_map.get_label(g_idx))
            precision, recall, f1 = conlleval.evaluate(
                golden_label, predict_label, verbose=False)
            logger.info(
                "eval item_key {0} precision {1}, recall {2}, f1 {3}.".format(
                    item_key, precision, recall, f1))
def evaluate(self):
    """Flatten accumulated id sequences, map them to label strings, and
    return conlleval (precision, recall, f1)."""
    # Flatten list-of-lists into flat lists
    self.y_pred = [lab for seq in self.y_pred for lab in seq]
    self.y_true = [lab for seq in self.y_true for lab in seq]
    print(self.y_true)
    # Convert numeric label ids to their string labels
    self.y_pred = [self.label_dict[int(i)] for i in self.y_pred]
    self.y_true = [self.label_dict[int(i)] for i in self.y_true]
    return evaluate(self.y_true, self.y_pred, verbose=False)
def crf_eval(dev_fname, model_file):
    '''
    infers on dev set using model found in model_file and prints F1 scores
    input - dev_fname : file path to dev set
            model_file : file path to crf model
    '''
    dev_sentences = data_converter.read_data(dev_fname)
    dev_features = local_features.add_local_features(dev_sentences)
    dev_labels = data_converter.get_column(dev_sentences, -1)
    tagger = pycrfsuite.Tagger()
    tagger.open(model_file)
    dev_predictions = [tagger.tag(xseq) for xseq in dev_features]
    # conlleval expects "token\tgold\tpred" lines with blank lines
    # separating sentences; the token itself is irrelevant to scoring.
    lines = []
    for labels, predictions in zip(dev_labels, dev_predictions):
        lines.extend("dummy\t{}\t{}".format(gold, guess)
                     for gold, guess in zip(labels, predictions))
        lines.append("")
    conlleval.evaluate(lines)
def evaluation(dev_iter, model, label_vocab, logger):
    """Evaluate `model` over `dev_iter`; log P/R/F1 and return the F1."""
    golden_label = []
    predict_label = []
    with torch.no_grad():
        for dev_batch in dev_iter:
            tokens_idx, chars_idx, tags_idx, tokens_mask, sentence_len, _, _ = dev_batch
            token_score, token_hat, tags_idx = model(
                tokens_idx, chars_idx, sentence_len, tags_idx)
            pred_seqs = token_hat.cpu().numpy().tolist()
            gold_seqs = tags_idx.cpu().numpy().tolist()
            mask_seqs = tokens_mask.cpu().numpy().tolist()
            for p_seq, g_seq, m_seq in zip(pred_seqs, gold_seqs, mask_seqs):
                for p_idx, g_idx, m in zip(p_seq, g_seq, m_seq):
                    if m == 1:  # skip padded positions
                        predict_label.append(label_vocab.get_label(p_idx))
                        golden_label.append(label_vocab.get_label(g_idx))
    precision, recall, f1 = conlleval.evaluate(
        golden_label, predict_label, verbose=False)
    logger.info("eval precision {0}, recall {1}, f1 {2}.".format(
        precision, recall, f1))
    return f1
# Restore the latest checkpoint and score both the validation and test splits.
saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir='checkpoints'))
sess.run(tf.tables_initializer())

val_feeder = LSTMCRFeeder(val_x, val_feats, val_la, max_length, model.feat_size, 16)
test_feeder = LSTMCRFeeder(test_x, test_feats, test_la, max_length, model.feat_size, 16)

# --- Validation split ---
preds = []
for step in tqdm(range(val_feeder.step_per_epoch)):
    tokens, feats, labels = val_feeder.feed()
    pred = model.test(sess, tokens, feats)
    preds.extend(pred)
# BUG FIX: the validation predictions were being compared against the TEST
# gold labels (test_la); compare against val_la instead.
true_seqs = [idx2la[la] for sl in val_la for la in sl]
pred_seqs = [idx2la[la] for sl in preds for la in sl]
# Truncate to the common length in case padding/feeder rounding misaligns them.
ll = min(len(true_seqs), len(pred_seqs))
_, _, f1 = evaluate(true_seqs[:ll], pred_seqs[:ll], False)
print('Val F1 = %f' % f1)

# --- Test split ---
preds = []
for step in tqdm(range(test_feeder.step_per_epoch)):
    tokens, feats, labels = test_feeder.feed()
    pred = model.test(sess, tokens, feats)
    preds.extend(pred)
true_seqs = [idx2la[la] for sl in test_la for la in sl]
pred_seqs = [idx2la[la] for sl in preds for la in sl]
ll = min(len(true_seqs), len(pred_seqs))
_, _, f1 = evaluate(true_seqs[:ll], pred_seqs[:ll], False)
print('Test F1 = %f' % f1)
def train(self, data, *args, **kwargs):
    """Train the ELMo BiLSTM-CNN-CRF tagger.

    Loads (building on first use) the parsed pickle dump, constructs the
    ELMo batcher and bidirectional LM, builds the model, then runs the
    train/validation loop with per-epoch checkpointing.

    Returns:
        (model, sess, saver) so callers can continue using the session.
    """
    dump_path = kwargs.get("parsedDumpPath", '../dev/parsedDataDump.pkl')
    # Build the parsed data dump on first run.
    if not os.path.isfile(dump_path):
        self.data_converter(data, *args, **kwargs)
    with open(dump_path, 'rb') as fp:
        train_set, val_set, test_set, dicts = pickle.load(fp)

    w2idx, la2idx = dicts['words2idx'], dicts['labels2idx']
    idx2w = {w2idx[k]: k for k in w2idx}
    idx2la = {la2idx[k]: k for k in la2idx}

    train_x, train_chars, train_la = train_set
    val_x, val_chars, val_la = val_set
    test_x, test_chars, test_la = test_set

    self.log.debug('Loading elmo!')
    elmo_batcher = Batcher(kwargs.get("vocabPath", '../dev/vocab.txt'), 50)
    elmo_bilm = BidirectionalLanguageModel(
        kwargs.get(
            "elmoOptionsFile",
            '../resources/elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_options.json'
        ),
        kwargs.get(
            "elmoWeightFile",
            '../resources/elmo/elmo_2x4096_512_2048cnn_2xhighway_5.5B_weights.hdf5'
        ))

    self.log.debug('Loading model!')
    num_classes = len(la2idx.keys())
    # Padding dimensions are taken from train/test only (as in the dump).
    max_seq_length = max(
        max(map(len, train_x)),
        max(map(len, test_x)),
    )
    max_word_length = max(
        max([len(ssc) for sc in train_chars for ssc in sc]),
        max([len(ssc) for sc in test_chars for ssc in sc]))

    model = ElmoModel(
        True,
        kwargs.get("wordEmbeddingSize", 50),   # Word embedding size
        kwargs.get("charEmbeddingSize", 16),   # Character embedding size
        kwargs.get("LSTMStateSize", 200),      # LSTM state size
        kwargs.get("filterNum", 128),          # Filter num
        kwargs.get("filterSize", 3),           # Filter size
        num_classes,
        max_seq_length,
        max_word_length,
        kwargs.get("learningRate", 0.015),
        kwargs.get("dropoutRate", 0.5),
        elmo_bilm,
        1,                                     # elmo_mode
        elmo_batcher,
        **kwargs)

    self.log.debug('Start training...')
    self.log.debug('Train size = %d' % len(train_x))
    self.log.debug('Val size = %d' % len(val_x))
    self.log.debug('Test size = %d' % len(test_x))
    self.log.debug('Num classes = %d' % num_classes)

    start_epoch = 1
    max_epoch = kwargs.get("maxEpoch", 100)
    self.log.debug('Epoch = %d' % max_epoch)

    saver = tf.train.Saver()
    best_saver = BestCheckpointSaver(
        save_dir=kwargs.get("bestCheckpointPath",
                            "../results/checkpoints/best"),
        num_to_keep=1,
        maximize=True)

    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True
    sess = tf.Session(config=config)

    # Resume from the latest checkpoint when one exists; otherwise init fresh.
    latest_checkpoint = tf.train.latest_checkpoint(
        checkpoint_dir=kwargs.get("checkpointPath", "../results/checkpoints"))
    if latest_checkpoint:
        saver.restore(sess, latest_checkpoint)
    else:
        sess.run(tf.global_variables_initializer())
    sess.run(tf.tables_initializer())

    train_feeder = LSTMCNNCRFeeder(train_x, train_chars, train_la,
                                   max_seq_length, max_word_length,
                                   kwargs.get("epochWidth", 16))
    val_feeder = LSTMCNNCRFeeder(val_x, val_chars, val_la, max_seq_length,
                                 max_word_length, kwargs.get("epochWidth", 16))

    for epoch in range(start_epoch, max_epoch + 1):
        loss = 0
        for step in range(train_feeder.step_per_epoch):
            tokens, chars, labels = train_feeder.feed()
            step_loss = model.train_step(sess, tokens, chars, labels)
            loss += step_loss
            self.log.debug(
                'epoch: %d, size: %d/%d, step_loss: %f, epoch_loss: %f',
                epoch, train_feeder.offset, train_feeder.size, step_loss,
                loss)

        # Validation pass for this epoch.
        preds = []
        for step in range(val_feeder.step_per_epoch):
            tokens, chars, labels = val_feeder.feed()
            pred = model.test(sess, tokens, chars)
            preds.extend(pred)
        true_seqs = [idx2la[la] for sl in val_la for la in sl]
        pred_seqs = [idx2la[la] for sl in preds for la in sl]
        # Truncate to the common length before scoring.
        ll = min(len(true_seqs), len(pred_seqs))
        self.log.debug(true_seqs[:ll])
        self.log.debug(pred_seqs[:ll])
        prec, rec, f1 = evaluate(true_seqs[:ll], pred_seqs[:ll], False)
        self.log.debug("Epoch: %d, val_p: %f, val_r: %f, val_f1: %f", epoch,
                       prec, rec, f1)
        val_feeder.next_epoch(False)

        # Checkpoint every epoch; keep the best-F1 checkpoint separately.
        saver.save(sess,
                   kwargs.get("checkpointPath", "../results/checkpoints") +
                   '/model.ckpt',
                   global_step=epoch)
        best_saver.handle(f1, sess, epoch)
        logging.info('')
        train_feeder.next_epoch()

    self.log.debug("Training done! ... Saving trained model")
    return model, sess, saver
def run_model(self, batches, test_batches, char_batches, test_char_batches,
              test_sents, tags, train_feat, test_feat, testb_feat,
              testb_char_batch, testb_batch, testb, max_test):
    """Train the tagger, periodically score the test set with conlleval,
    and at epoch 100 decode testb, write 'rnn_results.testb', and stop.
    """
    init = tf.global_variables_initializer()
    with tf.Session() as self.sess:
        init.run()
        epoch = 1
        while True:
            ## Train
            epoch_loss = float(0)
            acc_train = 0
            epoch += 1
            for batch in range(len(batches)):
                X_batch, X_len, y_batch = batches[batch]
                feed_dict = {self.inputs: X_batch,
                             self.sequence_length: X_len,
                             self.keep_prob: self.keep_ratio,
                             self.y: y_batch,
                             self.max_data: self.max_len}
                if self.pretrain:
                    feed_dict[self.embedding_placeholder] = self.my_embeddings
                if self.char:
                    X_char_batch, X_char_len = char_batches[batch]
                    feed_dict[self.char_embed_placeholder] = self.char_embeddings
                    feed_dict[self.char_inputs] = X_char_batch
                    feed_dict[self.word_length] = X_char_len
                if self.features:
                    feed_dict[self.feat_inputs] = train_feat[batch]
                _, loss_val, pred = self.sess.run(
                    [self.training_op, self.loss, self.predicted_label],
                    feed_dict=feed_dict)
                #print(pred)
                acc_train += self.accuracy.eval(feed_dict=feed_dict)
                epoch_loss += loss_val

            ## Test
            acc_test = 0
            pred = list()
            for batch in range(len(test_batches)):
                X_batch, X_len, y_batch = test_batches[batch]
                feed_dict = {self.inputs: X_batch,
                             self.sequence_length: X_len,
                             self.keep_prob: 1,  # no dropout at eval time
                             self.y: y_batch,
                             self.max_data: self.max_len}
                if self.pretrain:
                    feed_dict[self.embedding_placeholder] = self.my_embeddings
                if self.char:
                    X_char_batch, X_char_len = test_char_batches[batch]
                    feed_dict[self.char_inputs] = X_char_batch
                    feed_dict[self.word_length] = X_char_len
                    feed_dict[self.char_embed_placeholder] = self.char_embeddings
                if self.features:
                    feed_dict[self.feat_inputs] = test_feat[batch]
                acc_test += self.accuracy.eval(feed_dict=feed_dict)
                if epoch % 20 == 2:
                    pred.extend(
                        self.predicted_label.eval(feed_dict=feed_dict).tolist())

            # Every 20th epoch (epoch % 20 == 2) run conlleval on the test set.
            if epoch % 20 == 2:
                for sent in range(len(test_sents)):
                    # Map predicted ids to tag strings, then drop padding.
                    for word in range(len(test_sents[sent])):
                        pred[sent][word] = tags[pred[sent][word]]
                    pred[sent] = pred[sent][:len(test_sents[sent])]
                new_conll_sents = data_converter.add_column(
                    test_sents, data_converter.tags_to_conll(pred))
                conlleval.evaluate(
                    data_converter.conll_to_data_stream(
                        new_conll_sents, write_to_file="rnn.testa"))

            print(epoch, "Train accuracy:", acc_train / float(len(batches)),
                  "Loss: ", epoch_loss / float(len(batches)),
                  "Test accuracy: ", acc_test / float(len(test_batches)))

            # Final decode of testb at epoch 100, then stop training.
            if epoch == 100:
                pred = list()
                for batch in range(len(testb_batch)):
                    X_batch, X_len, y_batch = testb_batch[batch]
                    feed_dict = {self.inputs: X_batch,
                                 self.sequence_length: X_len,
                                 self.keep_prob: 1,
                                 self.max_data: max_test}
                    if self.pretrain:
                        feed_dict[self.embedding_placeholder] = self.my_embeddings
                    if self.char:
                        X_char_batch, X_char_len = testb_char_batch[batch]
                        feed_dict[self.char_inputs] = X_char_batch
                        feed_dict[self.word_length] = X_char_len
                        feed_dict[self.char_embed_placeholder] = self.char_embeddings
                    if self.features:
                        feed_dict[self.feat_inputs] = testb_feat[batch]
                    pred.extend(
                        self.predicted_label.eval(feed_dict=feed_dict).tolist())
                for sent in range(len(testb)):
                    for word in range(len(testb[sent])):
                        pred[sent][word] = tags[pred[sent][word]]
                    pred[sent] = pred[sent][:len(testb[sent])]
                new_conll_sents = data_converter.add_column(
                    testb, data_converter.tags_to_conll(pred))
                conlleval.evaluate(
                    data_converter.conll_to_data_stream(
                        new_conll_sents, write_to_file="rnn_results.testb"))
                break