예제 #1
0
def pred(model, best_model_path, result_conll_path, fname="test.csv"):
    """Predict on ``data_path + fname``, print a report and write a CoNLL file.

    Relies on the module-level ``data``, ``model_dir`` and ``data_path``.

    Args:
        model: Model handed to ``NerLearner``.
        best_model_path: Checkpoint path passed to ``learner.load_model``.
        result_conll_path: Destination passed to
            ``write_true_and_pred_to_conll`` as ``conll_fpath``.
        fname: File name appended to ``data_path`` to locate the input.
    """
    # The learner is constructed with a fixed checkpoint location; the
    # checkpoint actually loaded below is the ``best_model_path`` argument.
    default_checkpoint = model_dir + "/norne/bilstm_attn_cased_en.cpt"

    # Labels excluded from the supported-label list given to the learner.
    excluded = ['<pad>', '[CLS]', 'X', 'B_O', 'I_']
    supported = [label for label in data.id2label if label not in excluded]

    ner_learner = NerLearner(
        model,
        data,
        best_model_path=default_checkpoint,
        lr=0.01,
        clip=1.0,
        sup_labels=supported,
    )
    loader = get_bert_data_loader_for_predict(data_path + fname, ner_learner)

    ner_learner.load_model(best_model_path)
    predictions = ner_learner.predict(loader)

    tokens, y_true, y_pred, set_labels = bert_preds_to_ys(loader, predictions)

    # Token-level report over the label set returned by bert_preds_to_ys.
    print(flat_classification_report(y_true, y_pred, set_labels, digits=3))

    write_true_and_pred_to_conll(
        tokens=tokens,
        y_true=y_true,
        y_pred=y_pred,
        conll_fpath=result_conll_path,
    )
예제 #2
0
def train(model, num_epochs=20):
    """Fit ``model`` with ``NerLearner`` and print validation reports.

    Relies on the module-level ``data``, ``model_dir`` and ``data_path``.
    After fitting, ``learner.load_model()`` is called with no argument, then
    predictions are made on ``data_path + "valid.csv"`` and two reports are
    printed: the result of ``validate_step`` and of ``get_bert_span_report``.

    Args:
        model: Model handed to ``NerLearner``.
        num_epochs: Number of epochs passed to ``learner.fit``; also used to
            compute ``t_total``.
    """
    checkpoint = model_dir + "/norne/bilstm_attn_lr0_1_cased_en.cpt"

    # Labels excluded from the supported-label list given to the learner.
    excluded = ['<pad>', '[CLS]', 'X', 'B_O', 'I_']
    supported = [label for label in data.id2label if label not in excluded]

    # epochs * len(train loader); presumably the total step count used for
    # the learning-rate schedule -- confirm against NerLearner.
    total_steps = num_epochs * len(data.train_dl)

    ner_learner = NerLearner(
        model,
        data,
        best_model_path=checkpoint,
        lr=0.1,
        clip=1.0,
        sup_labels=supported,
        t_total=total_steps,
    )
    ner_learner.fit(num_epochs, target_metric='f1')

    valid_loader = get_bert_data_loader_for_predict(data_path + "valid.csv",
                                                    ner_learner)
    ner_learner.load_model()
    predictions = ner_learner.predict(valid_loader)

    validation_summary = validate_step(
        ner_learner.data.valid_dl,
        ner_learner.model,
        ner_learner.data.id2label,
        ner_learner.sup_labels,
    )
    print(validation_summary)

    print(get_bert_span_report(valid_loader, predictions, []))
예제 #3
0
 # Training fragment for the joint BERT + BiLSTM + attention NMT-decoder model.
 # Expects train_path, valid_path, vocab_file, bert_config_file and
 # init_checkpoint_pt to be defined before this point.

 # Make CUDA device index 5 the current device for subsequent allocations.
 torch.cuda.set_device(5)
 # is_cls=True: the model below also needs data.cls2idx, so this presumably
 # turns on loading of the classification labels -- confirm in NerData.create.
 data = NerData.create(train_path,
                       valid_path,
                       vocab_file,
                       is_cls=True,
                       batch_size=16)
 print(data.label2idx)
 # Disabled alternative: the NCRF joint variant of the model.
 # model = BertBiLSTMAttnNCRFJoint.create(
 #      len(data.label2idx), len(data.cls2idx), bert_config_file, init_checkpoint_pt,
 #     enc_hidden_dim=1024, rnn_layers=1, num_heads=5, input_dropout=0.5, nbest=12)
 # Output sizes come from the label and class vocabularies built by NerData.
 model = BertBiLSTMAttnNMTJoint.create(len(data.label2idx),
                                       len(data.cls2idx),
                                       bert_config_file,
                                       init_checkpoint_pt,
                                       enc_hidden_dim=512,
                                       rnn_layers=1,
                                       dec_embedding_dim=128,
                                       dec_hidden_dim=512,
                                       input_dropout=0.5,
                                       nbest=8)
 # Disabled: multi-GPU wrapping of the model.
 # model = torch.nn.DataParallel(model, [2, 3])
 num_epochs = 150
 # sup_labels skips the first entry of id2label (presumably the padding
 # label -- verify). t_total is epochs * len(train loader).
 learner = NerLearner(
     model,
     data,
     best_model_path="/home/aaemeljanov/models/AGRR-2019/slavic-nmt.cpt",
     lr=0.0001,
     clip=1.0,
     sup_labels=data.id2label[1:],
     t_total=num_epochs * len(data.train_dl))
 # Train for num_epochs with 'f1' as the target metric.
 learner.fit(num_epochs, target_metric='f1')
예제 #4
0
# Prediction script: build a BERT + BiLSTM + attention NMT-decoder model,
# wrap it in a NerLearner, load a checkpoint and predict on the validation
# CSV. Expects data, model_dir, bert_config_file, init_checkpoint_pt,
# num_epochs and best_model_path to be defined before this point.
data_path = "/media/liah/DATA/ner_data_other/norne/"

train_path = data_path + "train.txt"
dev_path = data_path + "valid.txt"
test_path = data_path + "test.txt"

model = BertBiLSTMAttnNMT.create(len(data.label2idx),
                                 bert_config_file,
                                 init_checkpoint_pt,
                                 enc_hidden_dim=128,
                                 dec_hidden_dim=128,
                                 dec_embedding_dim=16)

# sup_labels drops the listed special labels from the supported-label list.
learner = NerLearner(model,
                     data,
                     best_model_path=model_dir +
                     "conll-2003/bilstm_attn_cased.cpt",
                     lr=0.01,
                     clip=1.0,
                     sup_labels=[
                         l for l in data.id2label
                         if l not in ['<pad>', '[CLS]', 'X', 'B_O', 'I_']
                     ],
                     t_total=num_epochs * len(data.train_dl))

# The prediction loader takes the learner as an argument, so it must be
# created only after the learner exists. Building it earlier raised a
# NameError, or silently used a stale learner left over from a previous run.
dl = get_bert_data_loader_for_predict(data_path + "valid.csv", learner)

# Load the checkpoint named by best_model_path rather than the path given
# to the constructor above.
learner.load_model(best_model_path)

preds = learner.predict(dl)
예제 #5
0
 # Training fragment for the BERT + BiLSTM + attention NCRF model (no
 # classification output). Expects train_path, valid_path, vocab_file,
 # bert_config_file and init_checkpoint_pt to be defined before this point.
 data = NerData.create(train_path,
                       valid_path,
                       vocab_file,
                       is_cls=False,
                       batch_size=16)
 # Log the label mapping and the tokenizer vocabulary size.
 print(data.label2idx)
 print("data.tokenizer.vocab:", len(data.tokenizer.vocab))
 # The output size comes from the label vocabulary built by NerData.
 model = BertBiLSTMAttnNCRF.create(len(data.label2idx),
                                   bert_config_file,
                                   init_checkpoint_pt,
                                   enc_hidden_dim=1024,
                                   rnn_layers=1,
                                   num_heads=6,
                                   input_dropout=0.5,
                                   nbest=11)
 print(model)
 # Disabled alternative: the joint NMT model, which also needs data.cls2idx.
 # model = BertBiLSTMAttnNMTJoint.create(
 #    len(data.label2idx), len(data.cls2idx), bert_config_file, init_checkpoint_pt,
 #    enc_hidden_dim=128, rnn_layers=1, dec_embedding_dim=32, dec_hidden_dim=128, input_dropout=0.5, nbest=11)
 # Disabled: multi-GPU wrapping of the model.
 # model = torch.nn.DataParallel(model, [2, 3])
 num_epochs = 150
 # sup_labels skips the first entry of id2label (presumably the padding
 # label -- verify). t_total is epochs * len(train loader).
 learner = NerLearner(
     model,
     data,
     best_model_path=
     "/home/eartemov/ae/work/models/AGRR-2019/slavic_without_clf.cpt",
     lr=0.0001,
     clip=1.0,
     sup_labels=data.id2label[1:],
     t_total=num_epochs * len(data.train_dl))
 # Train for num_epochs with 'f1' as the target metric.
 learner.fit(num_epochs, target_metric='f1')