Example 1
def valid_eval(data_in, task='FNER', eval_type=None, final=False):
    m1 = data_in['mention']
    l1 = data_in['left_context']
    r1 = data_in['right_context']
    lab = data_in['label']
    lf_id = pad_single(l1)
    rt_id = pad_single(r1)
    m_ = pad_single(m1)
    # m_, lf_id, rt_id = pad_method(m1, l1, r1)
    collector = []
    true = []
    total_loss = []
    iters = 0
    p1 = 100  # evaluation batch size
    for k in range(0, len(m_), p1):
        s = Model.predict(lf_id[k:k + p1],
                          rt_id[k:k + p1],
                          context_data=None,
                          mention_representation_data=m_[k:k + p1],
                          feature_data=None,
                          doc_vector=None)
        loss_val = Model.error(lf_id[k:k + p1],
                               rt_id[k:k + p1],
                               lab[k:k + p1],
                               context_data=None,
                               mention_representation_data=m_[k:k + p1],
                               feature_data=None,
                               doc_vector=None)

        r = lab[k:k + p1]
        collector.append(s)
        true.append(r)
        total_loss.append(loss_val)
        iters += 1
    average_eval_loss = sum(total_loss) / iters
    print(task + " Loss: ", average_eval_loss)
    collector = np.squeeze(np.vstack(collector))
    true = np.vstack(true)
    print(collector.shape, true.shape)
    strict_f1 = acc_hook(collector, true)
    logging.info(str(eval_type) + " FNER loss: {}".format(average_eval_loss))
    if final:
        fname = args.dataset + "_" + args.encoder + "_" + str(
            args.feature) + "_" + str(args.hier) + "_" + str(
                args.dataset_kge) + ".txt"
        save_predictions(collector, true, dicts["id2label"], fname)
    return strict_f1
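The snippet assumes a pad_single helper that is not shown. A minimal sketch of what it plausibly does, assuming lists of token-id sequences padded with 0 into a rectangular array (the name reuse is from the snippet; the defaults are hypothetical):

import numpy as np

def pad_single(seqs, pad_id=0, max_len=None):
    # Hypothetical reconstruction -- the real helper is not part of this snippet.
    # Pads variable-length token-id sequences into one rectangular int array
    # so they can be sliced into fixed-size batches as in the loop above.
    if max_len is None:
        max_len = max(len(s) for s in seqs)
    out = np.full((len(seqs), max_len), pad_id, dtype=np.int64)
    for i, s in enumerate(seqs):
        s = s[:max_len]
        out[i, :len(s)] = s
    return out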
Example 2
def valid_eval(data_in, task, eval_type=None, final=False):
    if task == 'FNER':
        collector = []
        true = []
        iters = 0
        total_loss = []
        c_, m_, lab, f, d, s_in, m_id, l_id, r_id = data_in.next()
        lf_id = pad_single(l_id)
        rt_id = pad_single(r_id)
        rt_id = np.flip(rt_id, axis=-1)  # reverse the right-context token order
        p1 = 100  # evaluation batch size
        for k in range(0, len(c_), p1):
            s = Model.predict(lf_id[k:k + p1],
                              rt_id[k:k + p1],
                              context_data=None,
                              mention_representation_data=m_[k:k + p1],
                              feature_data=f[k:k + p1],
                              doc_vector=None)
            loss_val = Model.error(lf_id[k:k + p1],
                                   rt_id[k:k + p1],
                                   lab[k:k + p1],
                                   context_data=None,
                                   mention_representation_data=m_[k:k + p1],
                                   feature_data=f[k:k + p1],
                                   doc_vector=None)

            r = lab[k:k + p1]
            collector.append(s)
            true.append(r)
            total_loss.append(loss_val)
            iters += 1
        average_eval_loss = sum(total_loss) / iters
        print(task + " Loss: ", average_eval_loss)
        collector = np.squeeze(np.vstack(collector))
        true = np.vstack(true)
        print(collector.shape, true.shape)
        # print(collector)
        # print(true)
        strict_f1 = acc_hook(collector, true)
        logging.info(
            str(eval_type) + " FNER loss: {}".format(average_eval_loss))
        if final:
            fname = args.dataset + "_" + args.encoder + "_" + str(
                args.feature) + "_" + str(args.hier) + "_" + str(
                    args.dataset_kge) + ".txt"
            save_predictions(collector, true, dicts["id2label"], fname)
        return strict_f1
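Examples 1 and 2 duplicate the same mini-batch loop. A sketch of how it could be factored out, with predict_fn and error_fn standing in for Model.predict and Model.error (the helper name and the dict-of-arrays interface are assumptions, not the project's API):

import numpy as np

def batched_eval(predict_fn, error_fn, inputs, labels, batch_size=100):
    # inputs: dict of equally sized arrays, sliced batch by batch.
    preds, losses = [], []
    for k in range(0, len(labels), batch_size):
        batch = {name: arr[k:k + batch_size] for name, arr in inputs.items()}
        preds.append(predict_fn(**batch))
        losses.append(error_fn(labels=labels[k:k + batch_size], **batch))
    return np.squeeze(np.vstack(preds)), sum(losses) / len(losses)

Each valid_eval variant would then only build its inputs dict and post-process the stacked predictions.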
Example 3
        print "\r%d" % i,
        batch_data = train_batcher.next()
        # if i%(step_par_epoch/10)==0:
        #     loss = model.printer([model.LM_loss_total, model.type_loss], batch_data)
        #     print(loss)
        model.train(batch_data)

    print "------dev--------"
    batch_data = dev_batcher.next()
    scores = model.predict(batch_data)
    acc_hook(scores, batch_data["Y"])

    model.load_all("./Models/" + d + "/lamb" + str(args.lamb) + "/model")
    print "-----test--------"
    batch_data = test_batcher.next()
    scores = model.predict(batch_data)
    acc_hook(scores, batch_data["Y"])
    print

#    model.save_all(epoch, save_id)

print "Training completed.  Below are the final test scores: "
print "-----test--------"
batch_data = test_batcher.next()
scores = model.predict(batch_data)
acc_hook(scores, batch_data["Y"])
fname = args.dataset + "_" + args.encoder + "_" + str(
    args.feature) + "_" + str(args.hier) + ".txt"
save_predictions(scores, batch_data["Y"], dicts["id2label"], fname)

print "Cheers!"
Example 4
        np.savetxt(args.resultpath + "/scores_epoch" + str(epoch),
                   scores,
                   fmt='%f')
        scores = np.sort(a=scores, axis=1)
        np.savetxt(args.resultpath + "/sorted_scores_epoch" + str(epoch),
                   scores,
                   fmt='%f')
    print "-----test--------"
    (context_data, mention_representation_data,
     target_data, feature_data) = test_batcher.next()
    scores = model.predict(context_data, mention_representation_data,
                           feature_data, args.gaussian)
    acc_hook(scores, target_data, args.gaussian, 0, 1, args.path,
             label_hierarchy)

endtime = datetime.datetime.now()
print "total train time: " + str((endtime - time1).total_seconds())

print "Training completed.  Below are the final test scores: "
print "-----test--------"
(context_data, mention_representation_data,
 target_data, feature_data) = test_batcher.next()
scores = model.predict(context_data, mention_representation_data, feature_data,
                       0)
acc_hook(scores, target_data, args.gaussian, args.path, label_hierarchy)
fname = args.dataset + "_" + args.encoder + "_" + str(
    args.feature) + "_" + str(args.hier) + ".txt"
# fname = args.resultpath + "/prediction"
save_predictions(scores, target_data, dicts["id2label"], fname, args.gaussian)

print "Cheers!"
Example 5
print "decode_size: ", decode_size

# if decode_size is 0 (i.e. the decode_dataset is empty), the serif-instances file must have been empty
# this can happen if the serif name-list adder did not find any names to add as mentions (e.g. if you are using a very short text)
# in such a case, exit with success
if decode_size == 0:
    print "Exiting decoding since decode dataset is empty!"
    # save an empty output file; this will avoid other steps in the CauseEX pipeline from failing
    with open(args.output, "w") as fp:
        pass
    sys.exit(0)

if decode_dataset["data"].shape[0] == 0:
    print "Dataset is empty. Exit"
    sys.exit()

print "Creating batcher..."
test_batcher = Batcher(decode_dataset["storage"], decode_dataset["data"],
                       decode_dataset["data"].shape[0], 10, dicts["id2vec"])

print "Getting bacther.next..."
(context_data, mention_representation_data,
 target_data, feature_data) = test_batcher.next()

print "Running decoding..."
scores = model.predict(context_data, mention_representation_data, feature_data)
acc_hook(scores, target_data)
save_predictions(scores, target_data, dicts["id2label"], args.output)

print "Finished decoding! Predicted labels written to: " + args.output
Example 6
File: train.py Project: BBN-E/Hume
# Bonan: 31 is a hard-coded number of batches; 31*1000 (batch size) should be less than the total number of training instances
step_par_epoch = int(train_dataset["data"].shape[0] / 1000)

print "start trainning"
for epoch in range(5):
    train_batcher.shuffle()
    print "epoch",epoch
    for i in range(step_par_epoch):
        print "step",i
        context_data, mention_representation_data, target_data, feature_data = train_batcher.next()
        model.train(context_data, mention_representation_data, target_data, feature_data)
        
    print "------dev--------"
    context_data, mention_representation_data, target_data, feature_data = dev_batcher.next()
    scores = model.predict(context_data, mention_representation_data, feature_data)
    acc_hook(scores, target_data)

print "Training completed.  Below are the final test scores: "
print "Saving model"
model.save(os.path.join(args.model_output_dir, "NFGEC_tf_session"), "data/" + d + "/dicts_figer.pkl")
# using a hard-coded prefix 'NFGEC_tf_session'; the same will be used by decode.py

print "-----test--------"
context_data, mention_representation_data, target_data, feature_data = test_batcher.next()
scores = model.predict(context_data, mention_representation_data, feature_data)
acc_hook(scores, target_data)
fname = args.dataset + "_" + args.encoder + "_" + str(args.feature) + "_" + str(args.hier) + ".txt"
save_predictions(scores, target_data, dicts["id2label"], fname)

print "Cheers!"