Example #1
File: eval.py  Project: xhuang28/NewBioNer
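            # Cache only the raw Dataset objects; the DataLoader wrappers are
            # cheap to rebuild on load, so they are not pickled (see Example #2)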
            single_dev_datasets = []
            for dataloader in dev_dataset_loader:
                single_dev_datasets.append([dl.dataset for dl in dataloader])
            with open(args.pickle + "/temp_single_dev.p", "wb") as f:
                pickle.dump(single_dev_datasets, f)

            single_test_datasets = []
            for dataloader in test_dataset_loader:
                single_test_datasets.append([dl.dataset for dl in dataloader])
            print("DUMP temp_single_test")
            with open(args.pickle + "/temp_single_test.p", "wb") as f:
                pickle.dump(single_test_datasets, f)

        # evaluate each test corpus with the CRF head assigned to it by corpus2crf
        #agent.eval_batch_corpus(dev_dataset_loader, train_args['dev_file'], corpus2crf)
        agent.eval_batch_corpus(test_dataset_loader, train_args['test_file'], corpus2crf)

    # global prediction
    if args.if_pred:
        idx2tag = {idx: tag for tag, idx in tag2idx.items()}
        for file_name in args.pred_file:
            pred_lines = []
            with codecs.open(file_name, 'r', 'utf-8') as f:
                pred_lines = f.readlines()
            pred_tokens, gold_labels = utils.read_corpus(pred_lines)

            # build this corpus's tag space: the CRF's special tags plus every gold label
            pred_corpus_tagspace = set(["<start>", "<pad>"])
            for sent_labels in gold_labels:
                pred_corpus_tagspace |= set(sent_labels)

            # local pred: use the CRFs bound to each training corpus
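The idx2tag inversion above is what turns decoded index sequences back into tag strings. A minimal, self-contained sketch of that round trip; the tag names are hypothetical stand-ins, since the real tag space comes from the training corpora:

tag2idx = {"<start>": 0, "<pad>": 1, "O": 2, "B-Chemical": 3, "I-Chemical": 4}
idx2tag = {idx: tag for tag, idx in tag2idx.items()}

pred_indices = [2, 3, 4, 2]            # one decoded sentence from the model
pred_tags = [idx2tag[i] for i in pred_indices]
print(pred_tags)                       # ['O', 'B-Chemical', 'I-Chemical', 'O']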
Example #2
        args.epoch = args.start_epoch + args.epoch
        epoch_list = range(args.start_epoch, args.epoch)
    else:
        args.epoch += 1
        epoch_list = range(1, args.epoch)
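    # epoch_list either resumes at start_epoch (e.g. start_epoch=10, epoch=5
    # trains epochs 10..14) or covers a fresh run over epochs 1..epoch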

    predictor = Predictor(tag2idx, packer, label_seq=True, batch_size=50)
    evaluator = Evaluator(predictor, packer, tag2idx, args.eva_matrix,
                          args.pred_method)

    trainer = Trainer(ner_model, packer, crit_ner, crit_lm, optimizer,
                      evaluator, crf2corpus, args.plateau)
    trainer.train(crf2train_dataloader, crf2dev_dataloader, dev_dataset_loader,
                  epoch_list, args)

    trainer.eval_batch_corpus(dev_dataset_loader, args.dev_file,
                              args.corpus2crf)

    # rebuild per-corpus test DataLoaders from the dataset cache written in eval.py
    try:
        print("Load from PICKLE")
        with open(args.pickle + "/temp_single_test.p", "rb") as f:
            single_testset = pickle.load(f)
        test_dataset_loader = []
        for datasets_tuple in single_testset:
            test_dataset_loader.append([
                torch.utils.data.DataLoader(tup,
                                            batch_size=50,
                                            shuffle=False,
                                            drop_last=False)
                for tup in datasets_tuple
            ])
    except Exception:
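Both examples share one caching pattern: eval.py pickles the raw Dataset objects once, and later runs rebuild lightweight DataLoader wrappers around them. A minimal sketch of that round trip, with a hypothetical cache path and a plain TensorDataset standing in for the project's corpus datasets:

import pickle

import torch
from torch.utils.data import DataLoader, TensorDataset

cache_path = "/tmp/temp_single_test.p"  # hypothetical cache location

# dump side: store only the Dataset objects, never the DataLoader wrappers
datasets = [[TensorDataset(torch.arange(100).unsqueeze(1))]]
with open(cache_path, "wb") as f:
    pickle.dump(datasets, f)

# load side: rebuild DataLoaders with whatever batching the new run needs
with open(cache_path, "rb") as f:
    cached = pickle.load(f)
test_dataset_loader = [
    [DataLoader(ds, batch_size=50, shuffle=False, drop_last=False) for ds in group]
    for group in cached
]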