예제 #1
0
        pred_numpy = (y_pred.data).cpu().numpy()
        y_pred_labels = [ix_to_label[ix] for ix in pred_numpy]
        assert len(y_pred_labels) == len(
            features), 'y_pred_labels and features have different lengths'
        for i, pred_label in enumerate(y_pred_labels):
            features[i][5] = pred_label
            instances.append(features[i])

    acc = 100.0 * correct / total

    return acc, instances


# Script entry point: load the corpora and dictionary named by `opt`, then
# build the label and word/character index tables used by the code below.
if __name__ == "__main__":

    # Parse the train, dev and test files with the same parser; the file
    # paths come from the `opt` settings object.
    traindocuments = parserNcbiTxtFile_simple(opt.train_file)
    devdocuments = parserNcbiTxtFile_simple(opt.dev_file)
    testdocuments = parserNcbiTxtFile_simple(opt.test_file)

    # Load the abbreviation table and pass it, with all three document sets,
    # to preprocessMentions. Its return value is discarded here, so it
    # presumably updates the documents in place -- TODO confirm.
    entityAbbres = loadAbbreviations(opt.abbre_file)
    preprocessMentions(traindocuments, devdocuments, testdocuments,
                       entityAbbres)
    # NOTE(review): `dict` shadows the builtin of the same name from this
    # point on. Left unchanged because later parts of the script (not visible
    # in this excerpt) may still refer to it.
    dict = load_dict(opt.dict_file)
    # parser_dict yields three values derived from the loaded dictionary.
    meshlabels, meshlabel_to_ix, dict_words = utils.parser_dict(dict)

    # Gather the words from all three document sets, then build the word and
    # character index tables from the corpus words plus the dictionary words.
    corpus_words = utils.parser_corpus(traindocuments, devdocuments,
                                       testdocuments)
    word_to_ix, all_words, char_to_ix = utils.generate_word_alphabet(
        corpus_words, dict_words)

    if opt.random_emb:
예제 #2
0
	return entity_docs


# Script entry point: load the NER entity documents, copy the title and
# abstract of the matching test document onto each one, and write the result.
if __name__ == '__main__':

	# Hard-coded input/output locations for this run.
	ner_path = "/home/lyx/workspace/Dnorm_ncbi/ncbi_test_plain_ner"
	output_path_doc = "./sample_data/ncbi_test_ner_evalNorm"
	output_path_entity = "/home/lyx/workspace/Dnorm_ncbi/ncbi_test_plain_ner_entities"
	ncbi_ner_path = "/home/lyx/workspace/Dnorm_ncbi/output/analysis_ncbi.txt"

	# Alternative input, currently disabled:
	# entity_docs = load_entity_doc(ner_path)
	entity_docs = load_entity_doc(ncbi_ner_path)
	test_documents = parserNcbiTxtFile_simple(opt.test_file)

	for entity_doc in entity_docs:
		# Use the first test document with the same name; stop at that match.
		for candidate in test_documents:
			if entity_doc.doc_name == candidate.doc_name:
				entity_doc.title = candidate.title
				entity_doc.abstractt = candidate.abstractt
				break
		else:
			# The inner loop finished without a match: report the
			# unmatched document name.
			print(entity_doc.doc_name)

	outputDocuments_title_abstract_entity(output_path_doc, entity_docs)
	# Alternative output, currently disabled:
	# outputDocuments_ner_entities(output_path_entity, entity_docs)
	print('end')