def test(model, train, validation, test_data):
    """Evaluate *model* on the train and validation generators and,
    when ``test_data`` has at least one class, on the test generator too.

    Returns a dict of losses, accuracies, predicted binary probabilities
    and ground-truth labels. Test-set entries default to zeros / empty
    lists when ``test_data`` is empty.
    """
    def _steps(generator):
        # number of batches needed to cover the generator exactly once
        return math.ceil(generator.n / config.BATCH_SIZE)

    loss, accuracy = model.evaluate_generator(validation, steps=_steps(validation))
    train_loss, train_accuracy = model.evaluate_generator(train, steps=_steps(train))

    val_results = evaluate.get_results(model, validation)
    probabilities = list(evaluate.transform_binary_probabilities(val_results))
    labels = list(evaluate.get_labels(validation))

    # sensible defaults when there is no test split to score
    test_loss, test_accuracy = 0, 0
    test_probabilities, test_labels = [], []
    if len(test_data.classes) != 0:
        test_loss, test_accuracy = model.evaluate_generator(test_data, steps=_steps(test_data))
        test_results = evaluate.get_results(model, test_data)
        test_probabilities = list(evaluate.transform_binary_probabilities(test_results))
        test_labels = list(evaluate.get_labels(test_data))

    return {
        "train_accuracy": float(train_accuracy),
        "train_loss": float(train_loss),
        "accuracy": float(accuracy),
        "loss": float(loss),
        "test_accuracy": float(test_accuracy),
        "test_loss": float(test_loss),
        "probabilities": probabilities,
        "labels": labels,
        "test_probabilities": test_probabilities,
        "test_labels": test_labels,
    }
def predict(df, basepath='/media/user1/preprocessed', labels_csv='./Book1.csv', max_slices=800):
    """Score every volume named in *df* slice-by-slice with the global model.

    For each name, every matching ``<name>*.npy`` slice under *basepath* is
    resized to 224x224 (if needed), stacked to 3 channels, and scored; the
    per-slice positive-class probabilities fill a fixed-length vector.

    Args:
        df: list of volume names; sorted in place.
        basepath: directory holding the preprocessed ``.npy`` slices.
        labels_csv: CSV with an ``ID`` column listing positive cases.
        max_slices: length of each per-volume probability vector.

    Returns:
        (features, truth_label): list of probability vectors and the
        matching list of 0/1 ground-truth labels, one entry per volume.
    """
    features = []
    truth_label = []
    df.sort()
    print(df)
    # hoisted out of the loop: the CSV and the normalized base directory
    # are loop-invariant (previously re-read/recomputed per volume)
    positive_ids = pd.read_csv(labels_csv)['ID'].tolist()
    basedir = os.path.normpath(basepath)
    print(basedir)
    for name in df:
        print(name)
        # fresh arrays per volume: `label` is handed to Dataset, so a shared
        # buffer mutated on the next iteration would alias into it
        label = np.zeros((1, ))
        vector = np.zeros(max_slices)
        # positive if tagged COR/SUB in the name or listed in the CSV
        if 'COR' in name or 'SUB' in name or name in positive_ids:
            label[0] = 1
        files = glob(basedir + '/' + name + '*.npy')
        files.sort()
        if len(files) == 0:
            # NOTE(review): this aborts the whole loop on the first name with
            # no slices, silently skipping all remaining names — confirm that
            # `continue` was not intended here.
            break
        for i, fname in enumerate(files):
            img = np.load(fname)
            if img.shape[0] != 224:
                img = misc.imresize(img, (224, 224), 'bilinear')
            # grayscale slice -> 3-channel batch of one
            img = np.stack((img, img, img), axis=2)
            img = img[np.newaxis, :, :, :]
            test_generator = Dataset(
                img,
                label,
                augment=False,
                shuffle=False,
                input_form='t1',
                seed=seed,
            )
            test_generator.reset()
            test_results = evaluate.get_results(model, test_generator)
            probabilities = list(evaluate.transform_binary_probabilities(test_results))
            vector[i] = probabilities[0]
        features.append(vector)
        truth_label.append(label[0])
    return features, truth_label
def test_model(model, train, validation, test):
    """Evaluate *model* on the validation, train and test generators.

    Returns a dict with loss/accuracy for every split plus the predicted
    binary probabilities and ground-truth labels for the validation and
    test splits. Each generator is reset before and after prediction so
    subsequent consumers see it from the start.
    """
    def _batches(generator):
        # batches needed to cover the generator exactly once
        return math.ceil(len(generator) / config.BATCH_SIZE)

    loss, accuracy = model.evaluate_generator(validation, steps=_batches(validation))
    train_loss, train_accuracy = model.evaluate_generator(train, steps=_batches(train))
    test_loss, test_accuracy = model.evaluate_generator(test, steps=_batches(test))

    for generator in (train, validation, test):
        generator.reset()

    val_results = evaluate.get_results(model, validation)
    probabilities = list(evaluate.transform_binary_probabilities(val_results))
    labels = list(evaluate.get_labels(validation))

    test_results = evaluate.get_results(model, test)
    test_probabilities = list(evaluate.transform_binary_probabilities(test_results))
    test_labels = list(evaluate.get_labels(test))

    for generator in (train, validation, test):
        generator.reset()

    return {
        "train_accuracy": float(train_accuracy),
        "train_loss": float(train_loss),
        "accuracy": float(accuracy),
        "loss": float(loss),
        "test_accuracy": float(test_accuracy),
        "test_loss": float(test_loss),
        "probabilities": probabilities,
        "labels": labels,
        "test_probabilities": test_probabilities,
        "test_labels": test_labels,
    }
def run_crossvalidation(goldstd_list, corpus, model, cv, crf="stanford", entity_type="all", cvlog="cv.log"): logfile = open(cvlog, 'w') doclist = corpus.documents.keys() random.shuffle(doclist) size = int(len(doclist) / cv) sublists = chunks(doclist, size) logging.debug("Chunks:") logging.debug(sublists) p, r = [], [] all_results = ResultsNER(model) all_results.path = model + "_results" for nlist in range(cv): testids, trainids = None, None testids = sublists[nlist] trainids = list(itertools.chain.from_iterable(sublists[:nlist])) trainids += list(itertools.chain.from_iterable(sublists[nlist + 1:])) train_corpus, test_corpus = None, None print 'CV{} - test set: {}; train set: {}'.format( nlist, len(testids), len(trainids)) train_corpus = Corpus( corpus.path + "_train", documents={did: corpus.documents[did] for did in trainids}) test_corpus = Corpus( corpus.path + "_test", documents={did: corpus.documents[did] for did in testids}) # logging.debug("train corpus docs: {}".format("\n".join(train_corpus.documents.keys()))) #test_entities = len(test_corpus.get_all_entities("goldstandard")) #train_entities = len(train_corpus.get_all_entities("goldstandard")) #logging.info("test set entities: {}; train set entities: {}".format(test_entities, train_entities)) basemodel = model + "_cv{}".format(nlist) logging.debug('CV{} - test set: {}; train set: {}'.format( nlist, len(test_corpus.documents), len(train_corpus.documents))) '''for d in train_corpus.documents: for s in train_corpus.documents[d].sentences: print len([t.tags.get("goldstandard") for t in s.tokens if t.tags.get("goldstandard") != "other"]) sys.exit()''' # train logging.info('CV{} - TRAIN'.format(nlist)) # train_model = StanfordNERModel(basemodel) train_model = None if crf == "stanford": train_model = StanfordNERModel(basemodel, entity_type) elif crf == "crfsuite": train_model = CrfSuiteModel(basemodel, entity_type) train_model.load_data(train_corpus, feature_extractors.keys()) train_model.train() # test 
logging.info('CV{} - TEST'.format(nlist)) test_model = None if crf == "stanford": test_model = StanfordNERModel(basemodel, entity_type) elif crf == "crfsuite": test_model = CrfSuiteModel(basemodel, entity_type) test_model.load_tagger(port=9191 + nlist) test_model.load_data(test_corpus, feature_extractors.keys(), mode="test") final_results = None final_results = test_model.test(test_corpus, port=9191 + nlist) if crf == "stanford": test_model.kill_process() final_results.basepath = basemodel + "_results" final_results.path = basemodel all_results.entities.update(final_results.entities) all_results.corpus.documents.update(final_results.corpus.documents) # validate """if config.use_chebi: logging.info('CV{} - VALIDATE'.format(nlist)) final_results = add_chebi_mappings(final_results, basemodel) final_results = add_ssm_score(final_results, basemodel) final_results.combine_results(basemodel, basemodel)""" # evaluate logging.info('CV{} - EVALUATE'.format(nlist)) test_goldset = set() for gs in goldstd_list: goldset = get_gold_ann_set(config.paths[gs]["format"], config.paths[gs]["annotations"], entity_type, "pairtype", config.paths[gs]["text"]) for g in goldset[0]: if g[0] in testids: test_goldset.add(g) precision, recall = get_results(final_results, basemodel, test_goldset, {}, []) # evaluation = run_chemdner_evaluation(config.paths[goldstd]["cem"], basemodel + "_results.txt", "-t") # values = evaluation.split("\n")[1].split('\t') p.append(precision) r.append(recall) # logging.info("precision: {} recall:{}".format(str(values[13]), str(values[14]))) pavg = sum(p) / cv ravg = sum(r) / cv print "precision: average={} all={}".format( str(pavg), '|'.join([str(pp) for pp in p])) print "recall: average={} all={}".format(str(ravg), '|'.join([str(rr) for rr in r])) all_goldset = set() for gs in goldstd_list: goldset = get_gold_ann_set(config.paths[gs]["format"], config.paths[gs]["annotations"], entity_type, config.paths[gs]["text"]) for g in goldset: all_goldset.add(g) 
get_results(all_results, model, all_goldset, {}, [])
def test_model(model, train, validation, test):  # , holdout_test):
    """Evaluate *model* on the train/validation/test generators.

    Returns a dict with loss/accuracy per split, predicted binary
    probabilities and ground-truth labels for the validation and test
    splits, and the test-set F1 score. Holdout-set handling is currently
    disabled (commented out); its dict keys are kept with placeholder
    values so downstream consumers keep a stable schema.
    """
    train_loss, train_accuracy = model.evaluate_generator(
        train, steps=math.ceil(len(train) / config.BATCH_SIZE))
    loss, accuracy = model.evaluate_generator(
        validation, steps=math.ceil(len(validation) / config.BATCH_SIZE))
    test_loss, test_accuracy = model.evaluate_generator(
        test, steps=math.ceil(len(test) / config.BATCH_SIZE))
    # holdout_test_loss, holdout_test_accuracy = model.evaluate_generator(holdout_test, steps=math.ceil(len(holdout_test)/config.BATCH_SIZE))
    # rewind the generators: evaluate_generator advanced them, and the
    # prediction passes below must start from the first batch
    train.reset()
    validation.reset()
    test.reset()
    # holdout_test.reset()
    # labels - ground truths
    # results - predicted results from model
    results = evaluate.get_results(model, validation)
    probabilities = list(evaluate.transform_binary_probabilities(results))
    labels = list(evaluate.get_labels(validation))
    test_results = evaluate.get_results(model, test)
    test_probabilities = list(evaluate.transform_binary_probabilities(test_results))
    test_labels = list(evaluate.get_labels(test))
    # holdout_test_results = evaluate.get_results(model, holdout_test)
    # holdout_test_probabilities = list(evaluate.transform_binary_probabilities(holdout_test_results))
    # holdout_test_labels = list(evaluate.get_labels(holdout_test))
    # rewind again so callers receive fresh generators
    train.reset()
    validation.reset()
    test.reset()
    # holdout_test.reset()
    # get binary predictions
    # holdout_binary_predictions = list(evaluate.transform_binary_predictions(holdout_test_results))
    test_binary_predictions = list(evaluate.transform_binary_predictions(test_results))
    # get f1 score
    test_f1_result = f1_score(test_labels, test_binary_predictions)
    # holdout_f1_result = f1_score(holdout_test_labels, holdout_binary_predictions)
    return {
        "train_accuracy": float(train_accuracy),
        "train_loss": float(train_loss),
        "accuracy": float(accuracy),
        "loss": float(loss),
        "test_accuracy": float(test_accuracy),
        "test_loss": float(test_loss),
        # "holdout_test_accuracy": float(holdout_test_accuracy),
        # "holdout_test_loss": float(holdout_test_loss),
        # placeholders while the holdout split is disabled (see above)
        "holdout_test_accuracy": float(0),
        "holdout_test_loss": float(0),
        "probabilities": probabilities,
        "labels": labels,
        "test_probabilities": test_probabilities,
        "test_labels": test_labels,
        # "holdout_test_probabilities": holdout_test_probabilities,
        # "holdout_test_labels": holdout_test_labels,
        "holdout_test_probabilities": 'na',
        "holdout_test_labels": 'na',
        "test_f1_result": test_f1_result,
        # "holdout_f1_result": holdout_f1_result,
        "holdout_f1_result": float(0),
    }
def main(): start_time = time.time() parser = argparse.ArgumentParser(description='') parser.add_argument("actions", default="classify", help="Actions to be performed.") parser.add_argument( "--goldstd", default="", dest="goldstd", nargs="+", help="Gold standard to be used. Will override corpus, annotations", choices=paths.keys()) parser.add_argument("--submodels", default="", nargs='+', help="sub types of classifiers"), parser.add_argument("--models", dest="models", help="model destination path, without extension") parser.add_argument("--pairtype", dest="ptype", help="type of pairs to be considered", default="all") parser.add_argument("--doctype", dest="doctype", help="type of document to be considered", default="all") parser.add_argument( "-o", "--output", "--format", dest="output", nargs=2, help="format path; output formats: xml, html, tsv, text, chemdner.") parser.add_argument("--log", action="store", dest="loglevel", default="WARNING", help="Log level") parser.add_argument("--kernel", action="store", dest="kernel", default="svmtk", help="Kernel for relation extraction") options = parser.parse_args() # set logger numeric_level = getattr(logging, options.loglevel.upper(), None) if not isinstance(numeric_level, int): raise ValueError('Invalid log level: %s' % options.loglevel) while len(logging.root.handlers) > 0: logging.root.removeHandler(logging.root.handlers[-1]) logging_format = '%(asctime)s %(levelname)s %(filename)s:%(lineno)s:%(funcName)s %(message)s' logging.basicConfig(level=numeric_level, format=logging_format) logging.getLogger().setLevel(numeric_level) logging.getLogger("requests.packages").setLevel(30) logging.info("Processing action {0} on {1}".format(options.actions, options.goldstd)) # set configuration variables based on the goldstd option if the corpus has a gold standard, # or on corpus and annotation options # pre-processing options if options.actions == "load_corpus": if len(options.goldstd) > 1: print "load only one corpus each time" 
sys.exit() options.goldstd = options.goldstd[0] corpus_format = paths[options.goldstd]["format"] corpus_path = paths[options.goldstd]["text"] corpus_ann = paths[options.goldstd]["annotations"] corenlp_client = StanfordCoreNLP('http://localhost:9000') # corpus = load_corpus(options.goldstd, corpus_path, corpus_format, corenlp_client) corpus = SeeDevCorpus(corpus_path) corpus.load_corpus(corenlp_client) corpus.save(paths[options.goldstd]["corpus"]) if corpus_ann: #add annotation if it is not a test set corpus.load_annotations(corpus_ann, "all") corpus.save(paths[options.goldstd]["corpus"]) elif options.actions == "annotate": # rext-add annotation to corpus if len(options.goldstd) > 1: print "load only one corpus each time" sys.exit() options.goldstd = options.goldstd[0] corpus_path = paths[options.goldstd]["corpus"] corpus_ann = paths[options.goldstd]["annotations"] logging.info("loading corpus %s" % corpus_path) corpus = pickle.load(open(corpus_path, 'rb')) logging.debug("loading annotations...") # corpus.clear_annotations("all") corpus.load_annotations(corpus_ann, "all", options.ptype) # corpus.get_invalid_sentences() corpus.save(paths[options.goldstd]["corpus"]) else: #corpus = SeeDevCorpus("corpus/" + "&".join(options.goldstd)) corpus_path = paths[options.goldstd[0]]["corpus"] logging.info("loading corpus %s" % corpus_path) basecorpus = pickle.load(open(corpus_path, 'rb')) corpus = SeeDevCorpus(corpus_path) corpus.documents = basecorpus.documents if options.actions == "add_sentences": corpus.add_more_sentences(options.models) elif options.actions == "add_goldstandard": corpus.convert_entities_to_goldstandard() corpus.find_ds_relations() #corpus.save(config.paths[options.goldstd[0]]["corpus"]) elif options.actions == "train_multiple": # Train one classifier for each type of entity in this corpus # logging.info(corpus.subtypes) models = TaggerCollection(basepath=options.models, corpus=corpus, subtypes=all_entity_types) models.train_types() elif options.actions == 
"train_relations": if options.ptype == "all": ptypes = pair_types.keys() # ptypes = config.event_types.keys() else: ptypes = [options.ptype] for p in ptypes: print p if options.kernel == "jsre": model = JSREKernel(corpus, p, train=True) elif options.kernel == "svmtk": model = SVMTKernel(corpus, p) elif options.kernel == "stanfordre": model = StanfordRE(corpus, p) elif options.kernel == "multir": model = MultiR(corpus, p) elif options.kernel == "scikit": model = ScikitRE(corpus, p) elif options.kernel == "crf": model = CrfSuiteRE(corpus, p) # model.train() # testing elif options.actions == "test_multiple": logging.info("testing with multiple classifiers... {}".format( ' '.join(options.submodels))) models = TaggerCollection(basepath=options.models, subtypes=all_entity_types) models.load_models() results = models.test_types(corpus) final_results = results.combine_results() logging.info("saving results...") final_results.save(options.output[1] + ".pickle") elif options.actions == "test_relations": if options.ptype == "all": ptypes = pair_types.keys() # ptypes = config.event_types.keys() all_results = ResultsRE(options.output[1]) all_results.corpus = corpus all_results.path = options.output[1] else: ptypes = [options.ptype] for p in ptypes: print p if options.kernel == "jsre": model = JSREKernel(corpus, p, train=False) elif options.kernel == "svmtk": model = SVMTKernel(corpus, p) elif options.kernel == "rules": model = RuleClassifier(corpus, p) elif options.kernel == "stanfordre": model = StanfordRE(corpus, p) elif options.kernel == "scikit": model = ScikitRE(corpus, p) elif options.kernel == "crf": model = CrfSuiteRE(corpus, p, test=True) model.load_classifier() model.test() results = model.get_predictions(corpus) # results.save(options.output[1] + "_" + p.lower() + ".pickle") # results.load_corpus(options.goldstd[0]) results.path = options.output[1] + "_" + p.lower() goldset = get_gold_ann_set( paths[options.goldstd[0]]["format"], 
paths[options.goldstd[0]]["annotations"], "all", p, paths[options.goldstd[0]]["text"]) get_relations_results(results, options.models, goldset[1], [], []) if options.ptype == "all": for did in results.document_pairs: if did not in all_results.document_pairs: all_results.document_pairs[did] = Pairs(did=did) all_results.document_pairs[ did].pairs += results.document_pairs[did].pairs if options.ptype == "all": goldset = get_gold_ann_set( paths[options.goldstd[0]]["format"], paths[options.goldstd[0]]["annotations"], "all", "all", paths[options.goldstd[0]]["text"]) get_relations_results(all_results, options.models, goldset[1], [], []) write_seedev_results(all_results, options.output[1]) elif options.actions == "train_sentences": #and evaluate if options.ptype == "all": avg = [0, 0, 0] for p in pair_types: print p tps, fps, fns = corpus.train_sentence_classifier(p) if tps == 0 and fns == 0: precision, recall, fscore = 0, 1, 1 else: precision = 1.0 * tps / (fps + tps) recall = 1.0 * fns / (fns + tps) fscore = 2.0 * precision * recall / (recall + precision) print precision, recall, fscore avg[0] += tps avg[1] += fps avg[2] += fns #print [a/len(config.pair_types) for a in avg] precision = 1.0 * avg[1] / (avg[0] + avg[1]) recall = 1.0 * avg[2] / (avg[0] + avg[2]) fscore = 2.0 * precision * recall / (recall + precision) print precision, recall, fscore else: res = corpus.train_sentence_classifier(options.ptype) print res corpus.save(paths[options.goldstd[0]]["corpus"]) elif options.actions == "test_sentences": #and evaluate if options.ptype == "all": avg = [0, 0, 0] for p in pair_types: print p tps, fps, fns = corpus.test_sentence_classifier(p) if tps == 0 and fns == 0: precision, recall, fscore = 0, 1, 1 else: precision = 1.0 * tps / (fps + tps) recall = 1.0 * fns / (fns + tps) fscore = 2.0 * precision * recall / (recall + precision) print precision, recall, fscore avg[0] += tps avg[1] += fps avg[2] += fns #print [a/len(config.pair_types) for a in avg] precision = 1.0 * avg[1] 
/ (avg[0] + avg[1]) recall = 1.0 * avg[2] / (avg[0] + avg[2]) fscore = 2.0 * precision * recall / (recall + precision) print precision, recall, fscore #else: # res = corpus.test_sentence_classifier(options.ptype) # print res elif options.actions == "evaluate_ner": if os.path.exists(options.output[1] + ".pickle"): results = pickle.load(open(options.output[1] + ".pickle", 'rb')) results.load_corpus(options.goldstd[0]) results.path = options.output[1] logging.info("loading gold standard %s" % paths[options.goldstd[0]]["annotations"]) for t in all_entity_types: print t results.path = options.output[1] + "_" + t goldset = get_gold_ann_set( paths[options.goldstd[0]]["format"], paths[options.goldstd[0]]["annotations"], t, options.ptype, paths[options.goldstd[0]]["text"]) get_results(results, options.models + "_" + t, goldset[0], {}, {}) corpus.save(paths[options.goldstd[0]]["corpus"]) total_time = time.time() - start_time logging.info("Total time: %ss" % total_time)
# Score the held-out test set slice-by-slice and report threshold metrics.
#test_features,test_label = relist(predict(test_set))
test_features, test_label = relist(predict(test_set))
#test_features = np.load('./COR_test.npy')
#test_label = np.load('./COR_test_label.npy')

test_generator = Dataset(
    test_features,
    test_label,
    augment=False,
    shuffle=False,
    input_form='t1',
    seed=seed,
)
test_generator.reset()

# per-slice positive-class probabilities, persisted for later analysis
test_results = evaluate.get_results(model, test_generator)
probabilities = list(evaluate.transform_binary_probabilities(test_results))
np.save('./test_slice_pro.npy', probabilities)

# split sizes for reference:
#test 5628
#validation 4593
#train 34585

# hard 0/1 predictions at the 0.5 threshold
lg_pred = np.where(np.asarray(probabilities) < 0.5, 0.0, 1.0)

print("Accuracy: " + repr(accuracy_score(test_label, lg_pred)))
print("Average Precision Score: " + repr(average_precision_score(test_label, lg_pred)))
def main():
    """Command-line entry point for the SeeDev pipeline.

    Dispatches on the positional ``actions`` argument: corpus loading and
    annotation, entity/relation model training and testing, sentence
    classification, and NER evaluation. Paths come from the module-level
    ``paths`` mapping keyed by ``--goldstd``.
    """
    start_time = time.time()
    parser = argparse.ArgumentParser(description='')
    parser.add_argument("actions", default="classify", help="Actions to be performed.")
    parser.add_argument("--goldstd", default="", dest="goldstd", nargs="+",
                        help="Gold standard to be used. Will override corpus, annotations",
                        choices=paths.keys())
    # NOTE(review): the trailing comma below makes this statement a 1-tuple —
    # harmless, but probably unintended.
    parser.add_argument("--submodels", default="", nargs='+', help="sub types of classifiers"),
    parser.add_argument("--models", dest="models", help="model destination path, without extension")
    parser.add_argument("--pairtype", dest="ptype", help="type of pairs to be considered", default="all")
    parser.add_argument("--doctype", dest="doctype", help="type of document to be considered", default="all")
    parser.add_argument("-o", "--output", "--format", dest="output", nargs=2,
                        help="format path; output formats: xml, html, tsv, text, chemdner.")
    parser.add_argument("--log", action="store", dest="loglevel", default="WARNING", help="Log level")
    parser.add_argument("--kernel", action="store", dest="kernel", default="svmtk",
                        help="Kernel for relation extraction")
    options = parser.parse_args()
    # set logger: drop any handlers installed by imported libraries first
    numeric_level = getattr(logging, options.loglevel.upper(), None)
    if not isinstance(numeric_level, int):
        raise ValueError('Invalid log level: %s' % options.loglevel)
    while len(logging.root.handlers) > 0:
        logging.root.removeHandler(logging.root.handlers[-1])
    logging_format = '%(asctime)s %(levelname)s %(filename)s:%(lineno)s:%(funcName)s %(message)s'
    logging.basicConfig(level=numeric_level, format=logging_format)
    logging.getLogger().setLevel(numeric_level)
    logging.getLogger("requests.packages").setLevel(30)
    logging.info("Processing action {0} on {1}".format(options.actions, options.goldstd))
    # set configuration variables based on the goldstd option if the corpus has a gold standard,
    # or on corpus and annotation options
    # pre-processing options
    if options.actions == "load_corpus":
        if len(options.goldstd) > 1:
            print "load only one corpus each time"
            sys.exit()
        options.goldstd = options.goldstd[0]
        corpus_format = paths[options.goldstd]["format"]
        corpus_path = paths[options.goldstd]["text"]
        corpus_ann = paths[options.goldstd]["annotations"]
        corenlp_client = StanfordCoreNLP('http://localhost:9000')
        # corpus = load_corpus(options.goldstd, corpus_path, corpus_format, corenlp_client)
        corpus = SeeDevCorpus(corpus_path)
        corpus.load_corpus(corenlp_client)
        corpus.save(paths[options.goldstd]["corpus"])
        if corpus_ann:  #add annotation if it is not a test set
            corpus.load_annotations(corpus_ann, "all")
            corpus.save(paths[options.goldstd]["corpus"])
    elif options.actions == "annotate":  # rext-add annotation to corpus
        if len(options.goldstd) > 1:
            print "load only one corpus each time"
            sys.exit()
        options.goldstd = options.goldstd[0]
        corpus_path = paths[options.goldstd]["corpus"]
        corpus_ann = paths[options.goldstd]["annotations"]
        logging.info("loading corpus %s" % corpus_path)
        corpus = pickle.load(open(corpus_path, 'rb'))
        logging.debug("loading annotations...")
        # corpus.clear_annotations("all")
        corpus.load_annotations(corpus_ann, "all", options.ptype)
        # corpus.get_invalid_sentences()
        corpus.save(paths[options.goldstd]["corpus"])
    else:
        # remaining actions operate on an already-pickled corpus
        #corpus = SeeDevCorpus("corpus/" + "&".join(options.goldstd))
        corpus_path = paths[options.goldstd[0]]["corpus"]
        logging.info("loading corpus %s" % corpus_path)
        basecorpus = pickle.load(open(corpus_path, 'rb'))
        corpus = SeeDevCorpus(corpus_path)
        corpus.documents = basecorpus.documents
        if options.actions == "add_sentences":
            corpus.add_more_sentences(options.models)
        elif options.actions == "add_goldstandard":
            corpus.convert_entities_to_goldstandard()
            corpus.find_ds_relations()
            #corpus.save(config.paths[options.goldstd[0]]["corpus"])
        elif options.actions == "train_multiple":
            # Train one classifier for each type of entity in this corpus
            # logging.info(corpus.subtypes)
            models = TaggerCollection(basepath=options.models, corpus=corpus, subtypes=all_entity_types)
            models.train_types()
        elif options.actions == "train_relations":
            if options.ptype == "all":
                ptypes = pair_types.keys()
                # ptypes = config.event_types.keys()
            else:
                ptypes = [options.ptype]
            for p in ptypes:
                print p
                # instantiate the kernel selected on the command line
                if options.kernel == "jsre":
                    model = JSREKernel(corpus, p, train=True)
                elif options.kernel == "svmtk":
                    model = SVMTKernel(corpus, p)
                elif options.kernel == "stanfordre":
                    model = StanfordRE(corpus, p)
                elif options.kernel == "multir":
                    model = MultiR(corpus, p)
                elif options.kernel == "scikit":
                    model = ScikitRE(corpus, p)
                elif options.kernel == "crf":
                    model = CrfSuiteRE(corpus, p)
                # model.train()
        # testing
        elif options.actions == "test_multiple":
            logging.info("testing with multiple classifiers... {}".format(' '.join(options.submodels)))
            models = TaggerCollection(basepath=options.models, subtypes=all_entity_types)
            models.load_models()
            results = models.test_types(corpus)
            final_results = results.combine_results()
            logging.info("saving results...")
            final_results.save(options.output[1] + ".pickle")
        elif options.actions == "test_relations":
            if options.ptype == "all":
                ptypes = pair_types.keys()
                # ptypes = config.event_types.keys()
                all_results = ResultsRE(options.output[1])
                all_results.corpus = corpus
                all_results.path = options.output[1]
            else:
                ptypes = [options.ptype]
            for p in ptypes:
                print p
                if options.kernel == "jsre":
                    model = JSREKernel(corpus, p, train=False)
                elif options.kernel == "svmtk":
                    model = SVMTKernel(corpus, p)
                elif options.kernel == "rules":
                    model = RuleClassifier(corpus, p)
                elif options.kernel == "stanfordre":
                    model = StanfordRE(corpus, p)
                elif options.kernel == "scikit":
                    model = ScikitRE(corpus, p)
                elif options.kernel == "crf":
                    model = CrfSuiteRE(corpus, p, test=True)
                model.load_classifier()
                model.test()
                results = model.get_predictions(corpus)
                # results.save(options.output[1] + "_" + p.lower() + ".pickle")
                # results.load_corpus(options.goldstd[0])
                results.path = options.output[1] + "_" + p.lower()
                goldset = get_gold_ann_set(paths[options.goldstd[0]]["format"],
                                           paths[options.goldstd[0]]["annotations"], "all", p,
                                           paths[options.goldstd[0]]["text"])
                get_relations_results(results, options.models, goldset[1],[], [])
                if options.ptype == "all":
                    # accumulate this pair type's predictions into the global results
                    for did in results.document_pairs:
                        if did not in all_results.document_pairs:
                            all_results.document_pairs[did] = Pairs(did=did)
                        all_results.document_pairs[did].pairs += results.document_pairs[did].pairs
            if options.ptype == "all":
                goldset = get_gold_ann_set(paths[options.goldstd[0]]["format"],
                                           paths[options.goldstd[0]]["annotations"], "all", "all",
                                           paths[options.goldstd[0]]["text"])
                get_relations_results(all_results, options.models, goldset[1],[], [])
                write_seedev_results(all_results, options.output[1])
        elif options.actions == "train_sentences":  #and evaluate
            if options.ptype == "all":
                avg = [0,0,0]
                for p in pair_types:
                    print p
                    tps, fps, fns = corpus.train_sentence_classifier(p)
                    if tps == 0 and fns == 0:
                        precision, recall, fscore = 0, 1, 1
                    else:
                        precision = 1.0 * tps / (fps + tps)
                        # NOTE(review): fns/(fns+tps) is the miss rate, not the
                        # usual recall tps/(tps+fns) — confirm intent.
                        recall = 1.0 * fns / (fns + tps)
                        fscore = 2.0 * precision * recall / (recall + precision)
                    print precision, recall, fscore
                    avg[0] += tps
                    avg[1] += fps
                    avg[2] += fns
                #print [a/len(config.pair_types) for a in avg]
                # NOTE(review): the aggregated values put fps and fns in the
                # numerators — these look like false-discovery and miss rates
                # rather than precision/recall; confirm intent.
                precision = 1.0 * avg[1] / (avg[0] + avg[1])
                recall = 1.0 * avg[2] / (avg[0] + avg[2])
                fscore = 2.0 * precision * recall / (recall + precision)
                print precision, recall, fscore
            else:
                res = corpus.train_sentence_classifier(options.ptype)
                print res
            corpus.save(paths[options.goldstd[0]]["corpus"])
        elif options.actions == "test_sentences":  #and evaluate
            if options.ptype == "all":
                avg = [0,0,0]
                for p in pair_types:
                    print p
                    tps, fps, fns = corpus.test_sentence_classifier(p)
                    if tps == 0 and fns == 0:
                        precision, recall, fscore = 0, 1, 1
                    else:
                        precision = 1.0 * tps / (fps + tps)
                        # NOTE(review): same miss-rate formula as above.
                        recall = 1.0 * fns / (fns + tps)
                        fscore = 2.0 * precision * recall / (recall + precision)
                    print precision, recall, fscore
                    avg[0] += tps
                    avg[1] += fps
                    avg[2] += fns
                #print [a/len(config.pair_types) for a in avg]
                precision = 1.0 * avg[1] / (avg[0] + avg[1])
                recall = 1.0 * avg[2] / (avg[0] + avg[2])
                fscore = 2.0 * precision * recall / (recall + precision)
                print precision, recall, fscore
            #else:
            #    res = corpus.test_sentence_classifier(options.ptype)
            #    print res
        elif options.actions == "evaluate_ner":
            if os.path.exists(options.output[1] + ".pickle"):
                results = pickle.load(open(options.output[1] + ".pickle", 'rb'))
                results.load_corpus(options.goldstd[0])
                results.path = options.output[1]
                logging.info("loading gold standard %s" % paths[options.goldstd[0]]["annotations"])
                # score the pickled results once per entity type
                for t in all_entity_types:
                    print t
                    results.path = options.output[1] + "_" + t
                    goldset = get_gold_ann_set(paths[options.goldstd[0]]["format"],
                                               paths[options.goldstd[0]]["annotations"], t, options.ptype,
                                               paths[options.goldstd[0]]["text"])
                    get_results(results, options.models + "_" + t, goldset[0], {}, {})
                corpus.save(paths[options.goldstd[0]]["corpus"])
    total_time = time.time() - start_time
    logging.info("Total time: %ss" % total_time)
def run_crossvalidation(goldstd_list, corpus, model, cv, crf="stanford", entity_type="all", cvlog="cv.log"):
    """Run *cv*-fold cross-validation of a NER model over *corpus*.

    Shuffles the corpus documents, splits them into *cv* chunks and, for
    each fold, trains and tests a Stanford or CRFsuite model, accumulating
    per-fold precision/recall. Accumulated results are finally scored
    against the combined gold standard from ``config.corpus_paths``.
    """
    # NOTE(review): logfile is opened but never written to or closed here.
    logfile = open(cvlog, 'w')
    doclist = corpus.documents.keys()
    random.shuffle(doclist)
    size = int(len(doclist)/cv)
    sublists = chunks(doclist, size)
    logging.debug("Chunks:")
    logging.debug(sublists)
    p, r = [], []
    all_results = ResultsNER(model)
    all_results.path = model + "_results"
    for nlist in range(cv):
        # fold `nlist` is the test set; every other fold forms the train set
        testids, trainids = None, None
        testids = sublists[nlist]
        trainids = list(itertools.chain.from_iterable(sublists[:nlist]))
        trainids += list(itertools.chain.from_iterable(sublists[nlist+1:]))
        train_corpus, test_corpus = None, None
        print 'CV{} - test set: {}; train set: {}'.format(nlist, len(testids), len(trainids))
        train_corpus = Corpus(corpus.path + "_train", documents={did: corpus.documents[did] for did in trainids})
        test_corpus = Corpus(corpus.path + "_test", documents={did: corpus.documents[did] for did in testids})
        # logging.debug("train corpus docs: {}".format("\n".join(train_corpus.documents.keys())))
        #test_entities = len(test_corpus.get_all_entities("goldstandard"))
        #train_entities = len(train_corpus.get_all_entities("goldstandard"))
        #logging.info("test set entities: {}; train set entities: {}".format(test_entities, train_entities))
        basemodel = model + "_cv{}".format(nlist)
        logging.debug('CV{} - test set: {}; train set: {}'.format(nlist, len(test_corpus.documents), len(train_corpus.documents)))
        '''for d in train_corpus.documents:
            for s in train_corpus.documents[d].sentences:
                print len([t.tags.get("goldstandard") for t in s.tokens if t.tags.get("goldstandard") != "other"])
        sys.exit()'''
        # train a fresh model on this fold's training documents
        logging.info('CV{} - TRAIN'.format(nlist))
        # train_model = StanfordNERModel(basemodel)
        train_model = None
        if crf == "stanford":
            train_model = StanfordNERModel(basemodel, entity_type)
        elif crf == "crfsuite":
            train_model = CrfSuiteModel(basemodel, entity_type)
        train_model.load_data(train_corpus, feature_extractors.keys())
        train_model.train()
        # test on this fold's held-out documents (distinct port per fold)
        logging.info('CV{} - TEST'.format(nlist))
        test_model = None
        if crf == "stanford":
            test_model = StanfordNERModel(basemodel, entity_type)
        elif crf == "crfsuite":
            test_model = CrfSuiteModel(basemodel, entity_type)
        test_model.load_tagger(port=9191+nlist)
        test_model.load_data(test_corpus, feature_extractors.keys(), mode="test")
        final_results = None
        final_results = test_model.test(test_corpus, port=9191+nlist)
        if crf == "stanford":
            test_model.kill_process()
        final_results.basepath = basemodel + "_results"
        final_results.path = basemodel
        # merge this fold's entities/documents into the global results
        all_results.entities.update(final_results.entities)
        all_results.corpus.documents.update(final_results.corpus.documents)
        # validate
        """if config.use_chebi:
            logging.info('CV{} - VALIDATE'.format(nlist))
            final_results = add_chebi_mappings(final_results, basemodel)
            final_results = add_ssm_score(final_results, basemodel)
            final_results.combine_results(basemodel, basemodel)"""
        # evaluate this fold against the gold annotations of its test docs
        logging.info('CV{} - EVALUATE'.format(nlist))
        test_goldset = set()
        for gs in goldstd_list:
            goldset = get_gold_ann_set(config.corpus_paths.paths[gs]["format"], config.corpus_paths.paths[gs]["annotations"], entity_type, "pairtype", config.corpus_paths.paths[gs]["text"])
            for g in goldset[0]:
                if g[0] in testids:
                    test_goldset.add(g)
        precision, recall = get_results(final_results, basemodel, test_goldset, {}, [])
        # evaluation = run_chemdner_evaluation(config.paths[goldstd]["cem"], basemodel + "_results.txt", "-t")
        # values = evaluation.split("\n")[1].split('\t')
        p.append(precision)
        r.append(recall)
        # logging.info("precision: {} recall:{}".format(str(values[13]), str(values[14])))
    pavg = sum(p)/cv
    ravg = sum(r)/cv
    print "precision: average={} all={}".format(str(pavg), '|'.join([str(pp) for pp in p]))
    print "recall: average={} all={}".format(str(ravg), '|'.join([str(rr) for rr in r]))
    # score the accumulated results against the full gold standard
    all_goldset = set()
    for gs in goldstd_list:
        goldset = get_gold_ann_set(config.corpus_paths.paths[gs]["format"], config.corpus_paths.paths[gs]["annotations"], entity_type, "", config.corpus_paths.paths[gs]["text"])
        for g in goldset[0]:
            all_goldset.add(g)
    get_results(all_results, model, all_goldset, {}, [])