def main():
    """
    Entry point for local prediction: section raw documents, tag them, write HTML.

    Reads ``datadir`` and ``model_type`` from the local-predict command line
    arguments, runs the sectioner over ``datadir``, loads the sectioned XML,
    performs NER with the requested model type, removes negated concepts and
    writes readable prediction results through ``HTMLPrinter``.
    Prints the elapsed wall-clock time to the console when finished.
    """
    # Wall-clock timer. time.clock() was removed in Python 3.8 and measured
    # CPU time (not elapsed time) on Unix; time.time() works on Python 2 and 3.
    start = time.time()

    # Parse incoming cmd line arguments
    args = ArgumentParsingSettings.get_local_predict_args()
    data_dir = args.datadir
    model_type = args.model_type

    # Section raw documents
    sectioner_out_dir = uw_sectioner(data_dir)

    # Load sectioned docs
    xml_dl = SectionerXMLDataLoader(xml_dir=sectioner_out_dir, clean_tmp_files=True)
    docs = xml_dl.load()

    # Perform NER on sectioned docs
    extractor = NERExtraction(docs, model_algo=model_type)
    tagged_documents = extractor.tag_all()
    tagged_documents = extractor.remove_negated_concepts(tagged_documents)

    # Print full docs
    # NOTE(review): the output directory is a hard-coded, user-specific
    # absolute path -- consider exposing it as a command line argument.
    dp = HTMLPrinter()
    dp.write_readable_prediction_results(
        tagged_documents,
        "/home/wlane/PycharmProjects/HutchNER/HutchNER/NERResults",
        model_algo=model_type)

    # Print time elapsed to console
    elapsed = time.time() - start
    print("##################################")
    print(" \tTime Elapsed: " + str(int(elapsed / 60)) + " minutes and "
          + str(int(elapsed % 60)) + " seconds.")
    print("##################################")
def main(documents, model_type, models):
    """
    Run NER over in-memory documents and return the JSON form of the result.

    :param documents: raw documents handed to ``JSONDataLoader``
    :param model_type: model algorithm identifier given to ``NERExtraction``
    :param models: container of pre-loaded models; its ``'spacy'`` entry is
        used for preprocessing and the whole container is passed to ``tag_all``
    :return: the value produced by ``NERExtraction.docs2json`` for the tagged
        documents after negated concepts have been removed
    """
    # Preprocess the incoming documents with the supplied spaCy model
    preprocessed = JSONDataLoader(documents=documents).preprocess(
        spacy_model=models['spacy'])

    # Tag, then strip negated concepts before serialising
    ner = NERExtraction(preprocessed, model_algo=model_type)
    without_negated = ner.remove_negated_concepts(ner.tag_all(models))
    return ner.docs2json(without_negated)
def main():
    """
    Entry point to HutchNER1: concept NER evaluation against gold annotations.

    Reads the text directory, annotation directory, model name, model type and
    annotation type from the testing command line arguments, loads the
    annotated documents (i2b2 or brat format), tags them, and writes the
    predicted labels plus "exact" and "overlap" evaluation results, all linked
    by a shared timestamp string.
    Prints the elapsed wall-clock time to the console when finished.
    """
    # Wall-clock timer. time.clock() was removed in Python 3.8 and measured
    # CPU time (not elapsed time) on Unix; time.time() works on Python 2 and 3.
    start = time.time()

    # Parse incoming cmd line arguments
    args = ArgumentParsingSettings.get_testing_args()
    text_dir = args.textdir
    local_annotations = args.annots
    model_name = args.model
    model_type = args.model_type
    anno_type = args.anno_type
    print('model_name:')
    print(model_name)

    # Load the documents; anything other than 'i2b2' is read as brat
    if anno_type == 'i2b2':
        text_dl = i2b2DataLoader(txt_dir=text_dir, annotation_dir=local_annotations)
    else:
        text_dl = bratDataLoader(txt_dir=text_dir, annotation_dir=local_annotations)
    docs = text_dl.load()

    # Run NER driver with models and data provided in dirs
    extractor = NERExtraction(docs, model_name, model_type)
    # NOTE(review): 'models' is not defined anywhere in this function. Unless
    # it exists as a module-level name, this call raises NameError -- confirm
    # where the pre-loaded models are supposed to come from.
    tagged_documents = extractor.tag_all(models=models)
    # The returned negated-filtered documents are not used below; the call is
    # kept because it may modify tagged_documents in place -- TODO confirm.
    extractor.remove_negated_concepts(tagged_documents)

    # Evaluate the performance on TAGGED DOCUMENTS (not the negated ones)
    labels = extractor.possible_labels
    ev = NEREvaluator(tagged_documents, labels)

    # use timestamp to link output labels and files to output results numbers
    time_stamp = time.time()
    string_timestamp = datetime.datetime.fromtimestamp(time_stamp).strftime(
        '%Y-%m-%d_%H.%M.%S')
    ev.output_labels("OutputLabels", tagged_documents, model_name, string_timestamp)
    ev.write_results("EvalResults", strictness="exact", model_name=model_name,
                     string_timestamp=string_timestamp)
    ev.write_results("EvalResults", strictness="overlap", model_name=model_name,
                     string_timestamp=string_timestamp)

    # Print time elapsed to console
    elapsed = time.time() - start
    print("##################################")
    print(" \tTime Elapsed: " + str(int(elapsed / 60)) + " minutes and "
          + str(int(elapsed % 60)) + " seconds.")
    print("##################################")
def main():
    """
    Entry point to HutchNER1: concept NER evaluation with HTML output.

    Reads the data directory and annotation directory from the testing command
    line arguments, loads the i2b2-annotated documents, tags them, writes
    readable prediction results for the negation-filtered documents, and
    writes "exact" and "overlap" evaluation results for the tagged documents.
    Prints the elapsed wall-clock time to the console when finished.
    """
    # Wall-clock timer. time.clock() was removed in Python 3.8 and measured
    # CPU time (not elapsed time) on Unix; time.time() works on Python 2 and 3.
    start = time.time()

    # Parse incoming cmd line arguments
    args = ArgumentParsingSettings.get_testing_args()
    data_dir = args.datadir
    local_annotations = args.annots

    # Load the documents
    text_dl = i2b2DataLoader(txt_dir=data_dir, annotation_dir=local_annotations)
    docs = text_dl.load()

    # Run NER driver with models and data provided in dirs
    extractor = NERExtraction(docs)
    tagged_documents = extractor.tag_all()
    neg_documents = extractor.remove_negated_concepts(tagged_documents)

    # Create DocumentPrinter object; print/write document objects in desired format
    # NOTE(review): the output locations below are hard-coded, user-specific
    # absolute paths -- consider exposing them as command line arguments.
    dp = HTMLPrinter()
    dp.write_readable_prediction_results(
        neg_documents,
        "/home/wlane/PycharmProjects/HutchNER1/HutchNER1/NERResults")

    # Evaluate the performance on TAGGED DOCUMENTS (not the negated ones)
    labels = extractor.possible_labels
    ev = NEREvaluator(tagged_documents, labels)
    ev.write_results(
        "/home/wlane/PycharmProjects/HutchNER1/HutchNER1/NEREvaluation/EvalResults",
        strictness="exact")
    ev.write_results(
        "/home/wlane/PycharmProjects/HutchNER1/HutchNER1/NEREvaluation/EvalResults",
        strictness="overlap")

    # Print time elapsed to console. Each print() takes a single parenthesised
    # argument so the output is identical on Python 2 and Python 3.
    elapsed = time.time() - start
    print("##################################")
    print(" \tTime Elapsed: " + str(int(elapsed / 60)) + " minutes and "
          + str(int(elapsed % 60)) + " seconds.")
    print("##################################")