Example #1
0
def main():
    """Entry point: section raw documents, run NER, write HTML results.

    Reads the data directory and model type from the command line, runs
    the UW sectioner over the raw documents, performs NER tagging with
    negation removal, and writes human-readable HTML prediction results.
    """
    # start timer — time.clock() was removed in Python 3.8; use the
    # monotonic high-resolution counter instead
    start = time.perf_counter()

    # Parse incoming cmd line arguments
    args = ArgumentParsingSettings.get_local_predict_args()
    data_dir = args.datadir
    model_type = args.model_type

    # Section raw documents
    sectioner_out_dir = uw_sectioner(data_dir)

    # Load sectioned docs (sectioner's temporary XML files are cleaned up)
    xml_dl = SectionerXMLDataLoader(xml_dir=sectioner_out_dir,
                                    clean_tmp_files=True)
    docs = xml_dl.load()

    # Perform NER on sectioned docs, then drop negated concepts
    extractor = NERExtraction(docs, model_algo=model_type)
    tagged_documents = extractor.tag_all()
    tagged_documents = extractor.remove_negated_concepts(tagged_documents)

    # Print full docs
    # NOTE(review): hard-coded, user-specific output path — consider
    # promoting this to a command-line argument.
    dp = HTMLPrinter()
    dp.write_readable_prediction_results(
        tagged_documents,
        "/home/wlane/PycharmProjects/HutchNER/HutchNER/NERResults",
        model_algo=model_type)

    # Print time elapsed to console
    end = time.perf_counter()
    print("##################################")
    print(" \tTime Elapsed: " + str(int((end - start) / 60)) +
          " minutes and " + str(int((end - start) % 60)) + " seconds.")
    print("##################################")
Example #2
0
def main(documents, model_type, models):
    """Run the NER pipeline over *documents* and return a JSON response.

    Preprocesses the incoming documents with the provided spaCy model,
    tags them with the selected NER model, strips negated concepts, and
    serializes the result to JSON.
    """
    loader = JSONDataLoader(documents=documents)
    preprocessed = loader.preprocess(spacy_model=models['spacy'])

    ner = NERExtraction(preprocessed, model_algo=model_type)
    tagged = ner.remove_negated_concepts(ner.tag_all(models))

    return ner.docs2json(tagged)
Example #3
0
def main():
    """ Entry point to HutchNER1: Concept NERExtraction testing/evaluation.

    Loads gold-annotated documents (i2b2 or brat format), runs NER
    tagging, and writes predicted labels plus exact- and overlap-match
    evaluation results, timestamped so label files and result files can
    be linked to each other.
    """
    # start timer — time.clock() was removed in Python 3.8; use the
    # monotonic high-resolution counter instead
    start = time.perf_counter()

    # Parse incoming cmd line arguments
    args = ArgumentParsingSettings.get_testing_args()
    text_dir = args.textdir
    local_annotations = args.annots
    labkey_ini_section = args.section  # NOTE(review): parsed but unused here
    model_name = args.model
    model_type = args.model_type
    anno_type = args.anno_type
    print('model_name:')
    print(model_name)

    # Load the documents: i2b2-format annotations, or brat standoff otherwise
    if anno_type == 'i2b2':
        text_dl = i2b2DataLoader(txt_dir=text_dir,
                                 annotation_dir=local_annotations)
    else:
        text_dl = bratDataLoader(txt_dir=text_dir,
                                 annotation_dir=local_annotations)
    docs = text_dl.load()

    # Run NER driver with models and data provided in dirs
    extractor = NERExtraction(docs, model_name, model_type)
    # NOTE(review): `models` is not defined in this function — presumably a
    # module-level global; confirm, otherwise this raises NameError.
    tagged_documents = extractor.tag_all(models=models)
    # Negation-filtered docs are computed but not used below; evaluation is
    # intentionally run on the un-filtered tagged documents.
    neg_documents = extractor.remove_negated_concepts(tagged_documents)

    # Evaluate the performance on TAGGED DOCUMENTS (not the negated ones)
    labels = extractor.possible_labels
    ev = NEREvaluator(tagged_documents, labels)

    # use timestamp to link output labels and files to output results numbers
    time_stamp = time.time()
    string_timestamp = datetime.datetime.fromtimestamp(time_stamp).strftime(
        '%Y-%m-%d_%H.%M.%S')

    ev.output_labels("OutputLabels", tagged_documents, model_name,
                     string_timestamp)
    ev.write_results("EvalResults",
                     strictness="exact",
                     model_name=model_name,
                     string_timestamp=string_timestamp)
    ev.write_results("EvalResults",
                     strictness="overlap",
                     model_name=model_name,
                     string_timestamp=string_timestamp)

    # Print time elapsed to console
    end = time.perf_counter()
    print("##################################")
    print(" \tTime Elapsed: " + str(int((end - start) / 60)) +
          " minutes and " + str(int((end - start) % 60)) + " seconds.")
    print("##################################")
Example #4
0
def main():
    """ Entry point to HutchNER1: Concept NERExtraction testing/evaluation.

    Loads i2b2-annotated documents, runs NER tagging with negation
    removal, writes human-readable HTML prediction results, and evaluates
    exact- and overlap-match performance against the annotations.
    """
    # start timer — time.clock() was removed in Python 3.8; use the
    # monotonic high-resolution counter instead
    start = time.perf_counter()

    # Parse incoming cmd line arguments
    args = ArgumentParsingSettings.get_testing_args()
    data_dir = args.datadir
    model_dir = args.model_dir  # NOTE(review): parsed but unused here
    local_annotations = args.annots
    labkey_ini_section = args.section  # NOTE(review): parsed but unused here

    # Load the documents
    text_dl = i2b2DataLoader(txt_dir=data_dir,
                             annotation_dir=local_annotations)
    docs = text_dl.load()

    # Run NER driver with models and data provided in dirs
    extractor = NERExtraction(docs)
    tagged_documents = extractor.tag_all()
    neg_documents = extractor.remove_negated_concepts(tagged_documents)

    # Create DocumentPrinter object; print/write document objects in desired format
    # NOTE(review): hard-coded, user-specific output paths below — consider
    # promoting them to command-line arguments.
    dp = HTMLPrinter()
    dp.write_readable_prediction_results(
        neg_documents,
        "/home/wlane/PycharmProjects/HutchNER1/HutchNER1/NERResults")

    # Evaluate the performance on TAGGED DOCUMENTS (not the negated ones)
    labels = extractor.possible_labels
    ev = NEREvaluator(tagged_documents, labels)
    ev.write_results(
        "/home/wlane/PycharmProjects/HutchNER1/HutchNER1/NEREvaluation/EvalResults",
        strictness="exact")
    ev.write_results(
        "/home/wlane/PycharmProjects/HutchNER1/HutchNER1/NEREvaluation/EvalResults",
        strictness="overlap")

    # Print time elapsed to console — converted from Python 2 print
    # statements to print() calls, consistent with the rest of the file
    end = time.perf_counter()
    print("##################################")
    print(" \tTime Elapsed: " + str(int((end - start) / 60)) +
          " minutes and " + str(int((end - start) % 60)) + " seconds.")
    print("##################################")