Code example #1
def transformer_factory(args):
    # Build a transformer from the JSON props file given on the command
    # line; fall back to empty props when no file was supplied.
    if args.transformer_props is not None:
        with open(args.transformer_props, 'r', encoding='utf-8') as f:
            props = json.load(f)
    else:
        props = {}
    return transformer_from_props(props)
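For context, this factory pairs with a `-transformer_props` argparse flag (see example #4 below), and the returned transformer is used as a context manager throughout these examples. A minimal usage sketch under those assumptions; `load` is the project's own document-loading helper seen in examples #5 and #6:

import argparse

def run(docs_path):
    # Illustrative wiring only: the flag name mirrors example #4.
    parser = argparse.ArgumentParser()
    parser.add_argument('-transformer_props', type=str, required=False,
                        help='path to transformer props JSON')
    args = parser.parse_args()

    docs = load(docs_path)  # project helper, as in examples #5 and #6
    with transformer_factory(args) as transformer:
        return [transformer.transform(doc) for doc in docs]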
Code example #2
def main():
    args = create_argparser().parse_args()
    base_props, props_to_evaluate = get_props_to_evaluate(args)
    dataset, unlabeled_docs = load_docs(args)
    props_picker, props_best_results = get_best_model_picker(
        history_improvement_controller(len(props_to_evaluate)))

    if not props_to_evaluate:
        print("No props found")
        return

    if os.path.exists(args.out_dir) and (not os.path.isdir(args.out_dir)
                                         or os.listdir(args.out_dir)):
        print("Output path should either not exists or be empty directory")
        return

    for props_idx, props in enumerate(props_to_evaluate):
        cur_props_path = join(args.out_dir, f'props_{props_idx}')
        os.makedirs(cur_props_path, exist_ok=True)
        dump_dict_as_json(props, join(cur_props_path, 'props.json'))

        # The transformer is only needed for preprocessing, so it is closed
        # right after use; the transformed data outlives the with block.
        with transformer_from_props(props.get("transformers", {})) as t:
            tr_dataset = dataset.transformed_by(t)
            tr_unlabeled_docs = ([t.transform(doc) for doc in unlabeled_docs]
                                 if unlabeled_docs is not None else None)

        mean_main_score, mean_scores = splits_cycle(args.task_name, args.seeds,
                                                    props, props_idx,
                                                    tr_dataset,
                                                    tr_unlabeled_docs,
                                                    cur_props_path)
        props_picker(mean_main_score, mean_scores, lambda: None)

    best_props_idx = props_best_results.best_score_idx
    best_main_score, best_scores = props_best_results.best_scores
    print(
        f"Overall experiment best score: {best_main_score:.4f}, props: #{best_props_idx}"
    )

    best_props_path = join(args.out_dir, 'best_props')
    os.makedirs(best_props_path)

    for split_idx in range(dataset.splits_number):
        split_path = join(args.out_dir, f'props_{best_props_idx}',
                          f'split_{split_idx}')
        split_best_seed = join(split_path, 'best_seed')

        shutil.copytree(split_best_seed,
                        join(best_props_path, f'split_{split_idx}'))
        shutil.copy(join(split_path, 'mean_results.json'),
                    join(best_props_path, f'split_{split_idx}'))

    dump_dict_as_json({
        **best_scores, "props_num": best_props_idx
    }, join(args.out_dir, "best_results.json"))
    dump_dict_as_json(
        get_experiments_report(props_best_results, base_props,
                               props_to_evaluate),
        join(args.out_dir, "experiments_report.json"))
Code example #3
def lemmatize(input_path, output_path, transformers_props_path):
    with open(transformers_props_path, 'r', encoding='utf-8') as f, \
            transformer_from_props(json.load(f)) as transformer, \
            open(input_path, 'r', encoding='utf-8') as readfile, \
            open(output_path, 'w', encoding='utf-8', newline='\n') as outfile:
        for line in readfile:
            lemma = _get_lemma(line.strip(), transformer)
            outfile.write(lemma + "\n")
Code example #4
def main():
    argparser = argparse.ArgumentParser(description='HTTP server for DEREK')
    argparser.add_argument('-remote',
                           dest='remote',
                           action='store_true',
                           help='should listen for remote connections')
    argparser.add_argument('-port',
                           type=int,
                           dest='port',
                           metavar='<port number>',
                           required=False,
                           help='port to listen on')
    argparser.add_argument('-ner',
                           type=str,
                           dest='ner_path',
                           metavar='<NER model path>',
                           required=False,
                           help='path to NER model')
    argparser.add_argument('-rel_ext',
                           type=str,
                           dest='rel_ext_path',
                           metavar='<rel_ext model path>',
                           required=False,
                           help='path to rel_ext model')
    argparser.add_argument('-transformer_props',
                           type=str,
                           dest='transformer_props',
                           metavar='<transformers.json>',
                           required=False,
                           help='path to transformer props')
    parsers = {"main": argparser}
    parsers, segmentor_factory = init_segmenter_argparser(
        parsers, parsers.keys())

    args = argparser.parse_args()
    host = "0.0.0.0" if args.remote else None

    if args.transformer_props is not None:
        with open(args.transformer_props, 'r', encoding='utf-8') as f:
            transformer_props = json.load(f)
    else:
        transformer_props = {}

    segmenter = segmentor_factory(args)

    with transformer_from_props(transformer_props) as transformer:
        safe_with_clf(
            'ner', args.ner_path, lambda ent_clf: safe_with_clf(
                'rel_ext', args.rel_ext_path, lambda rel_clf: run_app(
                    host, args.port, segmenter, transformer, ent_clf, rel_clf))
        )
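Judging only from these call sites, `safe_with_clf(name, path, callback)` seems to open the classifier at `path` (or pass `None` when the path is absent) and invoke the callback with it, which is why the two calls nest: both classifiers stay in scope while `run_app` serves requests. A speculative sketch of such a helper, not the project's actual code:

def safe_with_clf(name, model_path, callback):
    # Guessed contract: no path means no model; otherwise keep the
    # classifier open for the duration of the callback.
    if model_path is None:
        print(f"No {name} model provided, serving without it")
        return callback(None)
    with classifier_for(name)(model_path) as clf:  # classifier_for: see example #5
        return callback(clf)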
Code example #5
File: evaluate.py  Project: wayne9qiu/derek
def main():
    args = create_argparser().parse_args()
    docs = load(args.docs_path)
    evaluator = evaluator_for(args.task_name)

    if args.transformers_props_path is not None:
        with open(args.transformers_props_path, "r", encoding="utf-8") as f, \
                transformer_from_props(json.load(f)) as transformer:
            docs = [transformer.transform(doc) for doc in docs]

    if args.strategy == "holdout":
        folds_num = 1
        models = [args.model_path]
    else:
        folds_num = len(args.splits_model_paths)
        models = args.splits_model_paths

    main_scores = []
    for split_idx, model_path in enumerate(models):
        _, test_docs = get_fold(docs, folds_num, split_idx)

        with classifier_for(args.task_name)(model_path) as clf:
            main_score, scores, stats_generator = evaluator(
                clf, test_docs, args.stats_path is not None)
            main_scores.append(main_score)

            print("Split {}, Main score={:.4f}".format(split_idx, main_score))
            print(
                f"Scores: \n{json.dumps(scores, indent=4, sort_keys=True)}\n")

            if stats_generator is not None:
                stats_path = join(args.stats_path, f"split_{split_idx}")
                makedirs(stats_path, exist_ok=True)

                for doc_idx, doc in enumerate(test_docs):
                    with open(join(stats_path, doc.name + '_stats.txt'),
                              'w',
                              encoding='utf-8') as f:
                        f.write(stats_generator(doc_idx))

    print("\nMean splits score={:.4f}".format(
        sum(main_scores) / len(main_scores)))
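One assumption worth making explicit: `get_fold(docs, folds_num, split_idx)` returns a `(train_docs, test_docs)` pair, and with the holdout strategy (`folds_num == 1`) every document lands in the test partition. A rough, purely illustrative reimplementation; the real derek helper may shuffle or stratify differently:

def get_fold(docs, folds_num, split_idx):
    # Illustrative only: contiguous folds, no shuffling.
    if folds_num == 1:
        return [], list(docs)
    fold_size = len(docs) // folds_num
    start = split_idx * fold_size
    end = start + fold_size if split_idx < folds_num - 1 else len(docs)
    return docs[:start] + docs[end:], docs[start:end]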
Code example #6
File: bionlp_submit.py  Project: wayne9qiu/derek
def main():
    if len(sys.argv) < 4:
        print(
            "Usage: <model-path> <test-path> <out-path> <transformers-props-path>"
        )
        return
    model_path = sys.argv[1]
    docs = load(sys.argv[2])
    out_path = sys.argv[3]
    transformers_props_path = sys.argv[4] if len(sys.argv) > 4 else None

    if transformers_props_path is not None:
        with open(transformers_props_path, 'r', encoding='utf-8') as f, \
                transformer_from_props(json.load(f)) as transformer:
            docs = [transformer.transform(doc) for doc in docs]

    with RelExtClassifier(model_path) as classifier:
        rels = classifier.predict_docs(docs)

    write_relations(rels, out_path)
Code example #7
def main():
    parser, segmenter_factory = build_argparser()
    args = parser.parse_args()
    segmenter = segmenter_factory(args)

    model_path = args.model_path
    raw_text_reader = RawTextReader(segmenter=segmenter)
    docs = raw_text_reader.read(args.test_path, path_walker)
    out_path = args.out_path
    transformers_props_path = args.transformers_props_path

    if transformers_props_path is not None:
        with open(transformers_props_path, 'r', encoding='utf-8') as f, \
                transformer_from_props(json.load(f)) as transformer:
            docs = [transformer.transform(doc) for doc in docs]

    with ChainedNERClassifier(model_path) as classifier:
        for doc in docs:
            entities = classifier.predict_doc(doc)
            write_ner_results(doc, entities, out_path)