def transformer_factory(args):
    """Build a transformer from the CLI-provided props file.

    When ``args.transformer_props`` is a path, the JSON it contains is used
    as the transformer configuration; otherwise an empty configuration is
    passed through to ``transformer_from_props``.
    """
    props = {}
    if args.transformer_props is not None:
        with open(args.transformer_props, 'r', encoding='utf-8') as props_file:
            props = json.load(props_file)
    return transformer_from_props(props)
def main():
    """Run the full experiment grid: evaluate every props candidate, then
    collect the best-performing one into ``<out_dir>/best_props``."""
    args = create_argparser().parse_args()
    # base_props: the shared/base configuration; props_to_evaluate: concrete
    # candidate configurations derived from it (schema defined by
    # get_props_to_evaluate — not visible here).
    base_props, props_to_evaluate = get_props_to_evaluate(args)
    dataset, unlabeled_docs = load_docs(args)
    # The picker tracks the best (main_score, scores) pair seen so far;
    # the controller presumably bounds how long we keep trying without
    # improvement — TODO confirm against its definition.
    props_picker, props_best_results = get_best_model_picker(
        history_improvement_controller(len(props_to_evaluate)))
    if not props_to_evaluate:
        print("No props found")
        return
    # Refuse to run into a non-empty output location so earlier experiment
    # results are never silently overwritten.
    if os.path.exists(args.out_dir) and (not os.path.isdir(args.out_dir)
                                         or os.listdir(args.out_dir)):
        print("Output path should either not exists or be empty directory")
        return
    for props_idx, props in enumerate(props_to_evaluate):
        # Each candidate gets its own subdirectory with a copy of its props
        # for later inspection/reproduction.
        cur_props_path = join(args.out_dir, f'props_{props_idx}')
        os.makedirs(cur_props_path, exist_ok=True)
        dump_dict_as_json(props, join(cur_props_path, 'props.json'))
        # Transformers are context managers; the same transformer is applied
        # to both the labeled dataset and (if present) the unlabeled docs.
        with transformer_from_props(props.get("transformers", {})) as t:
            tr_dataset = dataset.transformed_by(t)
            tr_unlabeled_docs = [t.transform(doc) for doc in unlabeled_docs
                                 ] if unlabeled_docs is not None else None
            # Cross-split evaluation over all seeds; returns the aggregate
            # main metric plus the full score breakdown.
            mean_main_score, mean_scores = splits_cycle(
                args.task_name, args.seeds, props, props_idx,
                tr_dataset, tr_unlabeled_docs, cur_props_path)
        # The no-op lambda is the "on new best" callback slot; results are
        # already persisted on disk so nothing extra is saved here.
        props_picker(mean_main_score, mean_scores, lambda: None)
    best_props_idx = props_best_results.best_score_idx
    best_main_score, best_scores = props_best_results.best_scores
    print(
        f"Overall experiment best score: {best_main_score:.4f}, props: #{best_props_idx}"
    )
    # Materialize the winning candidate's artifacts under best_props/.
    # No exist_ok here: out_dir was verified empty above, so this must not
    # already exist.
    best_props_path = join(args.out_dir, 'best_props')
    os.makedirs(best_props_path)
    for split_idx in range(dataset.splits_number):
        split_path = join(args.out_dir, f'props_{best_props_idx}',
                          f'split_{split_idx}')
        split_best_seed = join(split_path, 'best_seed')
        # Copy the best seed's whole directory, then drop the split-level
        # mean_results.json alongside it.
        shutil.copytree(split_best_seed,
                        join(best_props_path, f'split_{split_idx}'))
        shutil.copy(join(split_path, 'mean_results.json'),
                    join(best_props_path, f'split_{split_idx}'))
    dump_dict_as_json({
        **best_scores,
        "props_num": best_props_idx
    }, join(args.out_dir, "best_results.json"))
    dump_dict_as_json(
        get_experiments_report(props_best_results, base_props,
                               props_to_evaluate),
        join(args.out_dir, "experiments_report.json"))
def lemmatize(input_path, output_path, transformers_props_path):
    """Lemmatize ``input_path`` line by line into ``output_path``.

    The transformer is configured from the JSON props file at
    ``transformers_props_path``; every input line is stripped, lemmatized
    via ``_get_lemma`` and written out followed by a single ``\\n``.
    """
    # Load the transformer configuration first; the props file is not
    # needed once parsed.
    with open(transformers_props_path, 'r', encoding='utf-8') as props_file:
        props = json.load(props_file)
    with transformer_from_props(props) as transformer:
        with open(input_path, 'r', encoding='utf-8') as src:
            # newline='\n' forces Unix line endings regardless of platform.
            with open(output_path, 'w', encoding='utf-8', newline='\n') as dst:
                for raw_line in src:
                    dst.write(_get_lemma(raw_line.strip(), transformer) + "\n")
def main():
    """CLI entry point: parse server options and launch the DEREK HTTP app."""
    argparser = argparse.ArgumentParser(description='HTTP server for DEREK')
    argparser.add_argument('-remote', dest='remote', action='store_true',
                           help='should listen for remote connections')
    # Optional string/int options, registered table-style: (flag, type,
    # dest, metavar, help).
    optional_args = [
        ('-port', int, 'port', '<port number>', 'port to listen on'),
        ('-ner', str, 'ner_path', '<NER model path>', 'path to NER model'),
        ('-rel_ext', str, 'rel_ext_path', '<rel_ext model path>',
         'path to rel_ext model'),
        ('-transformer_props', str, 'transformer_props',
         '<transformers.json>', 'path to transformer props'),
    ]
    for flag, arg_type, dest, metavar, help_text in optional_args:
        argparser.add_argument(flag, type=arg_type, dest=dest,
                               metavar=metavar, required=False,
                               help=help_text)
    # Let the segmenter module extend the parser with its own options and
    # hand back a factory for building the segmenter from parsed args.
    parsers = {"main": argparser}
    parsers, build_segmenter = init_segmenter_argparser(parsers,
                                                        parsers.keys())
    args = argparser.parse_args()
    # Bind to all interfaces only when remote access was requested.
    host = "0.0.0.0" if args.remote else None
    transformer_props = {}
    if args.transformer_props is not None:
        with open(args.transformer_props, 'r', encoding='utf-8') as props_file:
            transformer_props = json.load(props_file)
    segmenter = build_segmenter(args)
    # safe_with_clf manages each classifier's lifetime; nest the two so the
    # app runs with both NER and rel_ext classifiers open.
    with transformer_from_props(transformer_props) as transformer:
        safe_with_clf(
            'ner', args.ner_path,
            lambda ent_clf: safe_with_clf(
                'rel_ext', args.rel_ext_path,
                lambda rel_clf: run_app(host, args.port, segmenter,
                                        transformer, ent_clf, rel_clf)))
def main():
    """Evaluate a trained model (or one model per split) on the given docs.

    Prints per-split scores, optionally dumps per-document stats under
    ``args.stats_path``, and finishes with the mean main score.
    """
    args = create_argparser().parse_args()
    docs = load(args.docs_path)
    evaluator = evaluator_for(args.task_name)

    # Optionally pre-transform all documents with a configured transformer.
    if args.transformers_props_path is not None:
        with open(args.transformers_props_path, "r", encoding="utf-8") as f, \
                transformer_from_props(json.load(f)) as transformer:
            docs = [transformer.transform(doc) for doc in docs]

    # "holdout" evaluates a single model on one fold; otherwise each split
    # gets its own model path.
    if args.strategy == "holdout":
        folds_num, models = 1, [args.model_path]
    else:
        models = args.splits_model_paths
        folds_num = len(models)

    collect_stats = args.stats_path is not None
    main_scores = []
    for split_idx, model_path in enumerate(models):
        _, test_docs = get_fold(docs, folds_num, split_idx)
        with classifier_for(args.task_name)(model_path) as clf:
            main_score, scores, stats_generator = evaluator(
                clf, test_docs, collect_stats)
        main_scores.append(main_score)
        print("Split {}, Main score={:.4f}".format(split_idx, main_score))
        print(
            f"Scores: \n{json.dumps(scores, indent=4, sort_keys=True)}\n")
        if stats_generator is not None:
            # One stats file per test document, grouped per split.
            split_stats_dir = join(args.stats_path, f"split_{split_idx}")
            makedirs(split_stats_dir, exist_ok=True)
            for doc_idx, doc in enumerate(test_docs):
                stats_file = join(split_stats_dir, doc.name + '_stats.txt')
                with open(stats_file, 'w', encoding='utf-8') as out:
                    out.write(stats_generator(doc_idx))

    print("\nMean splits score={:.4f}".format(
        sum(main_scores) / len(main_scores)))
def main():
    """Predict relations for the given docs and write them to disk.

    Positional argv: model path, test docs path, output path, and an
    optional transformers props path.
    """
    # Three positional arguments are required; the props path is optional.
    if len(sys.argv) < 4:
        print(
            "Usage: <model-path> <test-path> <out-path> <transformers-props-path>"
        )
        return

    model_path, test_path, out_path = sys.argv[1:4]
    docs = load(test_path)
    transformers_props_path = sys.argv[4] if len(sys.argv) > 4 else None

    # Optionally pre-transform the documents before prediction.
    if transformers_props_path is not None:
        with open(transformers_props_path, 'r', encoding='utf-8') as f, \
                transformer_from_props(json.load(f)) as transformer:
            docs = [transformer.transform(doc) for doc in docs]

    with RelExtClassifier(model_path) as classifier:
        predicted = classifier.predict_docs(docs)
        write_relations(predicted, out_path)
def main():
    """Read raw texts, predict entities per document, and write NER results."""
    parser, segmenter_factory = build_argparser()
    args = parser.parse_args()

    # Build the segmenter from CLI args and read the raw-text documents.
    segmenter = segmenter_factory(args)
    reader = RawTextReader(segmenter=segmenter)
    docs = reader.read(args.test_path, path_walker)

    model_path = args.model_path
    out_path = args.out_path

    # Optionally pre-transform the documents before prediction.
    if args.transformers_props_path is not None:
        with open(args.transformers_props_path, 'r', encoding='utf-8') as f, \
                transformer_from_props(json.load(f)) as transformer:
            docs = [transformer.transform(doc) for doc in docs]

    with ChainedNERClassifier(model_path) as classifier:
        for doc in docs:
            write_ner_results(doc, classifier.predict_doc(doc), out_path)