def eval_mst(model_path, test_path, out_prefix, lowercase=True, tagger=None, force=False, result_strem=None):
    """
    Run the MST parser on a test corpus and evaluate its output.

    A stripped copy of the test corpus is written to
    ``<out_prefix>_eval_tagged.txt`` (optionally lowercased and re-tagged),
    the parser is run on that copy, its output goes to
    ``<out_prefix>_out_tagged.txt``, and that output is then compared
    against the original ``test_path``.

    :param model_path: Parser model handed to ``MSTParser.test``.
    :param test_path: CoNLL test file; read to build the stripped copy and
                      used as the reference in the final evaluation.
    :param out_prefix: Prefix from which the two working file names are built.
    :param lowercase: If true, the corpus is lowercased before being rewritten.
    :param tagger: If not None, passed to ``StanfordPOSTagger`` and used to
                   POS tag the stripped corpus (presumably a tagger model
                   path -- confirm against callers).
    :param force: Rebuild the stripped copy even if it already exists.
    :param result_strem: Currently unused; kept so existing callers that pass
                         it keep working.
    :return: Whatever ``eval_conll_paths`` returns for the parser output.
    """
    mp = MSTParser()

    # -------------------------------------------
    # Use the output prefix to create some new files.
    # -------------------------------------------
    eval_path = out_prefix + '_eval_tagged.txt'
    out_path = out_prefix + '_out_tagged.txt'

    # -------------------------------------------
    # Rewrite the test file; POS tag the data
    # with the POS tags from our tagger,
    # and strip features.
    #
    # The rewritten file is reused on later runs
    # unless `force` is set.
    # -------------------------------------------
    if force or not os.path.exists(eval_path):
        LOG.log(1000, "")

        # Honor the caller's `lowercase` choice rather than always
        # lowercasing while reading.
        cc = ConllCorpus.read(test_path, lowercase=lowercase)
        if lowercase:
            cc.lower()

        cc.strip_tags()
        cc.strip_feats()

        if tagger is not None:
            LOG.log(1000, "POS Tagging evaluation ")
            cc.tag(StanfordPOSTagger(tagger))

        # A bare prefix (no directory part) yields an empty dirname, and
        # os.makedirs('') raises FileNotFoundError -- only create the
        # directory when there is one to create.
        eval_dir = os.path.dirname(eval_path)
        if eval_dir:
            os.makedirs(eval_dir, exist_ok=True)

        cc.write(eval_path)
    # -------------------------------------------

    mp.test(model_path, eval_path, out_path)

    # Hand the evaluation result back so callers can inspect it; callers
    # that ignored the previous (None) return value are unaffected.
    return eval_conll_paths(test_path, out_path)
eval_path = ef.get_eval_file(lang) out_prefix = ef.get_out_prefix(lang, aln_method, pos_source) if not os.path.exists(eval_path) or not os.path.exists(out_prefix): # if USE_CONDOR: if False: prefix, name = ef.get_condor_result(aln_method, pos_source, lang) eval_script = os.path.join(intent_dir, 'intent/scripts/eval/dep_parser.py') run_cmd([p3path, eval_script, 'test', '-p', parser_path, '-t', tagger_path, '--test', eval_path, '-o', out_prefix], prefix, name, False, env='PYTHONPATH={}'.format(intent_dir)) else: # eval_mst(parser_path, eval_path, out_prefix, tagger=tagger_path) out_path = os.path.join(out_prefix + '_out_tagged.txt') if os.path.exists(out_path): ce = eval_conll_paths(eval_path, out_path) de_short.add(lang, aln_method, pos_source, ce.short_ul(), ce.short_ul_count(), ce.short_words()) de_long.add(lang, aln_method, pos_source, ce.long_ul(), ce.long_ul_count(), ce.long_words()) if USE_CONDOR: condor_wait_notify("Evaluation completed.", email_address, "CONDOR: Evaluation complete.") de_short.print_stats(ARG_POS_PROJ) de_long.print_stats(ARG_POS_PROJ) de_short.print_stats(ARG_POS_CLASS) de_long.print_stats(ARG_POS_CLASS)