def postag(ef: ExperimentFiles, overwrite=False):
    """Run tagging for every tagger configuration that still needs it.

    A configuration is processed when the path returned by its ``tagged()``
    does not exist yet, or when *overwrite* is true.

    With ``USE_CONDOR`` set, each configuration is handed to ``condorify``
    and, once all of them have been handed over, the function waits on
    condor (sending a notification when ``condor_email`` is set). Otherwise
    the work is done in-process, via ``enrich`` for ``ARG_POS_CLASS``
    configurations and via ``do_projection`` for all others.

    :param ef: object providing ``tagger_configs()``.
    :param overwrite: process configurations even if their output exists.
    """
    any_tagged = False

    for config in ef.tagger_configs():
        # Existing output is left alone unless the caller asked to overwrite.
        if os.path.exists(config.tagged()) and not overwrite:
            continue

        if USE_CONDOR:
            # tag_args(True) is unpacked positionally here; presumably it
            # yields the condor-flavoured argument list -- confirm in
            # the tagger config class.
            condorify(*config.tag_args(True))
        else:
            tag_kwargs = config.tag_args(False)
            run_tagger = (
                intent.commands.enrich.enrich
                if config.method == ARG_POS_CLASS
                else do_projection
            )
            run_tagger(**tag_kwargs)

        any_tagged = True

    # Nothing to wait for unless work was actually handed to condor.
    if not (USE_CONDOR and any_tagged):
        return

    if condor_email:
        condor.condor_wait_notify(
            "Data has been tagged.", condor_email, "CONDOR: Tagging Complete"
        )
    else:
        condor.condor_wait()
def test_inst_1(self):
    """Call ``do_projection`` on ``ps_path`` with the heuristic alignment method.

    Output goes to ``/dev/null``; the test makes no assertions, so it only
    fails if the call raises.
    """
    projection_kwargs = {}
    projection_kwargs[ARG_INFILE] = ps_path
    projection_kwargs[ARG_OUTFILE] = '/dev/null'
    projection_kwargs['aln_method'] = ARG_ALN_HEUR

    do_projection(**projection_kwargs)
def test_inst_2(self):
    """Call ``do_projection`` on the projection test file with the GIZA method.

    Output goes to ``/dev/null``; the test makes no assertions, so it only
    fails if one of the calls raises.
    """
    test_path = xigt_testfile('xigt-projection-tests.xml')

    # The loaded corpus is not used afterwards; the load is kept because it
    # runs before projection and would surface a failure first
    # (presumably a sanity check on the fixture -- confirm).
    _loaded_corpus = xc_load(test_path)

    projection_kwargs = {
        ARG_INFILE: test_path,
        'aln_method': ARG_ALN_GIZA,
        ARG_OUTFILE: '/dev/null',
    }
    do_projection(**projection_kwargs)
enriched_f = ef.get_enriched_file(lang) projected_f = ef.get_projected_file(aln_method, lang) if not os.path.exists(projected_f): projection_done = True if USE_CONDOR: model_prefix, name = ef.get_condor_project(aln_method, lang) run_cmd([p3path, intent_script, 'project', '--aln-method', aln_method, '--completeness', '0.0', enriched_f, projected_f], model_prefix, name, False) else: # p = Popen(['intent.py', 'project', '--aln-method', aln_method, enriched_f, projected_f, '-v'], env={"PATH":os.getenv("PATH")+':/Users/rgeorgi/Documents/code/intent'}) # p.wait() print(enriched_f) do_projection(**{ARG_INFILE:enriched_f, 'aln_method':aln_method, ARG_OUTFILE:projected_f, 'completeness':0.0}) if USE_CONDOR and projection_done: condor_wait_notify("Data has been projected.", email_address, "CONDOR: Projection Complete.") # ------------------------------------------- # 4) Now, extract the parsers. # ------------------------------------------- extraction_done = False for lang in ef.langs: for aln_method in aln_methods: for pos_source in pos_methods: model_prefix = ef.get_model_prefix(aln_method, pos_source, lang) tagger_path = ef.get_tagger(aln_method, pos_source, lang) parser_path = ef.get_tagger(aln_method, pos_source, lang)
elif args.subcommand == CMD_FILTER: filter_corpus(flatten_list(getattr(args, ARG_INFILE)), getattr(args, ARG_OUTFILE), **vars(args)) # EXTRACT elif args.subcommand == CMD_EXTRACT: extract_from_xigt(input_filelist=flatten_list(args.FILE), **vars(args)) # EVAL elif args.subcommand == CMD_EVAL: evaluate_intent(flatten_list(args.FILE), eval_alignment=args.alignment, eval_ds=args.ds_projection, eval_posproj=args.pos_projection, classifier_path=args.classifier, classifier_feats=args.classifier_feats, eval_tagger=args.pos_tagger, gold_tagmap=args.tagmap_gold, trans_tagmap=args.tagmap_trans, outpath=args.output) # TEXT CONVERT elif args.subcommand == CMD_TEXT: xc = text_to_xigtxml(args.FILE) dump(args.OUT_FILE, xc) # PROJECT elif args.subcommand == CMD_PROJECT: do_projection(**vars(args)) # REPRO elif args.subcommand == CMD_REPRO: reproduce(args.action)