def test_level2(sourceword, target):
    """Classify the final-test instances for (sourceword, target) with the
    stacked level-2 classifier and write one-best / out-of-five output files.

    Fix: the two output files were opened with bare open() and never closed
    (leaked handles, possibly unflushed output); they are now managed with a
    `with` block, matching the style used elsewhere in this file.
    """
    bestoutfn = "../L2output/{0}.{1}.best".format(sourceword, target)
    oofoutfn = "../L2output/{0}.{1}.oof".format(sourceword, target)
    level2_classifier = util_run_experiment.get_pickled_classifier(
        sourceword, target, 'level2')
    ## Need 4 more features from level1 -- answers from the four "friend"
    ## languages (presumably the most related targets; see get_four_friends).
    frd1, frd2, frd3, frd4 = sorted(list(get_four_friends(target)))
    classfrd1, classfrd2, classfrd3, classfrd4 = get_level1_classifiers(
        frd1, frd2, frd3, frd4, sourceword)
    # finaldir = "../trialdata/alltrials/"
    finaldir = "../finaltest"
    problems = util_run_experiment.get_test_instances(finaldir, sourceword)
    with open(bestoutfn, 'w') as bestoutfile, \
         open(oofoutfn, 'w') as oofoutfile:
        for problem in problems:
            level1_features = features.extract(problem)
            ## Level-1 answers from the four friend classifiers become
            ## extra features for the level-2 classifier.
            answer_frd1 = classfrd1.classify(level1_features)
            answer_frd2 = classfrd2.classify(level1_features)
            answer_frd3 = classfrd3.classify(level1_features)
            answer_frd4 = classfrd4.classify(level1_features)
            level2_features = train_extracted_level2.extend_features(
                level1_features,
                (answer_frd1, answer_frd2, answer_frd3, answer_frd4),
                frd1, frd2, frd3, frd4)
            level2_answer = level2_classifier.classify(level2_features)
            level2_dist = level2_classifier.prob_classify(level2_features)
            oof_answers = util_run_experiment.topfive(level2_dist)
            print(output_one_best(problem, target, level2_answer),
                  file=bestoutfile)
            print(output_five_best(problem, target, oof_answers),
                  file=oofoutfile)
def main():
    """Run the MEMM classifier over every final test word for the requested
    target language, writing one-best output files per source word."""
    args = util_run_experiment.get_argparser().parse_args()
    assert args.targetlang in all_target_languages
    assert args.model in ["bigram", "trigram"]
    targetlang = args.targetlang
    model = args.model
    trialdir = args.trialdir
    tt_home = args.treetaggerhome

    ## models from the HMM.
    lm_fn = "pickles/{0}.lm_{1}.pickle".format(targetlang, model)
    with open(lm_fn, "rb") as fh:
        lm = pickle.load(fh)
    emit_fn = "pickles/{0}.emit.pickle".format(targetlang)
    with open(emit_fn, "rb") as fh:
        emissions = pickle.load(fh)
    cfd = learn.reverse_cfd(emissions)
    emissions = learn.cpd(emissions)
    hmmparts = HMMParts(lm, emissions, cfd)

    for sourceword in util_run_experiment.final_test_words:
        print("Loading test problems for {0}".format(sourceword))
        problems = util_run_experiment.get_test_instances(trialdir, sourceword)
        bestoutfn = "MEMMoutput_{0}/{1}.{2}.best".format(
            model, sourceword, targetlang)
        with open(bestoutfn, "w") as bestoutfile:
            for problem in problems:
                answer = classify_for_memm(problem, targetlang, tt_home,
                                           hmmparts, model)
                print(output_one_best(problem, targetlang, answer),
                      file=bestoutfile)
def main():
    """Run the MRF experiment for one source word: optimize every test
    instance jointly over all target languages and write one-best and
    out-of-five output files per language.

    Fix: the per-language output files were opened manually and closed only
    at the end of the happy path -- an exception during classification or
    writing leaked every open handle. Closing now happens in a finally
    block, so all files opened so far are always released.
    """
    parser = mrf_get_argparser()
    args = parser.parse_args()
    assert args.sourceword in all_words
    sourceword = args.sourceword
    trialdir = args.trialdir
    stanford.taggerhome = args.taggerhome

    ## load up the cooccurrences.
    print("Loading cooccurrence information.")
    build_cooccurrences(sourceword)

    for lang in all_target_languages:
        classifier = get_pickled_classifier(sourceword, lang, "level1")
        classifiers[lang] = classifier
        if not classifier:
            print("Couldn't load pickled L1 classifier?")
            return
    print("Loaded pickled L1 classifiers!")

    print("Loading and tagging test problems...")
    problems = util_run_experiment.get_test_instances(trialdir, sourceword)
    print("OK loaded and tagged.")

    outfiles = {}
    try:
        for lang in all_target_languages:
            bestoutfn = "../MRFoutput/{0}.{1}.best".format(sourceword, lang)
            oofoutfn = "../MRFoutput/{0}.{1}.oof".format(sourceword, lang)
            outfiles[lang + ".best"] = open(bestoutfn, "w")
            outfiles[lang + ".oof"] = open(oofoutfn, "w")
        for problem in problems:
            ## these are dictionaries keyed by language.
            answers, oof_answers = mrf_optimize(problem)
            for lang in answers:
                answer = answers[lang]
                outfile = outfiles[lang + ".best"]
                print(output_one_best(problem, lang, answer), file=outfile)
                outfile = outfiles[lang + ".oof"]
                topfive = oof_answers[lang]
                print(output_five_best(problem, lang, topfive), file=outfile)
    finally:
        ## close every outfile, even if classification or writing raised.
        for outfile in outfiles.values():
            outfile.close()
def main():
    """Run the HMM (or unigram baseline) classifier over every final test
    word for the requested target language, writing one-best output files.

    Fix: on the "unigram" path, `lm` and `emissions` were pre-initialized to
    None but `cfd` was not -- yet `cfd` is passed unconditionally to
    classify_for_hmm, which would raise UnboundLocalError. `cfd` is now
    initialized to None alongside the other two.
    """
    parser = util_run_experiment.get_argparser()
    args = parser.parse_args()
    assert args.targetlang in all_target_languages
    assert args.model in ["unigram", "bigram", "trigram"]
    targetlang = args.targetlang
    trialdir = args.trialdir
    tt_home = args.treetaggerhome
    model = args.model

    print("Loading models...")
    ## All three stay None for the unigram baseline; classify_for_hmm
    ## receives them unconditionally below.
    lm, emissions, cfd = None, None, None
    if model != "unigram":
        picklefn = "pickles/{0}.lm_{1}.pickle".format(targetlang, model)
        with open(picklefn, "rb") as infile:
            lm = pickle.load(infile)
        picklefn = "pickles/{0}.emit.pickle".format(targetlang)
        with open(picklefn, "rb") as infile:
            emissions = pickle.load(infile)
        cfd = learn.reverse_cfd(emissions)
        emissions = learn.cpd(emissions)
    print("OK loaded models.")

    ## util_search.init_preset_dictionary(targetlang)
    for sourceword in util_run_experiment.final_test_words:
        print("Loading test problems for {0}".format(sourceword))
        problems = util_run_experiment.get_test_instances(trialdir, sourceword)
        bestoutfn = "HMMoutput_{0}/{1}.{2}.best".format(
            model, sourceword, targetlang)
        with open(bestoutfn, "w") as bestoutfile:
            for problem in problems:
                answer = classify_for_hmm(problem, lm, emissions, cfd,
                                          targetlang, tt_home, model)
                print(output_one_best(problem, targetlang, answer),
                      file=bestoutfile)
def main():
    """Classify the level-1 test instances for one (sourceword, targetlang)
    pair and write one-best and out-of-five result files."""
    args = util_run_experiment.get_argparser().parse_args()
    assert args.targetlang in all_target_languages
    assert args.sourceword in all_words
    targetlang = args.targetlang
    sourceword = args.sourceword
    trialdir = args.trialdir
    stanford.taggerhome = args.taggerhome

    print("Loading and tagging test problems...")
    problems = util_run_experiment.get_test_instances(trialdir, sourceword)
    print("OK loaded and tagged.")

    ## classifier = get_maxent_classifier(sourceword, targetlang)
    classifier = get_pickled_classifier(sourceword, targetlang, "level1")
    if not classifier:
        print("Couldn't load pickled L1 classifier?")
        return
    print("Loaded pickled L1 classifier!")

    bestoutfn = "../L1output/{0}.{1}.best".format(sourceword, targetlang)
    oofoutfn = "../L1output/{0}.{1}.oof".format(sourceword, targetlang)
    with open(bestoutfn, "w") as bestoutfile, \
         open(oofoutfn, "w") as oofoutfile:
        for problem in problems:
            feats = features.extract(problem)
            ## single best answer plus a distribution for the top five.
            best_answer = classifier.classify(feats)
            prob_dist = classifier.prob_classify(feats)
            top_answers = util_run_experiment.topfive(prob_dist)
            print(output_one_best(problem, targetlang, best_answer),
                  file=bestoutfile)
            print(output_five_best(problem, targetlang, top_answers),
                  file=oofoutfile)