def test_model_upper_score_de_en(self):
    """
    Run ModelUpperScorer on the pickled de-en sample graphs and check the
    resulting scores.

    The graphs are loaded from the test data directory, their scores are
    reset with self.clear_scores, and after scoring they are validated with
    self.check_scores.
    """
    graphs_fname = config["test_data_dir"] + "/graphs_sample_out_de-en.pkl"
    # pickles are binary data: read them in binary mode and close the file
    # deterministically instead of leaking the handle
    with open(graphs_fname, "rb") as inf:
        graphs = cPickle.load(inf)
    self.clear_scores(graphs)

    ref_fname = config["test_data_dir"] + "/lemma_sample_out_de-en.ref"
    ambig_fname = config["sample"]["de-en"]["ambig_fname"]
    # trailing underscore avoids shadowing the builtin filter()
    filter_ = filter_functions("de")

    scorer = ModelUpperScorer(ref_fname, ambig_fname, filter_)
    scorer(graphs)
    self.check_scores(graphs)
def compute_classifier_score(ns):
    """
    Apply a classifier-based scorer to the graphs held by namespace *ns*.

    Attributes read from ns: TranslationClassifier, ClassifierScore,
    models_fname, score_attr, source_lang, graphs and, when score_attr is
    empty, name.

    Attributes that may be set on ns as a side effect:
    - score_attr: defaults to ns.name + "_score" when it is falsy
    - vectorizer: defaults to a new Vectorizer() when ns has no such
      attribute
    """
    classifier_models = ns.TranslationClassifier(ns.models_fname)

    # fill in defaults for optional settings
    if not ns.score_attr:
        ns.score_attr = ns.name + "_score"

    if not hasattr(ns, "vectorizer"):
        ns.vectorizer = Vectorizer()

    scorer_class = ns.ClassifierScore
    score_graphs = scorer_class(
        classifier_models,
        score_attr=ns.score_attr,
        filter=filter_functions(ns.source_lang),
        vectorizer=ns.vectorizer)
    score_graphs(ns.graphs)
def preprocess(data_set, lang_pair):
    """
    Build, score and pickle the translation graphs for one evaluation set.

    The source file is annotated, translation candidates are looked up in
    the translation dictionary, and frequency, dictionary-upper and
    model-upper scores are added before the graphs are pickled to the
    configured graphs file.

    data_set: key of the evaluation data set under config["eval"]
    lang_pair: language pair written as "<source>-<target>"
    """
    source_lang, target_lang = lang_pair.split("-")
    eval_config = config["eval"][data_set][lang_pair]
    graphs_fname = eval_config["graphs_fname"]

    out_dir = os.path.dirname(graphs_fname)
    # out_dir is "" when graphs_fname has no directory part, in which case
    # there is nothing to create; makedirs (rather than mkdir) also copes
    # with more than one missing directory level
    if out_dir and not os.path.exists(out_dir):
        os.makedirs(out_dir)

    # annotate
    annotator = get_annotator(source_lang)
    graph_list = annotator.annot_xml_file(eval_config["src_fname"])

    # lookup translations
    trans_dict = TransDict.load(config["dict"][lang_pair]["pkl_fname"])
    lookup = Lookup(trans_dict)
    lookup(graph_list)

    # score most frequent translation
    freq_score = FreqScorer(config["count"]["lemma"][target_lang]["pkl_fname"])
    freq_score(graph_list)

    # dict upper scores
    lemma_ref_fname = eval_config["lemma_ref_fname"]
    scorer = DictUpperScorer(lemma_ref_fname)
    scorer(graph_list)

    # model upper scores
    ambig_fname = config["sample"][lang_pair]["ambig_fname"]
    # trailing underscore avoids shadowing the builtin filter()
    filter_ = filter_functions(source_lang)
    scorer = ModelUpperScorer(lemma_ref_fname, ambig_fname, filter_)
    scorer(graph_list)

    # save graphs; the with-block guarantees the pickle is flushed and the
    # file is closed before returning
    log.info("saving preprocessed graphs to " + graphs_fname)
    with open(graphs_fname, "wb") as outf:
        cPickle.dump(graph_list, outf)
def nb_exp(data_sets=config["eval"]["data_sets"], lang_pairs=(),
           text=False, draw=False, diff=False, trash_models=False):
    """
    Run one classifier experiment per data set and language pair, using a
    pipeline that ends in NMF followed by MultinomialNB.

    For every combination a model is trained, applied to the preprocessed
    graphs, combined with the frequency score by BestScorer and evaluated
    with postprocess. Scored graphs and per-experiment results are saved in
    a directory "_<name>"; a text table and an .npy file with all results
    are written to the current directory.

    data_sets: keys of the evaluation data sets under config["eval"]
    lang_pairs: language pairs to run; when empty, all pairs configured for
        the data set are used
    text, draw, diff: passed through to postprocess
    trash_models: if true, remove the models file after each experiment

    Returns the structured array of results, one record per experiment.
    """
    n_components = 10
    # NOTE: the field is spelled "blue" (not "bleu"); it is kept as is
    # because it is part of the dtype of the returned and saved results
    descriptor = [("data", "S16"),
                  ("lang", "S8"),
                  ("nist", "f"),
                  ("blue", "f"),
                  ("name", "S256")]
    results = np.zeros(9999, dtype=descriptor)
    exp_count = 0
    script_fname = os.path.splitext(os.path.basename(__file__))[0]
    table_fname = "_" + script_fname + "_results.txt"

    # the with-block closes the table file even if an experiment fails
    with open(table_fname, "w") as results_outf:
        for data in data_sets:
            for lang in lang_pairs or config["eval"][data].keys():
                ambig_fname = config["sample"][lang]["ambig_fname"]
                try:
                    samples_fname = config["sample"][lang]["samples_filt_fname"]
                except KeyError:
                    samples_fname = config["sample"][lang]["samples_fname"]
                    log.warn("backing off to unfiltered samples from " +
                             samples_fname)
                graphs_fname = config["eval"][data][lang]["graphs_fname"]

                name = "{}_{}_{}".format(script_fname, data, lang)
                exp_dir = "_" + name
                if not os.path.exists(exp_dir):
                    os.makedirs(exp_dir)
                models_fname = exp_dir + "/" + name + ".hdf5"

                classifier = Pipeline([
                    ("MCF", MinCountFilter(5)),
                    ("MFF", MaxFreqFilter(0.05)),
                    ("CHI2", SelectFpr(chi2, alpha=0.001)),
                    ("NMF", NMF(n_components=n_components)),
                    ("MNB", MultinomialNB()),
                ])

                # get ambiguity map
                ambig_map = AmbiguityMap(ambig_fname,
                                         graphs_fname=graphs_fname)

                # train classifier
                model_builder = ModelBuilder(ambig_map, samples_fname,
                                             models_fname, classifier)
                model_builder.run()

                # apply classifier
                model = TranslationClassifier(models_fname)
                score_attr = "nb_score"
                source_lang = lang.split("-")[0]
                scorer = ClassifierScore(
                    model,
                    score_attr=score_attr,
                    filter=filter_functions(source_lang),
                    vectorizer="mft")
                # pickles are binary data: use binary mode and close the
                # file as soon as the graphs are loaded
                with open(graphs_fname, "rb") as inf:
                    graph_list = cPickle.load(inf)
                scorer(graph_list)

                best_scorer = BestScorer(["nb_score", "freq_score"])
                best_scorer(graph_list)

                scored_graphs_fname = exp_dir + "/" + name + "_graphs.pkl"
                log.info("saving scored graphs to " + scored_graphs_fname)
                with open(scored_graphs_fname, "wb") as outf:
                    cPickle.dump(graph_list, outf)

                nist_score, bleu_score = postprocess(
                    name, data, lang, graph_list,
                    best_score_attr="best_score",
                    base_score_attrs=["nb_score", "freq_score"],
                    out_dir=exp_dir,
                    base_fname=name,
                    text=text,
                    draw=draw,
                    diff=diff
                )

                results[exp_count] = (data, lang, nist_score, bleu_score,
                                      name)
                exp_results_fname = exp_dir + "/" + name + ".npy"
                log.info("saving result to " + exp_results_fname)
                np.save(exp_results_fname, results[exp_count])
                exp_count += 1

                if trash_models:
                    log.info("Trashing models file " + models_fname)
                    os.remove(models_fname)

                # add to table of results per data set & language pair
                sub_results = results[(results["lang"] == lang) &
                                      (results["data"] == data)]
                sub_results = np.sort(sub_results, axis=0,
                                      order=("lang", "blue"))[::-1]
                text_table(sub_results, results_outf)
                results_outf.write("\n\n")

    # drop the unused, zero-filled part of the preallocated array
    results = results[:exp_count]
    all_results_fname = "_" + script_fname + "_results.npy"
    log.info("saving pickled results to " + all_results_fname)
    np.save(all_results_fname, results)

    text_table(results)
    return results
def centroid_exp(data_sets=config["eval"]["data_sets"], lang_pairs=(),
                 text=False, draw=False, diff=False, trash_models=False,
                 dump_centroids=False):
    """
    Run centroid classifier experiments per data set and language pair,
    using a pipeline that ends in CosNearestCentroid.

    For every combination of data set, language pair, min_count and
    max_freq a model is trained, applied to the preprocessed graphs,
    combined with the frequency score by BestScorer and evaluated with
    postprocess. Scored graphs and per-experiment results are saved in a
    directory "_<name>"; a text table and an .npy file with all results are
    written to the current directory.

    data_sets: keys of the evaluation data sets under config["eval"]
    lang_pairs: language pairs to run; when empty, all pairs configured for
        the data set are used
    text, draw, diff: passed through to postprocess
    trash_models: if true, remove the models file after each experiment
    dump_centroids: if true, write the centroids to
        "<exp_dir>/<name>_centroids.txt" using print_centroids with n=50

    Returns the structured array of results, one record per experiment.
    """
    # NOTE: the field is spelled "blue" (not "bleu"); it is kept as is
    # because it is part of the dtype of the returned and saved results
    descriptor = [("data", "S16"),
                  ("lang", "S8"),
                  ("min_count", "f"),
                  ("max_freq", "f"),
                  ("nist", "f"),
                  ("blue", "f"),
                  ("name", "S256")]
    results = np.zeros(9999, dtype=descriptor)
    exp_count = 0
    script_fname = os.path.splitext(os.path.basename(__file__))[0]
    table_fname = "_" + script_fname + "_results.txt"

    # Parameter values to sweep. Larger grids tried before:
    #   min_count: 1, 5, 10, 25, 50, 100, 250, 1000, 2500, 5000
    #   max_freq: 0.0001, 0.001, 0.005, 0.01, 0.05, 0.10, 0.25, 0.5, 1.0
    min_counts = (5,)
    max_freqs = (0.01,)

    # the with-block closes the table file even if an experiment fails
    with open(table_fname, "w") as results_outf:
        for data in data_sets:
            for lang in lang_pairs or config["eval"][data].keys():
                ambig_fname = config["sample"][lang]["ambig_fname"]
                try:
                    samples_fname = config["sample"][lang]["samples_filt_fname"]
                except KeyError:
                    samples_fname = config["sample"][lang]["samples_fname"]
                    log.warn("backing off to unfiltered samples from " +
                             samples_fname)
                graphs_fname = config["eval"][data][lang]["graphs_fname"]

                for min_count in min_counts:
                    for max_freq in max_freqs:
                        name = "{}_{}_{}_min_count={:d}_max_freq={:f}".format(
                            script_fname, data, lang, min_count, max_freq)
                        exp_dir = "_" + name
                        if not os.path.exists(exp_dir):
                            os.makedirs(exp_dir)
                        models_fname = exp_dir + "/" + name + ".hdf5"

                        classifier = Pipeline([
                            ("MCF", MinCountFilter(min_count)),
                            ("MFF", MaxFreqFilter(max_freq)),
                            ("CHI2", SelectFpr()),
                            ("CNC", CosNearestCentroid())
                        ])

                        # train classifier
                        model_builder = ModelBuilder(
                            ambig_fname, samples_fname, models_fname,
                            classifier, graphs_fname, with_vocab_mask=True)
                        model_builder.run()

                        # print the centroids to a file, only the 50 best
                        # features
                        if dump_centroids:
                            print_fname = (exp_dir + "/" + name +
                                           "_centroids.txt")
                            print_centroids(models_fname, n=50,
                                            outf=print_fname)

                        # apply classifier
                        model = TranslationClassifier(models_fname)
                        score_attr = "centroid_score"
                        source_lang = lang.split("-")[0]
                        scorer = ClassifierScore(
                            model,
                            score_attr=score_attr,
                            filter=filter_functions(source_lang))
                        # pickles are binary data: use binary mode and
                        # close the file as soon as the graphs are loaded
                        with open(graphs_fname, "rb") as inf:
                            graph_list = cPickle.load(inf)
                        scorer(graph_list)

                        best_scorer = BestScorer(["centroid_score",
                                                  "freq_score"])
                        best_scorer(graph_list)

                        scored_graphs_fname = (exp_dir + "/" + name +
                                               "_graphs.pkl")
                        log.info("saving scored graphs to " +
                                 scored_graphs_fname)
                        with open(scored_graphs_fname, "wb") as outf:
                            cPickle.dump(graph_list, outf)

                        nist_score, bleu_score = postprocess(
                            name, data, lang, graph_list,
                            best_score_attr="best_score",
                            base_score_attrs=["centroid_score", "freq_score"],
                            out_dir=exp_dir,
                            base_fname=name,
                            text=text,
                            draw=draw,
                            diff=diff
                        )

                        results[exp_count] = (data, lang, min_count,
                                              max_freq, nist_score,
                                              bleu_score, name)
                        exp_results_fname = exp_dir + "/" + name + ".npy"
                        log.info("saving result to " + exp_results_fname)
                        np.save(exp_results_fname, results[exp_count])
                        exp_count += 1

                        if trash_models:
                            log.info("Trashing models file " + models_fname)
                            os.remove(models_fname)

                # add to table of results per data set & language pair
                sub_results = results[(results["lang"] == lang) &
                                      (results["data"] == data)]
                sub_results = np.sort(sub_results, axis=0,
                                      order=("lang", "blue"))[::-1]
                text_table(sub_results, results_outf)
                results_outf.write("\n\n")

    # drop the unused, zero-filled part of the preallocated array
    results = results[:exp_count]
    all_results_fname = "_" + script_fname + "_results.npy"
    log.info("saving pickled results to " + all_results_fname)
    np.save(all_results_fname, results)

    text_table(results)
    return results
def make_graphs():
    """
    Create annotated translations graphs with scores for random translation,
    most frequent translation and approximated maximum. Also create minimal
    translation dictionaries for these graphs and drawings.

    For each sample (en-de and de-en) the following files are written
    relative to the current directory:
    - "dict_<root>.pkl": pickled minimal translation dictionary
    - "graphs_<root>.pkl": pickled scored graphs
    - drawings in pdf format under "_draw_<lang_pair>"
    where <root> is the source file name without its extension.
    """
    samples = [
        ("en-de",
         "sample_newstest2011-src.en.sgm",
         "lemma_sample_newstest2011-ref.de.sgm"),
        ("de-en",
         "sample_out_de-en.src",
         "lemma_sample_out_de-en.ref")
    ]

    for lang_pair, src_fname, lemma_ref_fname in samples:
        source_lang, target_lang = lang_pair.split("-")
        root_fname = splitext(src_fname)[0]

        # annotate
        annotator = get_annotator(source_lang)
        graphs = annotator.annot_xml_file(src_fname)

        # lookup
        dict_fname = config["dict"][lang_pair]["pkl_fname"]
        trans_dict = TransDict.load(dict_fname)
        lookup = LookupKeepKeys(trans_dict)
        lookup(graphs)

        # write pickle of minimal translation dict; the with-block makes
        # sure the file is flushed and closed
        min_dict = lookup.get_minimal_trans_dict()
        min_dict_fname = "dict_" + root_fname + ".pkl"
        with open(min_dict_fname, "wb") as outf:
            dump(min_dict, outf)

        # score most frequent translation
        counts_fname = config["count"]["lemma"][target_lang]["pkl_fname"]
        freq_score = FreqScorer(counts_fname)
        freq_score(graphs)

        # score random translation (takes no counts file)
        rand_score = RandScorer()
        rand_score(graphs)

        # dict upper score
        maxscore = DictUpperScorer(lemma_ref_fname)
        maxscore(graphs)

        # model upper scores
        ambig_fname = config["sample"][lang_pair]["ambig_fname"]
        # trailing underscore avoids shadowing the builtin filter()
        filter_ = filter_functions(source_lang)
        scorer = ModelUpperScorer(lemma_ref_fname, ambig_fname, filter_)
        scorer(graphs)

        # draw graphs
        draw = Draw()
        draw(graphs, out_format="pdf",
             base_score_attrs=["dup_score", "mup_score", "freq_score",
                               "rand_score"],
             out_dir="_draw_" + lang_pair)

        # save graphs
        graphs_fname = "graphs_" + root_fname + ".pkl"
        with open(graphs_fname, "wb") as outf:
            dump(graphs, outf)