def create_lemma_data(
    data=("metis", "presemt-dev", "wmt08", "wmt09", "wmt10", "wmt11"),
    lang_pairs=(),
):
    """
    Lemmatize the word-level reference file of every requested data set
    and language pair.

    For each name in `data`, the language pairs processed are those given
    in `lang_pairs`; when `lang_pairs` is empty, all keys found under
    ``config["eval"][<data set>]`` are used instead.

    For each pair, the target language is the second "-"-separated part
    of the pair string. The input path ("word_ref_fname") and output path
    ("lemma_ref_fname") are read from the config entry for that pair.
    `create_dirs` is called on the output path before `lemmatize` is
    invoked (presumably to make sure its parent directories exist --
    confirm against `create_dirs`).
    """
    for corpus in data:
        # Fall back to every configured pair when none were requested.
        pairs = lang_pairs or config["eval"][corpus].keys()

        for pair in pairs:
            tgt = pair.split("-")[1]

            entry = config["eval"][corpus][pair]
            lemma_path = entry["lemma_ref_fname"]

            create_dirs(lemma_path)
            lemmatize(entry["word_ref_fname"], tgt, outf=lemma_path)
def lemmatize_reference():
    """
    Run `lemmatize` on the two sample reference translation files.

    Each job is an (input file, language code, output file) triple; the
    output name is the input name prefixed with "lemma_". The English
    reference is processed first, then the German one.
    """
    jobs = (
        ("sample_out_de-en.ref", "en", "lemma_sample_out_de-en.ref"),
        (
            "sample_newstest2011-ref.de.sgm",
            "de",
            "lemma_sample_newstest2011-ref.de.sgm",
        ),
    )
    for ref_fname, lang, lemma_fname in jobs:
        lemmatize(ref_fname, lang, lemma_fname)