from examples.sentence_level.wmt_2018.common.util.postprocess import format_submission from examples.sentence_level.wmt_2018.common.util.reader import read_annotated_file, read_test_file from examples.sentence_level.wmt_2018.en_lv.nmt.monotransquest_config import TEMP_DIRECTORY, MODEL_NAME, \ monotransquest_config, MODEL_TYPE, SEED, RESULT_FILE, SUBMISSION_FILE, RESULT_IMAGE from transquest.algo.sentence_level.monotransquest.evaluation import pearson_corr, spearman_corr from transquest.algo.sentence_level.monotransquest.run_model import MonoTransQuestModel if not os.path.exists(TEMP_DIRECTORY): os.makedirs(TEMP_DIRECTORY) TRAIN_FOLDER = "examples/sentence_level/wmt_2018/en_lv/data/en_lv/" DEV_FOLDER = "examples/sentence_level/wmt_2018/en_lv/data/en_lv/" TEST_FOLDER = "examples/sentence_level/wmt_2018/en_lv/data/en_lv/" train = read_annotated_file(path=TRAIN_FOLDER, original_file="train.nmt.src", translation_file="train.nmt.mt", hter_file="train.nmt.hter") dev = read_annotated_file(path=DEV_FOLDER, original_file="dev.nmt.src", translation_file="dev.nmt.mt", hter_file="dev.nmt.hter") test = read_test_file(path=TEST_FOLDER, original_file="test.nmt.src", translation_file="test.nmt.mt") train = train[['original', 'translation', 'hter']] dev = dev[['original', 'translation', 'hter']] test = test[['index', 'original', 'translation']] index = test['index'].to_list() train = train.rename(columns={
"examples/sentence_level/wmt_2018/en_lv/data/en_lv",
"examples/sentence_level/wmt_2018/en_lv/data/en_lv",
"smt"
],
}

# Accumulators, all initialised empty here.
# NOTE(review): presumably filled per language pair by the loop below - the
# part of the loop that would do so is outside this view; confirm.
train_list = []
dev_list = []
test_list = []
index_list = []
test_sentence_pairs_list = []

# Each `languages` value is indexed as:
#   value[0] -> path passed when reading the train files
#   value[1] -> path passed when reading the dev files
#   value[2] -> path passed when reading the test files
#   value[3] -> "nmt" or "smt", selecting which file-name family is read
for key, value in languages.items():

    if value[3] == "nmt":
        # NMT entries read the *.nmt.* files.
        train_temp = read_annotated_file(path=value[0], original_file="train.nmt.src",
                                         translation_file="train.nmt.mt", hter_file="train.nmt.hter")
        dev_temp = read_annotated_file(path=value[1], original_file="dev.nmt.src",
                                       translation_file="dev.nmt.mt", hter_file="dev.nmt.hter")
        test_temp = read_test_file(path=value[2], original_file="test.nmt.src", translation_file="test.nmt.mt")

    elif value[3] == "smt":
        # SMT entries read the *.smt.* files.
        train_temp = read_annotated_file(path=value[0], original_file="train.smt.src",
                                         translation_file="train.smt.mt", hter_file="train.smt.hter")
        dev_temp = read_annotated_file(path=value[1],