Beispiel #1
0
from examples.sentence_level.wmt_2018.common.util.postprocess import format_submission
from examples.sentence_level.wmt_2018.common.util.reader import read_annotated_file, read_test_file
from examples.sentence_level.wmt_2018.en_lv.nmt.monotransquest_config import TEMP_DIRECTORY, MODEL_NAME, \
    monotransquest_config, MODEL_TYPE, SEED, RESULT_FILE, SUBMISSION_FILE, RESULT_IMAGE
from transquest.algo.sentence_level.monotransquest.evaluation import pearson_corr, spearman_corr
from transquest.algo.sentence_level.monotransquest.run_model import MonoTransQuestModel

# Make sure the scratch directory exists before the rest of the script runs.
# exist_ok=True folds the former "check, then create" pair into one call, so
# a directory created by another process between the check and the create
# no longer raises FileExistsError.
os.makedirs(TEMP_DIRECTORY, exist_ok=True)

# The train, dev and test splits are all read from the same data directory.
TRAIN_FOLDER = DEV_FOLDER = TEST_FOLDER = "examples/sentence_level/wmt_2018/en_lv/data/en_lv/"

# Annotated splits: each is read from a source file, a machine-translation
# file and an HTER file that share the "<split>.nmt.*" naming scheme.
train = read_annotated_file(
    path=TRAIN_FOLDER,
    original_file="train.nmt.src",
    translation_file="train.nmt.mt",
    hter_file="train.nmt.hter",
)
dev = read_annotated_file(
    path=DEV_FOLDER,
    original_file="dev.nmt.src",
    translation_file="dev.nmt.mt",
    hter_file="dev.nmt.hter",
)

# The test split is read without an HTER file.
test = read_test_file(
    path=TEST_FOLDER,
    original_file="test.nmt.src",
    translation_file="test.nmt.mt",
)

# Narrow the two annotated splits to the same three columns.
train, dev = (
    split[["original", "translation", "hter"]]
    for split in (train, dev)
)

# The test split keeps its "index" column in place of "hter".
test = test[["index", "original", "translation"]]

# Keep the test-row identifiers as a plain list.
index = test["index"].to_list()
train = train.rename(columns={
Beispiel #2
0
        "examples/sentence_level/wmt_2018/en_lv/data/en_lv",
        "examples/sentence_level/wmt_2018/en_lv/data/en_lv", "smt"
    ],
}

# Five independent, initially empty accumulators.
# NOTE(review): presumably filled by the per-language loop that follows;
# the appends themselves are outside this view, so confirm against the loop.
train_list, dev_list, test_list = [], [], []
index_list, test_sentence_pairs_list = [], []

for key, value in languages.items():

    if value[3] == "nmt":
        train_temp = read_annotated_file(path=value[0],
                                         original_file="train.nmt.src",
                                         translation_file="train.nmt.mt",
                                         hter_file="train.nmt.hter")
        dev_temp = read_annotated_file(path=value[1],
                                       original_file="dev.nmt.src",
                                       translation_file="dev.nmt.mt",
                                       hter_file="dev.nmt.hter")
        test_temp = read_test_file(path=value[2],
                                   original_file="test.nmt.src",
                                   translation_file="test.nmt.mt")

    elif value[3] == "smt":
        train_temp = read_annotated_file(path=value[0],
                                         original_file="train.smt.src",
                                         translation_file="train.smt.mt",
                                         hter_file="train.smt.hter")
        dev_temp = read_annotated_file(path=value[1],