# --- WMT-2020 ru-en quality-estimation data-prep script ---
# (This whole script has been collapsed onto a single physical line and is
# truncated mid-statement after `dev = dev.rename(columns={` — the remainder
# of the file is not visible here.)
#
# What the visible code does:
#   * imports the ru_en transformer config constants and the transquest
#     QuestModel / correlation metrics,
#   * creates TEMP_DIRECTORY if absent and optionally downloads a pretrained
#     model from Google Drive,
#   * reads the ru-en train/dev/test TSVs (keyed on "segid"),
#   * keeps (original, translation, z_mean) for train/dev and
#     (index, original, translation) for test, saving test's index column,
#   * renames columns to the text_a / text_b / labels schema expected by
#     transquest, dropping rows with missing values.
#
# NOTE(review): `os.path.exists` / `os.makedirs` are used but no `import os`
# is visible in this fragment — confirm it exists earlier in the real file.
# NOTE(review): `download_from_google_drive`, `read_annotated_file`, and
# `read_test_file` are called with no visible import here — presumably from
# examples.wmt_2020.common.util (see the sibling en_zh script); verify.
from examples.wmt_2020.ru_en.transformer_config import TEMP_DIRECTORY, DRIVE_FILE_ID, GOOGLE_DRIVE, MODEL_NAME, \ transformer_config, MODEL_TYPE, SEED, RESULT_FILE, RESULT_IMAGE, SUBMISSION_FILE from transquest.algo.transformers.evaluation import pearson_corr, spearman_corr from transquest.algo.transformers.run_model import QuestModel if not os.path.exists(TEMP_DIRECTORY): os.makedirs(TEMP_DIRECTORY) if GOOGLE_DRIVE: download_from_google_drive(DRIVE_FILE_ID, MODEL_NAME) TRAIN_FILE = "examples/wmt_2020/ru_en/data/ru-en/train.ruen.df.short.tsv" DEV_FILE = "examples/wmt_2020/ru_en/data/ru-en/dev.ruen.df.short.tsv" TEST_FILE = "examples/wmt_2020/ru_en/data/ru-en/test20.ruen.df.short.tsv" train = read_annotated_file(TRAIN_FILE, index="segid") dev = read_annotated_file(DEV_FILE, index="segid") test = read_test_file(TEST_FILE, index="segid") train = train[['original', 'translation', 'z_mean']] dev = dev[['original', 'translation', 'z_mean']] test = test[['index', 'original', 'translation']] index = test['index'].to_list() train = train.rename(columns={ 'original': 'text_a', 'translation': 'text_b', 'z_mean': 'labels' }).dropna() dev = dev.rename(columns={
# --- WMT-2020 en-zh quality-estimation data-prep script ---
# (Collapsed onto a single physical line and truncated mid-statement inside
# `dev = dev.rename(columns={ 'original': 'text_a',` — the remainder of the
# file is not visible here.)
#
# What the visible code does:
#   * imports the score normalizer (fit/un_fit), submission formatter, and
#     TSV readers from examples.wmt_2020.common.util, the en_zh transformer
#     config constants, and the transquest QuestModel / correlation metrics,
#   * creates TEMP_DIRECTORY if absent,
#   * reads the en-zh train/dev/test TSVs (default index — unlike the ru_en
#     script, which passes index="segid"),
#   * keeps (original, translation, z_mean) for train/dev and
#     (index, original, translation) for test, saving test's index column,
#   * renames columns to the text_a / text_b / labels schema expected by
#     transquest, dropping rows with missing values.
#
# NOTE(review): `os.path.exists` / `os.makedirs` are used but no `import os`
# is visible in this fragment — confirm it exists earlier in the real file.
from examples.wmt_2020.common.util.normalizer import fit, un_fit from examples.wmt_2020.common.util.postprocess import format_submission from examples.wmt_2020.common.util.reader import read_annotated_file, read_test_file from examples.wmt_2020.en_zh.transformer_config import TEMP_DIRECTORY, MODEL_TYPE, MODEL_NAME, transformer_config, SEED, \ RESULT_FILE, RESULT_IMAGE, SUBMISSION_FILE from transquest.algo.transformers.evaluation import pearson_corr, spearman_corr from transquest.algo.transformers.run_model import QuestModel if not os.path.exists(TEMP_DIRECTORY): os.makedirs(TEMP_DIRECTORY) TRAIN_FILE = "examples/wmt_2020/en_zh/data/en-zh/train.enzh.df.short.tsv" DEV_FILE = "examples/wmt_2020/en_zh/data/en-zh/dev.enzh.df.short.tsv" TEST_FILE = "examples/wmt_2020/en_zh/data/en-zh/test20.enzh.df.short.tsv" train = read_annotated_file(TRAIN_FILE) dev = read_annotated_file(DEV_FILE) test = read_test_file(TEST_FILE) train = train[['original', 'translation', 'z_mean']] dev = dev[['original', 'translation', 'z_mean']] test = test[['index', 'original', 'translation']] index = test['index'].to_list() train = train.rename(columns={ 'original': 'text_a', 'translation': 'text_b', 'z_mean': 'labels' }).dropna() dev = dev.rename(columns={ 'original': 'text_a',
# --- WMT-2020 multi-language quality-estimation data-prep fragment ---
# (Collapsed onto a single physical line and incomplete at BOTH edges: it
# starts inside a `languages` dict literal opened before this view — only the
# commented-out SI-EN entry and the closing `}` are visible — and it is cut
# off mid-statement inside `train_temp.rename(columns={ 'original': 'text_a',`.)
#
# What the visible code does:
#   * closes the `languages` mapping (language-pair key -> [train, dev, test]
#     TSV paths); the SI-EN entry is commented out, i.e. deliberately excluded,
#   * initializes accumulator lists for the per-language train/dev/test
#     frames, test indices, and test sentence pairs,
#   * iterates the language pairs; RU-EN TSVs are read with index="segid"
#     while every other pair uses the readers' default index — this mirrors
#     the difference between the standalone ru_en and en_zh scripts,
#   * per pair, keeps (original, translation, z_mean) for train/dev and
#     (index, original, translation) for test, saves the test index list,
#     then begins the rename to the transquest text_a/text_b/labels schema
#     (truncated here).
#
# NOTE(review): `read_annotated_file` / `read_test_file` have no visible
# import in this fragment — presumably examples.wmt_2020.common.util.reader,
# as in the en_zh script; verify against the full file.
# # "SI-EN": ["examples/wmt_2020/si_en/data/si-en/train.sien.df.short.tsv", # "examples/wmt_2020/si_en/data/si-en/dev.sien.df.short.tsv", # "examples/wmt_2020/si_en/data/si-en/test20.sien.df.short.tsv"], } train_list = [] dev_list = [] test_list = [] index_list = [] test_sentence_pairs_list = [] for key, value in languages.items(): if key == "RU-EN": train_temp = read_annotated_file(value[0], index="segid") dev_temp = read_annotated_file(value[1], index="segid") test_temp = read_test_file(value[2], index="segid") else: train_temp = read_annotated_file(value[0]) dev_temp = read_annotated_file(value[1]) test_temp = read_test_file(value[2]) train_temp = train_temp[['original', 'translation', 'z_mean']] dev_temp = dev_temp[['original', 'translation', 'z_mean']] test_temp = test_temp[['index', 'original', 'translation']] index_temp = test_temp['index'].to_list() train_temp = train_temp.rename(columns={ 'original': 'text_a',