from transquest.algo.sentence_level.siamesetransquest.logging_handler import LoggingHandler from transquest.algo.sentence_level.siamesetransquest.run_model import SiameseTransQuestModel logging.basicConfig(format='%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO, handlers=[LoggingHandler()]) if not os.path.exists(TEMP_DIRECTORY): os.makedirs(TEMP_DIRECTORY) TRAIN_FILE = "examples/sentence_level/wmt_2020/ru_en/data/ru-en/train.ruen.df.short.tsv" DEV_FILE = "examples/sentence_level/wmt_2020/ru_en/data/ru-en/dev.ruen.df.short.tsv" TEST_FILE = "examples/sentence_level/wmt_2020/ru_en/data/ru-en/test20.ruen.df.short.tsv" train = read_annotated_file(TRAIN_FILE, index="segid") dev = read_annotated_file(DEV_FILE, index="segid") test = read_test_file(TEST_FILE, index="segid") index = test['index'].to_list() train = train[['original', 'translation', 'z_mean']] dev = dev[['original', 'translation', 'z_mean']] test = test[['original', 'translation']] train = train.rename(columns={ 'original': 'text_a', 'translation': 'text_b', 'z_mean': 'labels' }).dropna() dev = dev.rename(columns={
from transquest.algo.sentence_level.siamesetransquest.logging_handler import LoggingHandler from transquest.algo.sentence_level.siamesetransquest.run_model import SiameseTransQuestModel logging.basicConfig(format='%(asctime)s - %(message)s', datefmt='%Y-%m-%d %H:%M:%S', level=logging.INFO, handlers=[LoggingHandler()]) if not os.path.exists(TEMP_DIRECTORY): os.makedirs(TEMP_DIRECTORY) TRAIN_FILE = "examples/sentence_level/wmt_2020/si_en/data/si-en/train.sien.df.short.tsv" DEV_FILE = "examples/sentence_level/wmt_2020/si_en/data/si-en/dev.sien.df.short.tsv" TEST_FILE = "examples/sentence_level/wmt_2020/si_en/data/si-en/test20.sien.df.short.tsv" train = read_annotated_file(TRAIN_FILE) dev = read_annotated_file(DEV_FILE) test = read_test_file(TEST_FILE) index = test['index'].to_list() train = train[['original', 'translation', 'z_mean']] dev = dev[['original', 'translation', 'z_mean']] test = test[['original', 'translation']] train = train.rename(columns={ 'original': 'text_a', 'translation': 'text_b', 'z_mean': 'labels' }).dropna() dev = dev.rename(columns={ 'original': 'text_a',
"examples/sentence_level/wmt_2020/si_en/data/si-en/train.sien.df.short.tsv", "examples/sentence_level/wmt_2020/si_en/data/si-en/dev.sien.df.short.tsv", "examples/sentence_level/wmt_2020/si_en/data/si-en/test20.sien.df.short.tsv" ], } train_list = [] dev_list = [] test_list = [] index_list = [] test_sentence_pairs_list = [] for key, value in languages.items(): if key == "RU-EN": train_temp = read_annotated_file(value[0], index="segid") dev_temp = read_annotated_file(value[1], index="segid") test_temp = read_test_file(value[2], index="segid") else: train_temp = read_annotated_file(value[0]) dev_temp = read_annotated_file(value[1]) test_temp = read_test_file(value[2]) train_temp = train_temp[['original', 'translation', 'z_mean']] dev_temp = dev_temp[['original', 'translation', 'z_mean']] test_temp = test_temp[['index', 'original', 'translation']] index_temp = test_temp['index'].to_list() train_temp = train_temp.rename(columns={ 'original': 'text_a',