# NOTE(review): this chunk opens mid-call. The line below starts with the
# trailing keyword arguments (test_size=0.1, random_state=SEED * i) of a call
# whose beginning is not visible here -- presumably the train/eval split that
# produces train_df and eval_df; confirm against the preceding lines.
#
# After that call, the visible statements do the following, in order:
#   1. Build a SiameseTransQuestModel from MODEL_NAME with
#      siamesetransquest_config, call train_model(train_df, eval_df), then
#      re-instantiate the model from siamesetransquest_config['best_model_dir'].
#   2. Store that model's predictions for dev_sentence_pairs and
#      test_sentence_pairs in column i of dev_preds / test_preds.
#      (i is bound outside this view -- presumably a fold index; the loop
#      header and the original indentation are not visible. TODO confirm.)
#   3. Average the columns (mean over axis=1) into dev['predictions'] and
#      test['predictions'].
#   4. Pass dev 'labels', dev 'predictions' and test 'predictions' through
#      un_fit (presumably reversing an earlier label scaling -- verify against
#      un_fit's definition).
#   5. Write dev as a tab-separated, UTF-8 CSV with a header and no index to
#      TEMP_DIRECTORY/RESULT_FILE, call draw_scatterplot for 'labels' vs
#      'predictions' with output path TEMP_DIRECTORY/RESULT_IMAGE and
#      "Sinhala-English" as the final argument, call print_stat on the same
#      two columns, and call format_submission for test with
#      language_pair="si-en", method="SiameseTransQuest" and output path
#      TEMP_DIRECTORY/SUBMISSION_FILE.
test_size=0.1, random_state=SEED * i) model = SiameseTransQuestModel(MODEL_NAME, args=siamesetransquest_config) model.train_model(train_df, eval_df) model = SiameseTransQuestModel( siamesetransquest_config['best_model_dir']) dev_preds[:, i] = model.predict(dev_sentence_pairs) test_preds[:, i] = model.predict(test_sentence_pairs) dev['predictions'] = dev_preds.mean(axis=1) test['predictions'] = test_preds.mean(axis=1) dev = un_fit(dev, 'labels') dev = un_fit(dev, 'predictions') test = un_fit(test, 'predictions') dev.to_csv(os.path.join(TEMP_DIRECTORY, RESULT_FILE), header=True, sep='\t', index=False, encoding='utf-8') draw_scatterplot(dev, 'labels', 'predictions', os.path.join(TEMP_DIRECTORY, RESULT_IMAGE), "Sinhala-English") print_stat(dev, 'labels', 'predictions') format_submission(df=test, index=index, language_pair="si-en", method="SiameseTransQuest", path=os.path.join(TEMP_DIRECTORY, SUBMISSION_FILE))
index=False, encoding='utf-8') draw_scatterplot( dev, 'labels', 'predictions', os.path.join( TEMP_DIRECTORY, RESULT_IMAGE.split(".")[0] + "_" + language + "." + RESULT_IMAGE.split(".")[1]), language) print_stat(dev, 'labels', 'predictions') if language == "RU-EN": format_submission(df=test, index=index, language_pair=language.lower(), method="TransQuest", path=os.path.join( TEMP_DIRECTORY, SUBMISSION_FILE.split(".")[0] + "_" + language + "." + SUBMISSION_FILE.split(".")[1]), index_type="Auto") else: format_submission(df=test, index=index, language_pair=language.lower(), method="TransQuest", path=os.path.join( TEMP_DIRECTORY, SUBMISSION_FILE.split(".")[0] + "_" + language + "." + SUBMISSION_FILE.split(".")[1]))