# Metric callables forwarded, by keyword, to both training and evaluation.
metric_kwargs = dict(pearson_corr=pearson_corr, spearman_corr=spearman_corr, mae=mean_absolute_error)

# Train on the training frame, score the dev frame, then predict the test pairs.
model.train_model(train, **metric_kwargs)
result, model_outputs, wrong_predictions = model.eval_model(dev, **metric_kwargs)
predictions, raw_outputs = model.predict(test_sentence_pairs)

# Store the model outputs next to the rows they were produced for.
dev['predictions'] = model_outputs
test['predictions'] = predictions

# NOTE(review): un_fit presumably maps a column back to its original scale --
# confirm against its definition. dev has labels and predictions; test only
# gets its predictions column processed.
for column in ('labels', 'predictions'):
    dev = un_fit(dev, column)
test = un_fit(test, 'predictions')

# Write the dev frame as a tab-separated file with a header and no index column.
result_path = os.path.join(TEMP_DIRECTORY, RESULT_FILE)
dev.to_csv(result_path, header=True, sep='\t', index=False, encoding='utf-8')

# Plot and print dev-set statistics for labels versus predictions.
image_path = os.path.join(TEMP_DIRECTORY, RESULT_IMAGE)
draw_scatterplot(dev, 'labels', 'predictions', image_path, "Russian-English")
print_stat(dev, 'labels', 'predictions')

# Emit the test-set submission file for the ru-en pair.
submission_path = os.path.join(TEMP_DIRECTORY, SUBMISSION_FILE)
format_submission(df=test, index=index, language_pair="ru-en", method="TransQuest",
                  path=submission_path, index_type="Auto")
args=transformer_config) model.train_model(train, pearson_corr=pearson_corr, spearman_corr=spearman_corr, mae=mean_absolute_error) result, model_outputs, wrong_predictions = model.eval_model( dev, pearson_corr=pearson_corr, spearman_corr=spearman_corr, mae=mean_absolute_error) predictions, raw_outputs = model.predict(test_sentence_pairs) dev['predictions'] = model_outputs test['predictions'] = predictions dev = un_fit(dev, 'labels') dev = un_fit(dev, 'predictions') test = un_fit(test, 'predictions') dev.to_csv(os.path.join(TEMP_DIRECTORY, RESULT_FILE), header=True, sep='\t', index=False, encoding='utf-8') draw_scatterplot(dev, 'labels', 'predictions', os.path.join(TEMP_DIRECTORY, RESULT_IMAGE), "English-Chinese") print_stat(dev, 'labels', 'predictions') format_submission(df=test, index=index, language_pair="en-zh", method="TransQuest", path=os.path.join(TEMP_DIRECTORY, SUBMISSION_FILE))
index=False, encoding='utf-8') draw_scatterplot( dev, 'labels', 'predictions', os.path.join( TEMP_DIRECTORY, RESULT_IMAGE.split(".")[0] + "_" + language + "." + RESULT_IMAGE.split(".")[1]), language) print_stat(dev, 'labels', 'predictions') if language == "RU-EN": format_submission(df=test, index=index, language_pair=language.lower(), method="TransQuest", path=os.path.join( TEMP_DIRECTORY, SUBMISSION_FILE.split(".")[0] + "_" + language + "." + SUBMISSION_FILE.split(".")[1]), index_type="Auto") else: format_submission(df=test, index=index, language_pair=language.lower(), method="TransQuest", path=os.path.join( TEMP_DIRECTORY, SUBMISSION_FILE.split(".")[0] + "_" + language + "." + SUBMISSION_FILE.split(".")[1]))