Beispiel #1
0
            pearson_corr=pearson_corr,
            spearman_corr=spearman_corr,
            mae=mean_absolute_error)
        # Run the model on the test sentence pairs. Only `predictions` is used
        # in the visible code; `raw_outputs` is not referenced here.
        predictions, raw_outputs = model.predict(test_sentence_pairs)
        # NOTE(review): `model_outputs` is assigned above this excerpt,
        # presumably by the evaluation call whose arguments end just above;
        # confirm it holds the dev-set outputs.
        dev['predictions'] = model_outputs
        # Store the test predictions under the 'predictions' key.
        test['predictions'] = predictions

# Split the configured file names once so the language tag can be inserted
# right before the extension. os.path.splitext handles names with several
# dots (or none at all), where indexing the result of str.split(".") would
# pick the wrong parts or raise IndexError.
result_stem, result_ext = os.path.splitext(RESULT_FILE)
submission_stem, submission_ext = os.path.splitext(SUBMISSION_FILE)

# Walk the per-language dev/test frames in lockstep with their index and
# language name, writing one result file and one submission file each.
for dev, test, index, language in zip(dev_list, test_list, index_list,
                                      [*languages]):
    # Apply un_fit to every column that is reported below.
    dev = un_fit(dev, 'labels')
    dev = un_fit(dev, 'predictions')
    test = un_fit(test, 'predictions')

    # Tab-separated dev results, e.g. "<stem>_<language><ext>".
    dev.to_csv(os.path.join(TEMP_DIRECTORY,
                            result_stem + "_" + language + result_ext),
               header=True,
               sep='\t',
               index=False,
               encoding='utf-8')
    print(language)
    print_stat(dev, 'labels', 'predictions')

    format_submission(df=test,
                      index=index,
                      method="TransQuest",
                      path=os.path.join(
                          TEMP_DIRECTORY,
                          submission_stem + "_" + language + submission_ext))
            # Both result files live in the configured cache directory.
            cache_dir = siamese_transformer_config['cache_dir']

            # Parse one float per line from the dev results into column i.
            with open(os.path.join(cache_dir, "dev_result.txt")) as dev_file:
                dev_preds[:, i] = [
                    float(row) for row in dev_file.read().splitlines()
                ]

            # Same for the test results.
            with open(os.path.join(cache_dir, "test_result.txt")) as test_file:
                test_preds[:, i] = [
                    float(row) for row in test_file.read().splitlines()
                ]

        # Average the prediction arrays over axis 1. Column `i` of each array
        # is filled from a result file above; presumably there is one column
        # per run, so this yields the mean prediction per row. Confirm against
        # the enclosing loop, which is not visible here.
        dev['predictions'] = dev_preds.mean(axis=1)
        test['predictions'] = test_preds.mean(axis=1)

# Apply un_fit to every column that is reported below (labels first, then
# predictions, then the test predictions).
dev = un_fit(un_fit(dev, 'labels'), 'predictions')
test = un_fit(test, 'predictions')

# Write the dev frame as a tab-separated file.
dev_result_path = os.path.join(TEMP_DIRECTORY, RESULT_FILE)
dev.to_csv(dev_result_path,
           sep='\t',
           header=True,
           index=False,
           encoding='utf-8')

# Plot labels against predictions, then print the dev statistics.
scatterplot_path = os.path.join(TEMP_DIRECTORY, RESULT_IMAGE)
draw_scatterplot(dev, 'labels', 'predictions', scatterplot_path,
                 "English-German-SMT")
print_stat(dev, 'labels', 'predictions')

# Write the test predictions through format_submission.
submission_path = os.path.join(TEMP_DIRECTORY, SUBMISSION_FILE)
format_submission(df=test,
                  index=index,
                  method="SiameseTransQuest",
                  path=submission_path)