Ejemplo n.º 1
0
def draw_scatterplot(data_frame, real_column, prediction_column, path, topic):
    """Save a scatter plot of real versus predicted scores to ``path``.

    Rows are sorted by ``real_column`` and numbered from 0 in a helper ``id``
    column that is used as the x axis. Both score columns are passed through
    the ``fit`` helper (defined outside this function) before plotting. RMSE,
    MAE and the Pearson and Spearman correlations between the two columns are
    written as text inside the plot.

    Args:
        data_frame: pandas DataFrame holding both score columns. The caller's
            frame is not reassigned; ``sort_values`` returns a new frame and
            all further work happens on that copy.
        real_column: Name of the column with the reference scores.
        prediction_column: Name of the column with the predicted scores.
        path: Destination handed to ``Figure.savefig``.
        topic: Title of the plot.

    Raises:
        ValueError: If the frame has no rows (``min`` of an empty list).
    """
    data_frame = data_frame.sort_values(real_column)
    # Position of every row after sorting; serves as the x coordinate.
    data_frame['id'] = list(range(len(data_frame.index)))

    data_frame = fit(data_frame, real_column)
    data_frame = fit(data_frame, prediction_column)

    # Extract both columns once instead of once per metric.
    real_values = data_frame[real_column].tolist()
    predicted_values = data_frame[prediction_column].tolist()

    pearson = pearson_corr(real_values, predicted_values)
    spearman = spearman_corr(real_values, predicted_values)
    rmse_value = rmse(real_values, predicted_values)
    mae = mean_absolute_error(real_values, predicted_values)

    textstr = 'RMSE=%.4f\nMAE=%.4f\nPearson Correlation=%.4f\nSpearman Correlation=%.4f' % (
        rmse_value, mae, pearson, spearman)

    # No plt.figure() here: pandas opens its own figure when no ``ax`` is
    # given, so an extra call would only leave an empty figure behind.
    ax = data_frame.plot(kind='scatter',
                         x='id',
                         y=real_column,
                         color='DarkBlue',
                         label='z_mean',
                         title=topic)
    ax = data_frame.plot(kind='scatter',
                         x='id',
                         y=prediction_column,
                         color='DarkGreen',
                         label='predicted z_mean',
                         ax=ax)
    # Anchor the metrics text at the horizontal middle and at the lowest
    # plotted y value.
    ax.text(0.5 * data_frame.shape[0],
            min(min(real_values), min(predicted_values)),
            textstr,
            fontsize=10)

    fig = ax.get_figure()
    try:
        fig.savefig(path)
    finally:
        # Release the figure so repeated calls do not accumulate open figures.
        plt.close(fig)
Ejemplo n.º 2
0
    'z_mean': 'labels'
}).dropna()
# Rename the dev columns to text_a / text_b / labels, then drop rows with
# missing values.
dev = dev.rename(
    columns={'original': 'text_a', 'translation': 'text_b', 'z_mean': 'labels'})
dev = dev.dropna()

# Only the two text columns are renamed for the test split.
test = test.rename(columns={'original': 'text_a', 'translation': 'text_b'})
test = test.dropna()

# One [text_a, text_b] list per test row.
test_sentence_pairs = [
    [source_text, target_text]
    for source_text, target_text in zip(test['text_a'].to_list(),
                                        test['text_b'].to_list())
]

# `fit` is defined outside this excerpt; it is applied to the 'labels' column.
train = fit(train, 'labels')
dev = fit(dev, 'labels')

# `index` is defined outside this excerpt; exactly 1000 entries are expected.
assert len(index) == 1000
if transformer_config["evaluate_during_training"]:
    if transformer_config["n_fold"] > 1:
        dev_preds = np.zeros((len(dev), transformer_config["n_fold"]))
        test_preds = np.zeros((len(test), transformer_config["n_fold"]))
        for i in range(transformer_config["n_fold"]):

            if os.path.exists(
                    transformer_config['output_dir']) and os.path.isdir(
                        transformer_config['output_dir']):
                shutil.rmtree(transformer_config['output_dir'])

            model = QuestModel(MODEL_TYPE,
Ejemplo n.º 3
0
    }).dropna()
    # Rename the dev columns to text_a / text_b / labels, then drop rows with
    # missing values.
    dev_temp = dev_temp.rename(columns={
        'original': 'text_a', 'translation': 'text_b', 'z_mean': 'labels'})
    dev_temp = dev_temp.dropna()

    # Only the two text columns are renamed for the test split.
    test_temp = test_temp.rename(
        columns={'original': 'text_a', 'translation': 'text_b'})
    test_temp = test_temp.dropna()

    # One [text_a, text_b] list per test row.
    test_sentence_pairs_temp = [
        [source_text, target_text]
        for source_text, target_text in zip(test_temp['text_a'].to_list(),
                                            test_temp['text_b'].to_list())
    ]

    # `fit` is defined outside this excerpt; it is applied to the 'labels'
    # column.
    train_temp = fit(train_temp, 'labels')
    dev_temp = fit(dev_temp, 'labels')

    # Collect this iteration's results in the accumulator lists.
    train_list.append(train_temp)
    dev_list.append(dev_temp)
    test_list.append(test_temp)
    index_list.append(index_temp)
    test_sentence_pairs_list.append(test_sentence_pairs_temp)

# Concatenate the frames accumulated in train_list into one training frame.
train = pd.concat(train_list)

if transformer_config["evaluate_during_training"]:
    if transformer_config["n_fold"] > 1:
        dev_preds_list = []
        test_preds_list = []