Ejemplo n.º 1
0
def draw_scatterplot(data_frame, real_column, prediction_column, path, topic):
    """Plot real and predicted values as two scatter series and save the plot.

    The rows are sorted by ``real_column`` and numbered 0..n-1 in a new ``id``
    column, which is used as the x axis. Both value columns are passed through
    ``fit`` before plotting. RMSE, MAE, Pearson and Spearman scores (as
    returned by the corresponding helper functions) are written onto the plot.

    The caller's frame is not modified by this function itself: all work is
    done on the new frame returned by ``sort_values``.

    Args:
        data_frame: pandas DataFrame containing both value columns.
        real_column: Name of the column holding the reference values.
        prediction_column: Name of the column holding the predicted values.
        path: Destination handed to ``Figure.savefig``.
        topic: Title shown above the plot.
    """
    data_frame = data_frame.sort_values(real_column)
    # Rank of each row after sorting; serves as the x coordinate.
    data_frame['id'] = list(range(len(data_frame.index)))

    data_frame = fit(data_frame, real_column)
    data_frame = fit(data_frame, prediction_column)

    # Convert each column once instead of once per metric.
    real_values = data_frame[real_column].tolist()
    predicted_values = data_frame[prediction_column].tolist()

    pearson = pearson_corr(real_values, predicted_values)
    spearman = spearman_corr(real_values, predicted_values)
    rmse_value = rmse(real_values, predicted_values)
    mae = mean_absolute_error(real_values, predicted_values)

    textstr = 'RMSE=%.4f\nMAE=%.4f\nPearson Correlation=%.4f\nSpearman Correlation=%.4f' % (
        rmse_value, mae, pearson, spearman)

    # Create exactly one figure and hand its axes to pandas. Without ``ax=``
    # DataFrame.plot opens its own figure, so a bare ``plt.figure()`` would
    # just leave an unused empty figure behind.
    fig, ax = plt.subplots()
    try:
        data_frame.plot(kind='scatter',
                        x='id',
                        y=real_column,
                        color='DarkBlue',
                        label='z_mean',
                        title=topic,
                        ax=ax)
        data_frame.plot(kind='scatter',
                        x='id',
                        y=prediction_column,
                        color='DarkGreen',
                        label='predicted z_mean',
                        ax=ax)
        # Anchor the metrics text at the horizontal middle of the plot, at the
        # lowest plotted value.
        ax.text(0.5 * data_frame.shape[0],
                min(min(real_values), min(predicted_values)),
                textstr,
                fontsize=10)

        fig.savefig(path)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly;
        # release it so repeated calls do not accumulate open figures.
        plt.close(fig)
Ejemplo n.º 2
0
# Column names shared by all three splits, and the extra label column that
# only the train and dev splits rename.
_pair_columns = {'original': 'text_a', 'translation': 'text_b'}
_scored_columns = {
    'original': 'text_a',
    'translation': 'text_b',
    'hter': 'labels'
}

# Rename to the text_a / text_b / labels column names and drop rows that
# contain missing values.
train = train.rename(columns=_scored_columns)
train = train.dropna()

dev = dev.rename(columns=_scored_columns)
dev = dev.dropna()

# The test split gets no 'hter' -> 'labels' rename.
test = test.rename(columns=_pair_columns)
test = test.dropna()

# Pass the label column of both labelled splits through ``fit``.
train = fit(train, 'labels')
dev = fit(dev, 'labels')

# Sanity check: ``index`` is expected to hold exactly 1000 entries.
assert len(index) == 1000
# Fold-wise prediction setup; only entered when the config enables evaluation
# during training.
# NOTE(review): the loop body may continue beyond the lines shown here.
if siamese_transformer_config["evaluate_during_training"]:
    if siamese_transformer_config["n_fold"] > 0:
        # Zero-initialised prediction buffers: one row per dev/test example,
        # one column per fold.
        dev_preds = np.zeros((len(dev), siamese_transformer_config["n_fold"]))
        test_preds = np.zeros(
            (len(test), siamese_transformer_config["n_fold"]))
        for i in range(siamese_transformer_config["n_fold"]):

            # Delete the configured best-model directory if it already exists,
            # presumably so each fold starts without a model left over from a
            # previous run or fold -- TODO confirm against the rest of the loop.
            if os.path.exists(
                    siamese_transformer_config['best_model_dir']
            ) and os.path.isdir(siamese_transformer_config['best_model_dir']):
                shutil.rmtree(siamese_transformer_config['best_model_dir'])