예제 #1
0
def plot_test_vs_validation_set(output_name,
                                C,
                                sigma,
                                M,
                                xlim=None,
                                ylim=None,
                                xticks=None,
                                yticks=None):
    """
    Draw validation-error and generalization-error curves against the number
    of training samples (n) and save them as a single figure.

    Two pickled result sets are loaded, both keyed by C, sigma (formatted with
    two decimals) and M. The 'K2' set is drawn with solid lines and labelled
    m=n; the 'LOO' set is drawn with dashed lines and labelled m=1. In each
    set, column 0 of `results` is plotted as e_val and column 1 as e_gen.

    xlim, ylim, xticks and yticks are each passed to the matching pyplot
    function only when they are not None. The figure is saved under
    `output_name` suffixed with the repetition count of the LOO result set.
    """
    import matplotlib.pyplot as plt

    plt.ioff()
    plt.style.use('./latex-paper.mplstyle')
    plt.figure()
    plt.axes().yaxis.grid(True)

    def draw_curves(scheme, dash, val_label, gen_label):
        # Every '.' in the key (e.g. from the formatted sigma) becomes '_'.
        key = f'categorical_{scheme}_C{C}_sigma{sigma:.2f}_M{M}'.replace('.', '_')
        run = pickler.load(key)
        n_train = [point.n_train for point in run.xs]
        plt.plot(n_train, run.results[:, 0], 'C0' + dash, label=val_label)
        plt.plot(n_train, run.results[:, 1], 'C1' + dash, label=gen_label)
        return run

    draw_curves('K2', '-',
                r'$\mathrm{e}_{\mathrm{val}}(m=n)$',
                r'$\mathrm{e}_{\mathrm{gen}}(m=n)$')
    loo_run = draw_curves('LOO', '--',
                          r'$\mathrm{e}_{\mathrm{val}}(m=1)$',
                          r'$\mathrm{e}_{\mathrm{gen}}(m=1)$')

    plt.xlabel('n')
    plt.ylabel('MSE')

    # Caller-supplied axis settings, applied in a fixed order; None means
    # "leave matplotlib's choice alone".
    optional_settings = (
        (plt.xlim, xlim),
        (plt.ylim, ylim),
        (plt.xticks, xticks),
        (plt.yticks, yticks),
    )
    for apply_setting, value in optional_settings:
        if value is not None:
            apply_setting(value)

    plt.legend(loc='best')
    simulations_framework.save_figure(output_name + f'_reps{loo_run.n_repetitions}')
예제 #2
0
def plot_res(p, df, sigma, with_intercept):
    """
    Plot the MSE of the 'Correct' and 'Incorrect' pipelines against the total
    sample count and save the figure.

    Results come from read_results_with_confintervals(p, df, sigma,
    with_intercept). For each of the two MSE arrays, the region between
    column 0 and column 1 is shaded and the row-wise mean is drawn as a solid
    line (presumably the columns are confidence-interval bounds -- confirm
    against read_results_with_confintervals). The null-model MSE is drawn as
    a dotted black horizontal line across the whole x-range.

    The x-axis is n_train + n_validation of every entry in `xs`; ticks are
    placed at exactly those values.
    """
    results = read_results_with_confintervals(p, df, sigma, with_intercept)
    xs, null_model_mse, correct_mses, incorrect_mses, n_reps = results

    plt.style.use('./latex-paper.mplstyle')
    _, ax = plt.subplots()

    sample_counts = [x.n_train + x.n_validation for x in xs]
    leftmost, rightmost = sample_counts[0], sample_counts[-1]

    pipelines = (
        (correct_mses, 'Correct'),
        (incorrect_mses, 'Incorrect'),
    )
    # Both bands are drawn before either line, keeping the draw order (and
    # therefore the colour assignment) fixed.
    for mses, _ in pipelines:
        ax.fill_between(sample_counts, mses[:, 0], mses[:, 1], alpha=0.2)
    for mses, name in pipelines:
        ax.plot(sample_counts, np.mean(mses, axis=1), '-', label=name)

    ax.plot([leftmost, rightmost],
            [null_model_mse, null_model_mse],
            'k:',
            linewidth=1.0,
            label='Null model')

    ax.legend()
    ax.set_xticks(sample_counts)
    ax.set_xlim([leftmost, rightmost])
    ax.set_xlabel('N')
    ax.set_ylabel('MSE')
    # Title is currently disabled:
    #latexsigma = r'\sigma'
    #ax.set_title(f'$p={p} \ df={df} \ {latexsigma}={sigma} \ intr={int(with_intercept)} \ reps={n_reps}$')
    ax.grid(axis='y')

    figure_name = f'normalized_lasso_cv_pipeline_p{p}_df{df}_sigma{sigma}_10FOLDCV_reps{n_reps}'
    save_figure(figure_name, 'normalized_lasso_pipeline')
예제 #3
0
def plot_test_vs_validation_set(subdir,
                                filename_prefix,
                                M,
                                normalize,
                                xlim=None,
                                ylim=None,
                                xticks=None,
                                yticks=None):
    """
    Draw validation-error and generalization-error curves against the number
    of training samples (n) and save them as a single figure.

    Two pickled result sets named after `filename_prefix`, M and `normalize`
    are loaded. The 'K2' set is drawn with solid lines and labelled m=n; the
    'LOO' set is drawn with dashed lines and labelled m=1. In each set,
    column 0 of `results` is plotted as e_val and column 1 as e_gen.

    When `xlim` is None the x-axis is clipped to the range of training sizes
    of the LOO set. ylim, xticks and yticks are applied only when not None.
    The figure is saved into `subdir`; its name carries the repetition count
    of the LOO result set.
    """
    import matplotlib.pyplot as plt

    plt.ioff()
    plt.style.use('./latex-paper.mplstyle')
    plt.figure()
    plt.axes().yaxis.grid(True)

    def draw_curves(scheme, dash, val_label, gen_label):
        run = pickler.load(f'{filename_prefix}_{scheme}_M{M}_normalize{normalize}')
        n_train = [point.n_train for point in run.xs]
        plt.plot(n_train, run.results[:, 0], 'C0' + dash,
                 linewidth=1.5, label=val_label)
        plt.plot(n_train, run.results[:, 1], 'C1' + dash,
                 linewidth=1.5, label=gen_label)
        return run, n_train

    draw_curves('K2', '-',
                r'$\mathrm{e}_{\mathrm{val}}(m=n)$',
                r'$\mathrm{e}_{\mathrm{gen}}(m=n)$')
    loo_run, loo_n_train = draw_curves('LOO', '--',
                                       r'$\mathrm{e}_{\mathrm{val}}(m=1)$',
                                       r'$\mathrm{e}_{\mathrm{gen}}(m=1)$')

    plt.xlabel('$n$')
    plt.ylabel('MSE')

    # Only fall back to the data range when the caller gave no limits.
    if xlim is None:
        xlim = [min(loo_n_train), max(loo_n_train)]
    plt.xlim(xlim)

    optional_settings = (
        (plt.ylim, ylim),
        (plt.xticks, xticks),
        (plt.yticks, yticks),
    )
    for apply_setting, value in optional_settings:
        if value is not None:
            apply_setting(value)

    #ax.set_yscale('log')
    plt.legend(loc='best')
    figure_name = f'{filename_prefix}_M{M}_normalize{normalize}_reps{loo_run.n_repetitions}'
    save_figure(figure_name, subdir=subdir)
예제 #4
0
def plot_test_vs_validation_set(filename_prefix,
                                D,
                                df,
                                K_strong_columns,
                                strong_column_multiplier,
                                K,
                                noise_multiplier,
                                xlim=None,
                                ylim=None,
                                xticks=None,
                                yticks=None):
    """
    Draw validation-error and generalization-error curves against the number
    of training samples (n), add a null-model reference line, and save the
    figure.

    Three pickled result sets are loaded, all keyed by D, df,
    K_strong_columns, strong_column_multiplier and noise_multiplier (two
    decimals); the 'K2' and 'LOO' keys additionally contain K. The 'K2' set is
    drawn with solid lines and labelled m=n, the 'LOO' set with dashed lines
    and labelled m=1. In each set, column 0 of `results` is plotted as e_val
    and column 1 as e_gen. The null-model value `results[0][0]` is drawn as a
    dotted black horizontal line spanning the LOO training-size range.

    When `xlim` is None the x-axis is clipped to that same range. ylim,
    xticks and yticks are applied only when not None.
    """
    import matplotlib.pyplot as plt

    plt.ioff()
    plt.style.use('./latex-paper.mplstyle')
    plt.figure()
    plt.axes().yaxis.grid(True)

    # Key fragments shared by the input pickles and the output file name.
    design_tag = f'D{D}_df{df}_Kstrong{K_strong_columns}_multiplier{strong_column_multiplier}'
    noise_tag = f'noisemul{noise_multiplier:.2f}'

    def draw_curves(scheme, dash, val_label, gen_label):
        run = pickler.load(
            f'variable_selected_linear_regression_{scheme}_{design_tag}_K{K}_{noise_tag}')
        n_train = [point.n_train for point in run.xs]
        plt.plot(n_train, run.results[:, 0], 'C0' + dash,
                 linewidth=1.5, label=val_label)
        plt.plot(n_train, run.results[:, 1], 'C1' + dash,
                 linewidth=1.5, label=gen_label)
        return n_train

    draw_curves('K2', '-',
                r'$\mathrm{e}_{\mathrm{val}}(m=n)$',
                r'$\mathrm{e}_{\mathrm{gen}}(m=n)$')
    loo_n_train = draw_curves('LOO', '--',
                              r'$\mathrm{e}_{\mathrm{val}}(m=1)$',
                              r'$\mathrm{e}_{\mathrm{gen}}(m=1)$')

    null_run = pickler.load(
        f'variable_selected_linear_regression_null_model_{design_tag}_{noise_tag}')
    null_mse = null_run.results[0][0]
    smallest_n, largest_n = min(loo_n_train), max(loo_n_train)
    plt.plot([smallest_n, largest_n], [null_mse, null_mse],
             'k:', linewidth=1.0, label='Null model')

    plt.xlabel('$n$')
    plt.ylabel('MSE')

    if xlim is None:
        xlim = [smallest_n, largest_n]
    plt.xlim(xlim)

    optional_settings = (
        (plt.ylim, ylim),
        (plt.xticks, xticks),
        (plt.yticks, yticks),
    )
    for apply_setting, value in optional_settings:
        if value is not None:
            apply_setting(value)

    plt.legend(loc='best')
    # NOTE(review): the repetition count in the file name comes from the
    # null-model result set (the last one loaded), not from the K2/LOO runs --
    # confirm this is intended.
    figure_name = f'{filename_prefix}_{design_tag}_K{K}_{noise_tag}_reps{null_run.n_repetitions}'
    save_figure(figure_name, f'noise_{noise_multiplier:.2f}')