Python read_training_data Examples, experiments.helpers.read_training_data Python Examples

Example #1

0

Show file

def plot_selections_per_epoch(
        data_file='results/update_grades_over_bach_chorales.csv',
        plt_dir='plots/augmented-generation/'):
    """
    plot number of selections each epoch

    Arguments
        data_file: file of per-epoch grades, loaded with read_training_data
        plt_dir: directory to save the plot (created if it does not exist)

    For every epoch, counts the generations whose grade is strictly below the
    value returned by get_threshold (aggregate='75p' over the Bach grades
    file) and draws one bar per epoch, numbered from 1 in iteration order.
    """
    thres = get_threshold(
        data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
        column='grade',
        aggregate='75p',
    )
    data_dict = read_training_data(data_file=data_file, feature='grade')
    # built-in sum keeps every count an int, even for an epoch with no grades
    picked = [
        sum(1 for x in data if x < thres) for data in data_dict.values()
    ]

    # the style must be active before the figure is created to affect it;
    # a single subplots() call avoids leaving an extra empty figure open
    plt.style.use('seaborn-whitegrid')
    fig, ax = plt.subplots()
    ax.grid(False)
    rects = plt.bar(range(1, len(picked) + 1), picked)
    label_bars(rects)
    plt.xlabel('Epoch')
    plt.ylabel('Number of generations passing threshold')
    plt.title('Number of Generations Passing Threshold in Each Epoch')

    # savefig does not create missing directories
    if plt_dir:
        os.makedirs(plt_dir, exist_ok=True)
    plt.savefig(
        os.path.join(plt_dir, 'generations_passing_threshold_per_epoch.png'))
    # release the figure so repeated calls do not accumulate open figures
    plt.close(fig)

Example #2

0

Show file

def plot_histogram_per_iteration(data_file,
                                 feature='grade',
                                 plt_dir='plots/augmented-generation/',
                                 threshold=None):
    """
    visualize model updates by plotting histogram for grade distribution at each iteration

    Arguments
        data_file: file containing update grades, loaded with read_training_data
        feature: feature to histogram; also used for axis labels and file name
        plt_dir: directory to save plots
        threshold: forwarded unchanged to read_training_data

    One histogram is drawn per entry of the loaded dictionary, in iteration
    order, five per row. The grid keeps at least two rows and grows when there
    are more than ten iterations.
    """
    # read update data as dictionary
    data_dict = read_training_data(data_file=data_file,
                                   feature=feature,
                                   threshold=threshold)

    n_cols = 5
    # ceil division; never fewer than the two rows of the original layout
    n_rows = max(2, -(-len(data_dict) // n_cols))

    plt.figure(figsize=(20, 5 * n_rows))
    plt.style.use('seaborn-whitegrid')
    # place by running position rather than by key: subplot slots are
    # 1-based, so a key of 0 or a key beyond the grid size would raise
    for slot, (it, data) in enumerate(data_dict.items(), start=1):
        plt.subplot(n_rows, n_cols, slot)
        plt.hist(data, alpha=0.7)
        plt.xlabel(feature)
        plt.title(f'Iteration {it}')

    # leave headroom at the top for the figure-level title
    plt.tight_layout(rect=[0, 0.03, 1, 0.95])
    plt.suptitle(
        f'{feature} Distribution of Generations at Each Iteration of Training',
        fontsize=20)
    ensure_dir(plt_dir)
    plt.savefig(os.path.join(plt_dir, f'{feature}_update_dist.png'))
    plt.close()

Example #3

0

Show file

def main():
    """
    Compare grade distributions of Bach chorales and three models.

    Loads the 'grade' column for each source, prints the median and standard
    deviation per source, and passes everything to plot_violinplots.
    """
    # both baselines only keep grades strictly below this value
    grade_cap = 50

    bach_data = list(pd.read_csv(f'{bach_dir}/grades.csv')['grade'])

    # Baseline-all grades are entry 17 of that model's training data
    baseline_by_epoch = read_training_data(f'{baseline_dir}/grades.csv',
                                           feature='grade')
    baseline_data = list(baseline_by_epoch[17])

    # Aug-Gen and Baseline-none grades are read from the 351_mocks files
    aug_gen_data = pd.read_csv(f'{aug_gen_dir}/351_mocks/grades.csv')['grade']
    base_data = pd.read_csv(f'{base_dir}/351_mocks/grades.csv')['grade']

    base_data = [grade for grade in base_data if grade < grade_cap]
    baseline_data = [grade for grade in baseline_data if grade < grade_cap]

    data_dict = {}
    data_dict['Bach'] = bach_data
    data_dict['Aug-Gen\n' + r'($t=Q_3$ of Bach grades)'] = aug_gen_data
    data_dict['Baseline-none\n' + r'($t=-\infty$)'] = base_data
    data_dict['Baseline-all\n' + r'($t=\infty$)'] = baseline_data

    # label, then median, then standard deviation, one per line
    for model in data_dict:
        print(model)
        for statistic in (np.median, np.std):
            print(statistic(data_dict[model]))

    plot_violinplots(
        data_dict=data_dict,
        plt_title='Grade Distribution of Generations from Different Models',
        plt_dir='plots/')

Example #4

0

Show file

def plot_boxplot_per_epoch(
        data_file='results/update_grades_over_bach_chorales.csv',
        feature='grade',
        plt_dir='plots/augmented-generation/',
        threshold=None):
    """
    Arguments
        data_file: file containing upgrade grades
        feature: feature of interest (either overall grade or a feature distance)
        plt_dir: directory to save plots
        threshold: lower threshold for inclusion (forwarded to
            read_training_data; None leaves the data unfiltered, presumably
            matching that function's default -- TODO confirm)

    visualize model updates by plotting boxplot for grade distribution at each epoch
    """
    # read update data as dictionary; the threshold argument is now honoured
    # instead of being ignored and later overwritten
    data_dict = read_training_data(data_file=data_file,
                                   feature=feature,
                                   threshold=threshold)

    # plot
    # style and rc settings go first so the new figure picks them up; a single
    # subplots() call avoids leaving an extra empty figure open
    plt.style.use('seaborn-whitegrid')
    plt.rc('xtick', labelsize=11)
    plt.rc('ytick', labelsize=11)
    plt.rc('axes', titlesize=13)
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.xaxis.grid(False)
    ax.boxplot(list(data_dict.values()))
    # boxes sit at positions 1..n while the labels show the dictionary keys
    ax.set_xticks([i + 1 for i in data_dict.keys()])
    ax.set_xticklabels([str(i) for i in data_dict.keys()])
    # hide every other x label to reduce clutter
    for label in ax.get_xaxis().get_ticklabels()[1::2]:
        label.set_visible(False)
    # hide the first y tick label
    ylabel0 = ax.get_yaxis().get_ticklabels()[0]
    ylabel0.set_visible(False)
    plt.xlabel('Epoch')
    plt.title(
        f'{feature.capitalize()} Distribution of Generations During Aug-Gen Training'
    )
    # orientation hints placed left of the y axis: low values are better
    plt.text(-2.2, 1, 'better')
    plt.text(-2.2, 47, 'worse')
    plt.ylabel(feature.capitalize())
    plt.ylim([0, 49.15])

    # reference line; kept in its own name so the caller's threshold argument
    # is not clobbered
    bach_q3 = get_threshold(
        data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
        column='grade',
        aggregate='75p',
    )
    plt.axhline(y=bach_q3,
                color='steelblue',
                linestyle='-.',
                label=r'$Q_3$' + ' of Bach grades')
    plt.legend(loc='upper right')

    ensure_dir(plt_dir)
    fig.tight_layout()
    plt.savefig(os.path.join(plt_dir, f'{feature}_update_boxplots.png'))
    # release the figure so repeated calls do not accumulate open figures
    plt.close(fig)

Example #5

0

Show file

def plot_median_grade_per_epoch(dir_dict, num_epochs):
    """
    plot the median generation grade per epoch, one line per model

    Arguments
        dir_dict: maps a model label to the directory holding its grades.csv;
            each label must also be a key of PLOT_LABELS and PLOT_COLORS
        num_epochs: number of epochs to show; epochs at or beyond this index
            are ignored and epochs without data are plotted as 0

    The figure is written to plots/median_grades_per_epoch.png.
    """
    median_dict = defaultdict(lambda: [0] * num_epochs)
    for model_label, model_path in dir_dict.items():
        data_dict = read_training_data(data_file=f'{model_path}/grades.csv',
                                       feature='grade')
        for epoch, grades in data_dict.items():
            if epoch < num_epochs:
                median_dict[model_label][epoch] = np.median(grades)

    # a single subplots() call avoids leaving an extra empty figure open
    plt.style.use('seaborn-whitegrid')
    fig, ax = plt.subplots()
    ax.grid(False)
    thres = get_threshold(
        data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
        column='grade',
        aggregate='75p',
    )
    plt.axhline(y=thres,
                dashes=(2, 2),
                label='Lowest Bach\ngrade threshold',
                color=PLOT_COLORS['bach'])
    xlim = range(num_epochs)
    for model_label, median_grades in median_dict.items():
        plt.plot(xlim,
                 median_grades[:num_epochs],
                 label=PLOT_LABELS[model_label],
                 color=PLOT_COLORS[model_label])
    plt.title('Median Grade of Generations During Training')
    # ticks go exactly where the data is plotted (x = 0..num_epochs-1) so each
    # label sits under its own epoch rather than one position to the right
    ax.set_xticks(list(xlim))
    ax.set_xticklabels([str(i) for i in xlim])
    # hide every other x label to reduce clutter
    for label in ax.get_xaxis().get_ticklabels()[1::2]:
        label.set_visible(False)
    # legend is anchored outside the axes, to the left of the plot
    handles, labels = ax.get_legend_handles_labels()
    ax.legend(handles,
              labels,
              loc='upper center',
              bbox_to_anchor=(-0.2, 0.5))
    plt.ylabel('Grade')
    plt.xlabel('Epoch')
    # bbox_inches='tight' keeps the outside legend inside the saved image
    plt.savefig('plots/median_grades_per_epoch.png', bbox_inches='tight')
    # release the figure so repeated calls do not accumulate open figures
    plt.close(fig)