Code example #1
def plot_selections_per_epoch(
        data_file='results/update_grades_over_bach_chorales.csv',
        plt_dir='plots/augmented-generation/'):
    """
    plot number of selections each epoch
    """
    thres = get_threshold(
        data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
        column='grade',
        aggregate='75p',
    )
    data_dict = read_training_data(data_file=data_file, feature='grade')
    # lower grades are better, so a generation passes if its grade falls below the threshold
    picked = [
        np.sum([1 for x in data if x < thres]) for data in data_dict.values()
    ]

    # apply the style before creating the axes so it actually takes effect
    plt.style.use('seaborn-whitegrid')
    fig, ax = plt.subplots()
    ax.grid(False)
    rects = plt.bar(range(1, len(picked) + 1), picked)
    label_bars(rects)
    plt.xlabel('Epoch')
    plt.ylabel('Number of generations passing threshold')
    plt.title('Number of Generations Passing Threshold in Each Epoch')
    ensure_dir(plt_dir)
    plt.savefig(
        os.path.join(plt_dir, 'generations_passing_threshold_per_epoch.png'))
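These snippets omit their import preamble. The sketch below lists a plausible set of imports, assuming the standard libraries used above; only the location of get_threshold is confirmed by code example #5, while the modules of the remaining project helpers are not shown in these excerpts and are left as comments rather than guessed.

# Assumed import preamble for these snippets; only Grader.helpers is confirmed
# by code example #5, the other project-helper locations are not shown here.
import os
from collections import defaultdict

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from Grader.helpers import get_threshold
# read_training_data, label_bars, ensure_dir, parse_xml, PLOT_COLORS and
# PLOT_LABELS are project helpers whose modules do not appear in these excerpts.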
Code example #2
def plot_boxplot_per_epoch(
        data_file='results/update_grades_over_bach_chorales.csv',
        feature='grade',
        plt_dir='plots/augmented-generation/',
        threshold=None):
    """
    Arguments
        data_file: file containing upgrade grades
        feature: feature of interest (either overall grade or a feature distance)
        plt_dir: directory to save plots
        threshold: lower threshold for inclusion

    visualize model updates by plotting boxplot for grade distribution at each epoch
    """
    # read update data as dictionary
    data_dict = read_training_data(data_file=data_file, feature=feature)

    # plot
    plt.style.use('seaborn-whitegrid')
    plt.rc('xtick', labelsize=11)
    plt.rc('ytick', labelsize=11)
    plt.rc('axes', titlesize=13)
    fig, ax = plt.subplots(figsize=(8, 5))
    ax.xaxis.grid(False)
    ax.boxplot(list(data_dict.values()))
    ax.set_xticks([i + 1 for i in data_dict.keys()])
    ax.set_xticklabels([str(i) for i in data_dict.keys()])
    for label in ax.get_xaxis().get_ticklabels()[1::2]:
        label.set_visible(False)
    ylabel0 = ax.get_yaxis().get_ticklabels()[0]
    ylabel0.set_visible(False)
    plt.xlabel('Epoch')
    plt.title(
        f'{feature.capitalize()} Distribution of Generations During Aug-Gen Training'
    )
    plt.text(-2.2, 1, 'better')
    plt.text(-2.2, 47, 'worse')
    plt.ylabel(feature.capitalize())
    plt.ylim([0, 49.15])

    # fall back to the 75th percentile of Bach grades when no threshold is given
    if threshold is None:
        threshold = get_threshold(
            data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
            column='grade',
            aggregate='75p',
        )
    plt.axhline(y=threshold,
                color='steelblue',
                linestyle='-.',
                label=r'$Q_3$' + ' of Bach grades')
    plt.legend(loc='upper right')

    ensure_dir(plt_dir)
    fig.tight_layout()
    plt.savefig(os.path.join(plt_dir, f'{feature}_update_boxplots.png'))
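A short usage sketch, assuming the function above is in scope; the explicit threshold value below is purely illustrative.

# Hypothetical calls; with threshold=None the Bach-grade threshold is computed,
# while an explicit value simply moves the reference line.
plot_boxplot_per_epoch(feature='grade')
plot_boxplot_per_epoch(feature='grade', threshold=20.0)  # illustrative value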
Code example #3
def get_good_mocks(model_dir):
    """Collect the generations in model_dir whose grade exceeds the threshold."""
    good_mocks = []
    grades_df = pd.read_csv(f'{model_dir}/grades.csv')
    # only consider generations from the first 22 epochs
    grades_df = grades_df.loc[grades_df['epoch'] <= 22]
    # compute the threshold once instead of on every row
    threshold = get_threshold()
    for row in grades_df.itertuples(index=False):
        if row.grade > threshold:
            score = parse_xml(
                f'{model_dir}/generations/{row.epoch}/{row.gen_id}.xml')
            good_mocks.append(score)
    return good_mocks
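A hedged usage sketch; the model directory below is hypothetical and is expected to contain grades.csv plus a generations/<epoch>/<gen_id>.xml tree.

# Hypothetical directory name; any directory with the expected layout works.
good_scores = get_good_mocks('models/aug-gen_05-01_10:00')
print(f'{len(good_scores)} generations exceeded the threshold')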
Code example #4
def plot_median_grade_per_epoch(dir_dict, num_epochs):
    median_dict = defaultdict(lambda: [0] * num_epochs)
    for model_label, model_path in dir_dict.items():
        data_dict = read_training_data(data_file=f'{model_path}/grades.csv',
                                       feature='grade')
        for epoch, grades in data_dict.items():
            if epoch < num_epochs:
                median_dict[model_label][epoch] = np.median(grades)

    plt.style.use('seaborn-whitegrid')
    fig, ax = plt.subplots()
    ax.grid(False)
    thres = get_threshold(
        data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
        column='grade',
        aggregate='75p',
    )
    plt.axhline(y=thres,
                dashes=(2, 2),
                label='Lowest Bach\ngrade threshold',
                color=PLOT_COLORS['bach'])
    epochs = range(num_epochs)
    for model_label, median_grades in median_dict.items():
        plt.plot(epochs,
                 median_grades[:num_epochs],
                 label=PLOT_LABELS[model_label],
                 color=PLOT_COLORS[model_label])
    plt.title('Median Grade of Generations During Training')
    # place the ticks at the plotted x positions so labels line up with the data
    ax.set_xticks(list(epochs))
    ax.set_xticklabels([str(i) for i in epochs])
    for label in ax.get_xaxis().get_ticklabels()[1::2]:
        label.set_visible(False)
    handles, labels = ax.get_legend_handles_labels()
    lgd = ax.legend(handles,
                    labels,
                    loc='upper center',
                    bbox_to_anchor=(-0.2, 0.5))
    plt.ylabel('Grade')
    plt.xlabel('Epoch')
    plt.savefig('plots/median_grades_per_epoch.png', bbox_inches='tight')
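A hedged usage sketch for comparing two training runs; the dictionary keys are assumed to exist in PLOT_LABELS and PLOT_COLORS, and the directory paths are illustrative.

# Hypothetical model directories; keys must match entries in PLOT_LABELS/PLOT_COLORS.
dir_dict = {
    'aug-gen': 'models/aug-gen_05-01_10:00',
    'base': 'models/base_05-01_10:00',
}
plot_median_grade_per_epoch(dir_dict, num_epochs=30)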
Code example #5
def main(
    train,
    load,
    aug_gen,
    base,
    generate,
    overfitted,
    epoch,
    config,
    description,
    num_workers,
):
    # Use all available GPUs
    gpu_ids = list(range(torch.cuda.device_count()))
    print(f'Using GPUs {gpu_ids}')

    # Load config
    config_path = config
    config_module_name = os.path.splitext(config)[0].replace('/', '.')
    config = importlib.import_module(config_module_name).config

    from experiments.augmentative_generation import augmentative_generation
    from experiments.generate_and_grade import grade_folder, grade_constrained_mock, grade_unconstrained_mock
    from Grader.grader import Grader, FEATURES
    from Grader.helpers import get_threshold

    # set random seed
    seed(config['random_seed'])

    # compute time stamp
    if config['timestamp'] is not None:
        timestamp = config['timestamp']
    else:
        timestamp = datetime.now().strftime('%m-%d_%H:%M')
        config['timestamp'] = timestamp

    # set model_dir
    if load:
        model_dir = os.path.dirname(config_path)
    else:
        if config['savename'] is None:
            if aug_gen:
                config['savename'] = 'aug-gen'
            elif base:
                config['savename'] = 'base'
            else:
                config['savename'] = 'model'
        model_dir = f'models/{config["savename"]}_{timestamp}'

    # === Decoder ====
    print('Parsing XML Bach dataset')
    bach_dataset = [
        parse_xml(f'chorales/bach_chorales/{i}.xml') for i in tqdm(range(351))
    ]
    num_examples = len(bach_dataset)
    split = [0.8, 0.2]
    train_dataset = bach_dataset[:int(split[0] * num_examples)]
    val_dataset = bach_dataset[int(split[0] * num_examples):]
    dataloader_generator_kwargs = config['dataloader_generator_kwargs']

    train_dataloader_generator = SmallBachDataloaderGenerator(
        dataset_name='bach_train',
        chorales=train_dataset,
        include_transpositions=dataloader_generator_kwargs[
            'include_transpositions'],
        sequences_size=dataloader_generator_kwargs['sequences_size'],
    )

    val_dataloader_generator = SmallBachDataloaderGenerator(
        dataset_name='bach_val',
        chorales=val_dataset,
        include_transpositions=dataloader_generator_kwargs[
            'include_transpositions'],
        sequences_size=dataloader_generator_kwargs['sequences_size'],
    )

    data_processor = get_data_processor(
        dataloader_generator=train_dataloader_generator,
        data_processor_type=config['data_processor_type'],
        data_processor_kwargs=config['data_processor_kwargs'])

    decoder_kwargs = config['decoder_kwargs']
    num_channels = 4  # number of voices in a chorale (SATB)
    num_events_grouped = 4
    num_events = dataloader_generator_kwargs['sequences_size'] * 4
    transformer = TransformerBach(
        model_dir=model_dir,
        train_dataloader_generator=train_dataloader_generator,
        val_dataloader_generator=val_dataloader_generator,
        data_processor=data_processor,
        d_model=decoder_kwargs['d_model'],
        num_encoder_layers=decoder_kwargs['num_encoder_layers'],
        num_decoder_layers=decoder_kwargs['num_decoder_layers'],
        n_head=decoder_kwargs['n_head'],
        dim_feedforward=decoder_kwargs['dim_feedforward'],
        dropout=decoder_kwargs['dropout'],
        positional_embedding_size=decoder_kwargs['positional_embedding_size'],
        num_channels=num_channels,
        num_events=num_events,
        num_events_grouped=num_events_grouped)

    if load:
        if overfitted:
            transformer.load(early_stopped=False)
        elif epoch:
            transformer.load(epoch=epoch)
        else:
            transformer.load(early_stopped=True)
        transformer.to('cuda')

    # copy .py config file and create README in the model directory before training
    if not load:
        ensure_dir(model_dir)
        shutil.copy(config_path, f'{model_dir}/config.py')
        transformer.to('cuda')

        with open(f'{model_dir}/README.txt', 'w') as readme:
            readme.write(description)

    grader = Grader(
        features=FEATURES,
        iterator=bach_dataset,
    )

    if train:
        transformer.train_model(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_epochs=config['num_epochs'],
            lr=config['lr'],
            plot=True,
            num_workers=num_workers,
        )

    if aug_gen:
        threshold = get_threshold(
            data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
            column='grade',
            aggregate='75p',
        )
        augmentative_generation(
            transformer=transformer,
            grader=grader,
            config=config,
            num_workers=num_workers,
            bach_iterator=train_dataset,
            threshold=threshold,
        )

    if base:
        # base model
        augmentative_generation(transformer=transformer,
                                grader=grader,
                                config=config,
                                num_workers=num_workers,
                                bach_iterator=train_dataset,
                                threshold=float('inf'))

    if generate:
        grade_constrained_mock(
            grader=grader,
            transformer=transformer,
            output_dir=f'{transformer.model_dir}/constrained_mocks',
            bach_iterator=bach_dataset,
            num_generations=1,
        )
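The flag-style parameters of main suggest it is driven by a command-line wrapper that is not part of this excerpt. Below is a minimal argparse wiring sketch under that assumption; the original project may use a different CLI library, and the flag names simply mirror the function's parameters.

# Hypothetical CLI wiring; the real entry point is not shown in this excerpt.
import argparse

if __name__ == '__main__':
    parser = argparse.ArgumentParser()
    parser.add_argument('--train', action='store_true')
    parser.add_argument('--load', action='store_true')
    parser.add_argument('--aug_gen', action='store_true')
    parser.add_argument('--base', action='store_true')
    parser.add_argument('--generate', action='store_true')
    parser.add_argument('--overfitted', action='store_true')
    parser.add_argument('--epoch', type=int, default=None)
    parser.add_argument('--config', type=str, required=True)
    parser.add_argument('--description', type=str, default='')
    parser.add_argument('--num_workers', type=int, default=0)
    args = parser.parse_args()
    main(**vars(args))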