def grade_folder(chorale_dir, grader, grades_csv=None):
    """
    Grade every chorale in a folder and write the results to a CSV file.

    Arguments:
        chorale_dir: directory of chorales named 0.xml, 1.xml, etc.
        grader: Grader object; grader.grade_chorale(score) must return
            (grade, feature_vector) and grader.features names the features
        grades_csv: name of the file to write grades to, relative to
            chorale_dir (defaults to 'grades.csv')

    The CSV holds one row per chorale: its index, its overall grade, then one
    column per entry of grader.features.
    """
    print(f'Grading chorales in {chorale_dir}')
    # Chorales are looked up by index (0.xml, 1.xml, ...), so the number of
    # .xml files determines which indices get graded.
    num_chorales = sum(
        1 for fname in os.listdir(chorale_dir) if fname.endswith('.xml'))

    grades = []
    for chorale_idx in tqdm(range(num_chorales)):
        chorale_xml = f'{chorale_dir}/{chorale_idx}.xml'
        score = parse_xml(chorale_xml)
        grade, chorale_vector = grader.grade_chorale(score)
        grades.append([grade, *chorale_vector])

    # grades_csv is a bare file name; the directory is prepended exactly once
    # (the default used to include chorale_dir and got it prepended twice).
    if grades_csv is None:
        grades_csv = 'grades.csv'
    grades_path = f'{chorale_dir}/{grades_csv}'
    print(f'Writing data to {grades_path}')
    # newline='' lets the csv module control line endings (no blank rows on
    # platforms that translate '\n').
    with open(grades_path, 'w', newline='') as chorale_file:
        writer = csv.writer(chorale_file)
        writer.writerow(['', 'grade', *grader.features])
        # Use a distinct loop name so the `grades` list is not rebound while
        # it is being iterated.
        for i, row in enumerate(grades):
            writer.writerow([i, *row])
def get_good_mocks(model_dir, max_epoch=22):
    """
    Collect the generated chorales whose grade exceeds the threshold.

    Arguments:
        model_dir: model directory containing grades.csv (read for its
            epoch, gen_id and grade columns) and the generated chorales at
            generations/<epoch>/<gen_id>.xml
        max_epoch: only rows with epoch <= max_epoch are considered

    Returns a list of parsed scores, in the row order of grades.csv.
    """
    grades_df = pd.read_csv(f'{model_dir}/grades.csv')
    grades_df = grades_df.loc[grades_df['epoch'] <= max_epoch]
    if grades_df.empty:
        # Nothing to compare, so the threshold is never looked up.
        return []

    # The threshold does not depend on the row: fetch it once instead of once
    # per generation. get_threshold() is called with its default arguments.
    threshold = get_threshold()

    good_mock = []
    for row in grades_df.itertuples(index=False):
        if row.grade > threshold:
            score = parse_xml(
                f'{model_dir}/generations/{row.epoch}/{row.gen_id}.xml')
            good_mock.append(score)
    return good_mock
# Example #3
# 0
def get_chorale_note_distribution_and_grade(
    chorale_filename,
    plot_filename,
    major_note_distribution,
    minor_note_distribution,
    plot_title='Note distribution',
):
    """
    Plot a chorale's note distribution and score it against a reference.

    Arguments
        chorale_filename: String that holds path to a music21.stream.Stream
        plot_filename: String that holds where user wants the plot to be saved
        major_note_distribution: Counter holding note distribution of major keys
        minor_note_distribution: Counter holding note distribution of minor keys
        plot_title: String used as the title of the plot

    The reference distribution and the x-axis labels (module-level
    MAJOR_NOTE_ORDER / MINOR_NOTE_ORDER) are chosen from the mode of the key
    found by analyzing the chorale.

    Saves a plot, and returns a score: the Wasserstein distance computed from
    the lists produced by distribution_to_list for the chorale and the
    reference distribution.
    """
    chorale = parse_xml(chorale_filename)
    key = chorale.analyze('key')
    print(key)
    chorale_distribution = histogram_to_distribution(
        get_note_histogram(chorale, key))

    is_major = key.mode == 'major'
    note_distribution = major_note_distribution if is_major else minor_note_distribution
    notes = MAJOR_NOTE_ORDER if is_major else MINOR_NOTE_ORDER

    # Convert once and reuse the result for both the plot and the distance.
    distribution_lists = distribution_to_list(chorale_distribution,
                                              note_distribution)
    y_pos = np.arange(len(notes))
    y_vals = distribution_lists[0]

    fig = plt.figure()
    try:
        plt.bar(y_pos, y_vals, align='center')
        plt.xticks(y_pos, notes)
        plt.xlabel('Scale Degree')
        plt.ylabel('Proportion')
        plt.title(plot_title)

        plt.savefig(plot_filename)
    finally:
        # pyplot keeps every figure alive until it is closed; release it so
        # repeated calls do not accumulate open figures.
        plt.close(fig)

    return wasserstein_distance(*distribution_lists)
def main(
    train,
    load,
    aug_gen,
    base,
    generate,
    overfitted,
    epoch,
    config,
    description,
    num_workers,
):
    """
    Build the Bach transformer and run the requested experiment stages.

    Arguments:
        train: if truthy, train the model with the hyperparameters in config
        load: if truthy, load an existing model; the model directory is then
            the directory containing the config file
        aug_gen: if truthy, run augmentative generation with the threshold
            returned by get_threshold
        base: if truthy, run augmentative generation with an infinite
            threshold (base model)
        generate: if truthy, generate and grade constrained mocks
        overfitted: with load, load the non-early-stopped weights
        epoch: with load (and not overfitted), load the weights of this epoch
        config: path to a .py config file; it is imported as a module
            ('/' replaced by '.') and must expose a `config` dict
        description: text written to README.txt in a newly created model
            directory
        num_workers: forwarded to training and augmentative generation
    """
    # Use all gpus available
    gpu_ids = list(range(torch.cuda.device_count()))
    print(f'Using GPUs {gpu_ids}')

    # Load config
    config_path = config
    config_module_name = os.path.splitext(config)[0].replace('/', '.')
    config = importlib.import_module(config_module_name).config

    from experiments.augmentative_generation import augmentative_generation
    from experiments.generate_and_grade import grade_constrained_mock
    from Grader.grader import Grader, FEATURES
    from Grader.helpers import get_threshold

    # set random seed
    seed(config['random_seed'])

    # compute time stamp
    if config['timestamp'] is not None:
        timestamp = config['timestamp']
    else:
        timestamp = datetime.now().strftime('%m-%d_%H:%M')
        config['timestamp'] = timestamp

    # set model_dir
    if load:
        model_dir = os.path.dirname(config_path)
    else:
        if config['savename'] is None:
            if aug_gen:
                config['savename'] = 'aug-gen'
            elif base:
                config['savename'] = 'base'
            else:
                config['savename'] = 'model'
        model_dir = f'models/{config["savename"]}_{timestamp}'

    # === Decoder ====
    print('Parsing XML Bach dataset')
    bach_dataset = [
        parse_xml(f'chorales/bach_chorales/{i}.xml') for i in tqdm(range(351))
    ]
    # first 80% of the chorales for training, the remainder for validation
    train_fraction = 0.8
    num_train = int(train_fraction * len(bach_dataset))
    train_dataset = bach_dataset[:num_train]
    val_dataset = bach_dataset[num_train:]
    dataloader_generator_kwargs = config['dataloader_generator_kwargs']

    train_dataloader_generator = SmallBachDataloaderGenerator(
        dataset_name='bach_train',
        chorales=train_dataset,
        include_transpositions=dataloader_generator_kwargs[
            'include_transpositions'],
        sequences_size=dataloader_generator_kwargs['sequences_size'],
    )

    val_dataloader_generator = SmallBachDataloaderGenerator(
        dataset_name='bach_val',
        chorales=val_dataset,
        include_transpositions=dataloader_generator_kwargs[
            'include_transpositions'],
        sequences_size=dataloader_generator_kwargs['sequences_size'],
    )

    data_processor = get_data_processor(
        dataloader_generator=train_dataloader_generator,
        data_processor_type=config['data_processor_type'],
        data_processor_kwargs=config['data_processor_kwargs'])

    decoder_kwargs = config['decoder_kwargs']
    num_channels = 4  # is this number of voices?
    num_events_grouped = 4
    # NOTE(review): the factor 4 presumably mirrors num_channels or
    # num_events_grouped - confirm before changing either value.
    num_events = dataloader_generator_kwargs['sequences_size'] * 4
    transformer = TransformerBach(
        model_dir=model_dir,
        train_dataloader_generator=train_dataloader_generator,
        val_dataloader_generator=val_dataloader_generator,
        data_processor=data_processor,
        d_model=decoder_kwargs['d_model'],
        num_encoder_layers=decoder_kwargs['num_encoder_layers'],
        num_decoder_layers=decoder_kwargs['num_decoder_layers'],
        n_head=decoder_kwargs['n_head'],
        dim_feedforward=decoder_kwargs['dim_feedforward'],
        dropout=decoder_kwargs['dropout'],
        positional_embedding_size=decoder_kwargs['positional_embedding_size'],
        num_channels=num_channels,
        num_events=num_events,
        num_events_grouped=num_events_grouped)

    if load:
        if overfitted:
            transformer.load(early_stopped=False)
        elif epoch:
            transformer.load(epoch=epoch)
        else:
            transformer.load(early_stopped=True)
    else:
        # copy .py config file and create README in the model directory before training
        ensure_dir(model_dir)
        shutil.copy(config_path, f'{model_dir}/config.py')

        # the with-block closes the file; no explicit close() is needed
        with open(f'{model_dir}/README.txt', 'w') as readme:
            readme.write(description)

    # the model is moved to the GPU whether it was loaded or freshly created
    transformer.to('cuda')

    grader = Grader(
        features=FEATURES,
        iterator=bach_dataset,
    )

    if train:
        transformer.train_model(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_epochs=config['num_epochs'],
            lr=config['lr'],
            plot=True,
            num_workers=num_workers,
        )

    if aug_gen:
        threshold = get_threshold(
            data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
            column='grade',
            aggregate='75p',
        )
        augmentative_generation(
            transformer=transformer,
            grader=grader,
            config=config,
            num_workers=num_workers,
            bach_iterator=train_dataset,
            threshold=threshold,
        )

    if base:
        # base model
        augmentative_generation(transformer=transformer,
                                grader=grader,
                                config=config,
                                num_workers=num_workers,
                                bach_iterator=train_dataset,
                                threshold=float('inf'))

    if generate:
        grade_constrained_mock(
            grader=grader,
            transformer=transformer,
            output_dir=f'{transformer.model_dir}/constrained_mocks',
            bach_iterator=bach_dataset,
            num_generations=1,
        )
# Example #5
# 0
"""
Script for grading an existing chorale in XML format, plus some example analysis from the grading function.
"""

import sys
sys.path[0] += '/../'
from Grader.grader import Grader, FEATURES
from Grader.compute_chorale_histograms import get_note_histogram
from Grader.distribution_helpers import histogram_to_distribution
from Grader.voice_leading_helpers import find_parallel_8ve_5th_errors
from transformer_bach.utils import parse_xml

# Example script: grade a single chorale and print its overall grade and
# per-feature values.

# specify the chorale here (example code assumes Bach dataset has been created)
BACH_DIR = 'chorales/bach_chorales'
chorale = parse_xml(f'{BACH_DIR}/0.xml')

# Build the grader; pickle_dir is passed through to Grader as-is
# (NOTE(review): presumably selects where Grader keeps its pickled data - confirm).
grader = Grader(
    # use default features (see paper)
    features=FEATURES,
    pickle_dir='original',
)

# grade_chorale returns the overall grade plus one value per feature
grade, feature_vector = grader.grade_chorale(chorale)
print(f'Grade: {grade}')

# print each feature name next to its value; feature_vector is paired
# positionally with FEATURES
for f, g in zip(FEATURES, feature_vector):
    print(f'{f}: {g}')

# compute the distribution of notes in the given chorale (this can be modified for other features);
# the result is only stored in chorale_distribution here, nothing is printed or plotted
key = chorale.analyze('key')
chorale_distribution = histogram_to_distribution(get_note_histogram(chorale, key))
# directory of ablation experiments; each ablation gets its own subdirectory
ABLATIONS_DIR = 'experiments/ablations'
# names of the csv files that will be created in the directories of chorales
BACH_GRADES_CSV = 'bach_grades.csv'
MOCK_GRADES_CSV = 'mock_grades.csv'

# maps an ablation name to the list of grader features used for it; the name
# is also used as the ablation's subdirectory under ABLATIONS_DIR
ALL_ABLATIONS = {
    'original': FEATURES,
}
# ------------------------

for ablation in ALL_ABLATIONS:
    print(f'----- current ablation: {ablation} -----')
    print('Parsing XML to create bach_dataset')
    bach_dataset = [
        parse_xml(f'chorales/bach_chorales/{i}.xml') for i in tqdm(range(351))
    ]

    features = ALL_ABLATIONS[ablation]
    PICKLE_DIR = ablation
    ensure_dir(f'{ABLATIONS_DIR}/{PICKLE_DIR}/')

    # record the features in this ablation in features.txt
    with open(f'{ABLATIONS_DIR}/{PICKLE_DIR}/features.txt', 'w') as readme:
        readme.write('Features:\n')
        readme.write('\n'.join(features))

    # create Grader object with the features in this ablation
    grader = Grader(
        features=features,
        iterator=bach_dataset,