def grade_folder(chorale_dir, grader, grades_csv=None):
    """
    Grade a folder of chorales and write the grades to a CSV file.

    Arguments:
        chorale_dir: directory of chorales named 0.xml, 1.xml, etc.
        grader: Grader object
        grades_csv: name of the file to write grades to, relative to
            chorale_dir (defaults to 'grades.csv')

    The CSV has one row per chorale: its index, its overall grade, and one
    column per entry of grader.features.
    """
    print(f'Grading chorales in {chorale_dir}')

    # chorales are looked up by index (0.xml, 1.xml, ...), so the number of
    # .xml files in the directory decides which indices get parsed
    num_chorales = sum(
        1 for fname in os.listdir(chorale_dir) if fname.endswith('.xml'))

    grades = []
    for chorale_idx in tqdm(range(num_chorales)):
        chorale_xml = f'{chorale_dir}/{chorale_idx}.xml'
        score = parse_xml(chorale_xml)
        grade, chorale_vector = grader.grade_chorale(score)
        grades.append([grade, *chorale_vector])

    # the file name is always joined to chorale_dir below, so the default must
    # be a bare file name (not a path that already contains chorale_dir)
    if grades_csv is None:
        grades_csv = 'grades.csv'
    csv_path = f'{chorale_dir}/{grades_csv}'

    print(f'Writing data to {csv_path}')
    # newline='' lets the csv module control line endings itself
    with open(csv_path, 'w', newline='') as chorale_file:
        writer = csv.writer(chorale_file)
        writer.writerow(['', 'grade', *grader.features])
        for i, row in enumerate(grades):
            writer.writerow([i, *row])
def get_good_mocks(model_dir, max_epoch=22):
    """
    Collect the generated chorales whose grade is above the threshold.

    Arguments:
        model_dir: model directory holding grades.csv (read for its 'epoch',
            'gen_id' and 'grade' columns) and the generated files under
            generations/{epoch}/{gen_id}.xml
        max_epoch: last epoch (inclusive) whose generations are considered

    Returns a list with the result of parse_xml for every row whose grade is
    strictly greater than get_threshold().
    """
    grades_df = pd.read_csv(f'{model_dir}/grades.csv')
    grades_df = grades_df.loc[grades_df['epoch'] <= max_epoch]
    if grades_df.empty:
        return []

    # the threshold does not depend on the row, so look it up only once
    threshold = get_threshold()

    good_mock = []
    for row in grades_df.itertuples(index=False):
        if row.grade > threshold:
            score = parse_xml(
                f'{model_dir}/generations/{row.epoch}/{row.gen_id}.xml')
            good_mock.append(score)
    return good_mock
def get_chorale_note_distribution_and_grade(
    chorale_filename,
    plot_filename,
    major_note_distribution,
    minor_note_distribution,
    plot_title='Note distribution',
):
    """
    Plot the note distribution of a chorale and score it against a reference.

    Arguments
        chorale_filename: String that holds path to a music21.stream.Stream
        plot_filename: String that holds where user wants the plot to be saved
        major_note_distribution: Counter holding note distribution of major keys
        minor_note_distribution: Counter holding note distribution of minor keys
        plot_title: title shown on the plot

    The reference distribution and the order of the notes on the x axis
    (MAJOR_NOTE_ORDER or MINOR_NOTE_ORDER) are chosen from the mode of the
    analyzed key.

    Saves a plot, and returns a score (the Wasserstein distance between the
    chorale's distribution and the reference distribution)
    """
    chorale = parse_xml(chorale_filename)
    key = chorale.analyze('key')
    print(key)

    chorale_distribution = histogram_to_distribution(
        get_note_histogram(chorale, key))

    if key.mode == 'major':
        note_distribution = major_note_distribution
        notes = MAJOR_NOTE_ORDER
    else:
        note_distribution = minor_note_distribution
        notes = MINOR_NOTE_ORDER

    # computed once and shared by the plot and the score
    distribution_lists = distribution_to_list(chorale_distribution,
                                              note_distribution)
    chorale_list = distribution_lists[0]

    y_pos = np.arange(len(notes))
    fig = plt.figure()
    try:
        plt.bar(y_pos, chorale_list, align='center')
        plt.xticks(y_pos, notes)
        plt.xlabel('Scale Degree')
        plt.ylabel('Proportion')
        plt.title(plot_title)
        plt.savefig(plot_filename)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly
        plt.close(fig)

    return wasserstein_distance(*distribution_lists)
def main(
    train,
    load,
    aug_gen,
    base,
    generate,
    overfitted,
    epoch,
    config,
    description,
    num_workers,
):
    """
    Build the Bach transformer from a config file and run the requested steps.

    Arguments:
        train: if truthy, call transformer.train_model with the config's
            batch_size, num_batches, num_epochs and lr
        load: if truthy, use the directory of the config file as model_dir and
            load the model; otherwise create a new model directory under
            models/ and copy the config file and a README into it
        aug_gen: if truthy, run augmentative_generation with the threshold
            returned by get_threshold
        base: if truthy, run augmentative_generation with an infinite threshold
        generate: if truthy, call grade_constrained_mock with num_generations=1
        overfitted: when loading, call transformer.load(early_stopped=False)
        epoch: when loading and overfitted is falsy, a truthy value is passed
            as transformer.load(epoch=epoch); otherwise
            transformer.load(early_stopped=True) is used
        config: path to a Python file that defines a `config` dict
        description: text written to README.txt in a newly created model_dir
        num_workers: forwarded to train_model and augmentative_generation
    """
    # Use all gpus available
    gpu_ids = list(range(torch.cuda.device_count()))
    print(f'Using GPUs {gpu_ids}')

    # Load config
    config_path = config
    config_module_name = os.path.splitext(config)[0].replace('/', '.')
    config = importlib.import_module(config_module_name).config

    from experiments.augmentative_generation import augmentative_generation
    from experiments.generate_and_grade import grade_constrained_mock
    from Grader.grader import Grader, FEATURES
    from Grader.helpers import get_threshold

    # set random seed
    seed(config['random_seed'])

    # compute time stamp
    if config['timestamp'] is not None:
        timestamp = config['timestamp']
    else:
        timestamp = datetime.now().strftime('%m-%d_%H:%M')
        config['timestamp'] = timestamp

    # set model_dir
    if load:
        model_dir = os.path.dirname(config_path)
    else:
        if config['savename'] is None:
            if aug_gen:
                config['savename'] = 'aug-gen'
            elif base:
                config['savename'] = 'base'
            else:
                config['savename'] = 'model'
        model_dir = f'models/{config["savename"]}_{timestamp}'

    # === Decoder ====
    print('Parsing XML Bach dataset')
    bach_dataset = [
        parse_xml(f'chorales/bach_chorales/{i}.xml')
        for i in tqdm(range(351))
    ]
    num_examples = len(bach_dataset)
    split = [0.8, 0.2]
    # only the first fraction is needed: everything after it is validation
    split_idx = int(split[0] * num_examples)
    train_dataset = bach_dataset[:split_idx]
    val_dataset = bach_dataset[split_idx:]

    dataloader_generator_kwargs = config['dataloader_generator_kwargs']

    train_dataloader_generator = SmallBachDataloaderGenerator(
        dataset_name='bach_train',
        chorales=train_dataset,
        include_transpositions=dataloader_generator_kwargs[
            'include_transpositions'],
        sequences_size=dataloader_generator_kwargs['sequences_size'],
    )

    val_dataloader_generator = SmallBachDataloaderGenerator(
        dataset_name='bach_val',
        chorales=val_dataset,
        include_transpositions=dataloader_generator_kwargs[
            'include_transpositions'],
        sequences_size=dataloader_generator_kwargs['sequences_size'],
    )

    data_processor = get_data_processor(
        dataloader_generator=train_dataloader_generator,
        data_processor_type=config['data_processor_type'],
        data_processor_kwargs=config['data_processor_kwargs'])

    decoder_kwargs = config['decoder_kwargs']
    num_channels = 4  # is this number of voices?
    num_events_grouped = 4
    num_events = dataloader_generator_kwargs['sequences_size'] * 4
    transformer = TransformerBach(
        model_dir=model_dir,
        train_dataloader_generator=train_dataloader_generator,
        val_dataloader_generator=val_dataloader_generator,
        data_processor=data_processor,
        d_model=decoder_kwargs['d_model'],
        num_encoder_layers=decoder_kwargs['num_encoder_layers'],
        num_decoder_layers=decoder_kwargs['num_decoder_layers'],
        n_head=decoder_kwargs['n_head'],
        dim_feedforward=decoder_kwargs['dim_feedforward'],
        dropout=decoder_kwargs['dropout'],
        positional_embedding_size=decoder_kwargs['positional_embedding_size'],
        num_channels=num_channels,
        num_events=num_events,
        num_events_grouped=num_events_grouped)

    if load:
        if overfitted:
            transformer.load(early_stopped=False)
        elif epoch:
            transformer.load(epoch=epoch)
        else:
            transformer.load(early_stopped=True)
    else:
        # copy .py config file and create README in the model directory before training
        ensure_dir(model_dir)
        shutil.copy(config_path, f'{model_dir}/config.py')
        with open(f'{model_dir}/README.txt', 'w') as readme:
            readme.write(description)

    # the model is moved to the GPU whether it was loaded or freshly created
    transformer.to('cuda')

    grader = Grader(
        features=FEATURES,
        iterator=bach_dataset,
    )

    if train:
        transformer.train_model(
            batch_size=config['batch_size'],
            num_batches=config['num_batches'],
            num_epochs=config['num_epochs'],
            lr=config['lr'],
            plot=True,
            num_workers=num_workers,
        )

    if aug_gen:
        threshold = get_threshold(
            data_file='experiments/ablations/reg_pe_no_oe/bach_grades.csv',
            column='grade',
            aggregate='75p',
        )
        augmentative_generation(
            transformer=transformer,
            grader=grader,
            config=config,
            num_workers=num_workers,
            bach_iterator=train_dataset,
            threshold=threshold,
        )

    if base:
        # base model: an infinite threshold is passed to augmentative_generation
        augmentative_generation(
            transformer=transformer,
            grader=grader,
            config=config,
            num_workers=num_workers,
            bach_iterator=train_dataset,
            threshold=float('inf'),
        )

    if generate:
        grade_constrained_mock(
            grader=grader,
            transformer=transformer,
            output_dir=f'{transformer.model_dir}/constrained_mocks',
            bach_iterator=bach_dataset,
            num_generations=1,
        )
"""
Grade an existing chorale stored in XML format, followed by some example
analysis built from the helpers used by the grading function.
"""

import sys
sys.path[0] += '/../'

from Grader.grader import Grader, FEATURES
from Grader.compute_chorale_histograms import get_note_histogram
from Grader.distribution_helpers import histogram_to_distribution
from Grader.voice_leading_helpers import find_parallel_8ve_5th_errors
from transformer_bach.utils import parse_xml

# choose the chorale to grade here (this example assumes the Bach dataset has
# already been created)
BACH_DIR = 'chorales/bach_chorales'
chorale = parse_xml(f'{BACH_DIR}/0.xml')

# the default features are used (see paper)
grader = Grader(features=FEATURES, pickle_dir='original')

grade, feature_vector = grader.grade_chorale(chorale)
print(f'Grade: {grade}')
for feature_name, feature_value in zip(FEATURES, feature_vector):
    print(f'{feature_name}: {feature_value}')

# distribution of notes in the given chorale (swap the histogram helper to
# look at other features)
key = chorale.analyze('key')
note_histogram = get_note_histogram(chorale, key)
chorale_distribution = histogram_to_distribution(note_histogram)
# directory of ablation experiments ABLATIONS_DIR = 'experiments/ablations' # name of csv file that will be created in the directories of chorales BACH_GRADES_CSV = 'bach_grades.csv' MOCK_GRADES_CSV = 'mock_grades.csv' ALL_ABLATIONS = { 'original': FEATURES, } # ------------------------ for ablation in ALL_ABLATIONS: print(f'----- current ablation: {ablation} -----') print('Parsing XML to create bach_dataset') bach_dataset = [ parse_xml(f'chorales/bach_chorales/{i}.xml') for i in tqdm(range(351)) ] features = ALL_ABLATIONS[ablation] PICKLE_DIR = ablation ensure_dir(f'{ABLATIONS_DIR}/{PICKLE_DIR}/') # record the features in this ablation in features.txt with open(f'{ABLATIONS_DIR}/{PICKLE_DIR}/features.txt', 'w') as readme: readme.write('Features:\n') readme.write('\n'.join(features)) # create Grader object with the features in this ablation grader = Grader( features=features, iterator=bach_dataset,