def build_bach_beat(dataset_manager, batch_size, subdivision, sequences_size,
                    test_bool):
    metadatas = [
        TickMetadata(subdivision=subdivision),
        FermataMetadata(),
        KeyMetadata()
    ]
    name = 'bach_chorales'
    if test_bool:
        name += '_test'
    bach_chorales_dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
        name=name,
        voice_ids=[0, 1, 2, 3],
        metadatas=metadatas,
        sequences_size=sequences_size,
        subdivision=subdivision
    )
    (train_dataloader,
     val_dataloader,
     test_dataloader) = bach_chorales_dataset.data_loaders(
        batch_size=batch_size,
        cache_dir=dataset_manager.cache_dir,
        split=(0.85, 0.10)
    )
    print('Num Train Batches: ', len(train_dataloader))
    print('Num Valid Batches: ', len(val_dataloader))
    print('Num Test Batches: ', len(test_dataloader))
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, batch_size, num_epochs, train, plot, log, lr):
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    mvae_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_test_kwargs)
    model = MeasureVAE(
        dataset=folk_dataset,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_encoder_layers=num_encoder_layers,
        encoder_hidden_size=encoder_hidden_size,
        encoder_dropout_prob=encoder_dropout_prob,
        latent_space_dim=latent_space_dim,
        num_decoder_layers=num_decoder_layers,
        decoder_hidden_size=decoder_hidden_size,
        decoder_dropout_prob=decoder_dropout_prob,
        has_metadata=has_metadata)
    if train:
        if torch.cuda.is_available():
            model.cuda()
        trainer = VAETrainer(dataset=folk_dataset, model=model, lr=lr)
        trainer.train_model(batch_size=batch_size, num_epochs=num_epochs,
                            plot=plot, log=log)
    else:
        model.load()
        model.cuda()
        model.eval()
    tester = VAETester(dataset=folk_dataset_test, model=model)
    tester.test_model()
def main(note_embedding_dim, meta_embedding_dim, num_layers, lstm_hidden_size,
         dropout_lstm, linear_hidden_size, batch_size, num_epochs, train,
         num_iterations, sequence_length_ticks):
    dataset_manager = DatasetManager()
    metadatas = [
        FermataMetadata(),
        TickMetadata(subdivision=4),
        KeyMetadata()
    ]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4
    }
    bach_chorales_dataset = dataset_manager.get_dataset(
        name='bach_chorales',
        **chorale_dataset_kwargs
    )
    dataset = bach_chorales_dataset
    deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size
    )
    if train:
        deepbach.train(batch_size=batch_size, num_epochs=num_epochs)
    else:
        deepbach.load()
        deepbach.cuda()
    print('Generation')
    score, tensor_chorale, tensor_metadata = deepbach.generation(
        num_iterations=num_iterations,
        length=sequence_length_ticks,
    )
    score.write('midi', fp='test.mid')
def main(
        note_embedding_dim,
        meta_embedding_dim,
        num_layers,
        lstm_hidden_size,
        dropout_lstm,
        input_dropout,
        linear_hidden_size,
        batch_size,
        num_epochs,
        train,
        no_metadata,
):
    metadatas = [
        TickMetadata(subdivision=4),
    ]
    dataset_manager = DatasetManager()
    chorale_dataset_kwargs = {
        'voice_ids': [0],
        'metadatas': metadatas,
        'sequences_size': 20,
        'subdivision': 4
    }
    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    model = AnticipationRNN(
        chorale_dataset=bach_chorales_dataset,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        no_metadata=no_metadata,
    )
    if train:
        model.cuda()
        model.train_model(batch_size=batch_size, num_epochs=num_epochs)
    else:
        model.load()
        model.cuda()
    print('Fill')
    score, _, _ = model.fill(C3)
    score.show()
def main(include_transpositions):
    dataset_manager = DatasetManager()
    print('step 1/3: prepare dataset')
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4,
        'include_transpositions': include_transpositions,
    }
    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    dataset = bach_chorales_dataset
    get_pairs(dataset, model_ids=[5, 9])
def setup(self):
    """Load the model"""
    # music21.environment.set("musicxmlPath", "/bin/true")
    note_embedding_dim = 20
    meta_embedding_dim = 20
    num_layers = 2
    lstm_hidden_size = 256
    dropout_lstm = 0.5
    linear_hidden_size = 256
    batch_size = 256
    num_epochs = 5
    train = False
    num_iterations = 500
    sequence_length_ticks = 64
    dataset_manager = DatasetManager()
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        "voice_ids": [0, 1, 2, 3],
        "metadatas": metadatas,
        "sequences_size": 8,
        "subdivision": 4,
    }
    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name="bach_chorales", **chorale_dataset_kwargs
    )
    dataset = bach_chorales_dataset
    self.deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size,
    )
    self.deepbach.load()

    # load FluidSynth for MIDI-to-audio conversion
    self.fs = FluidSynth()
def init_app(
        note_embedding_dim,
        meta_embedding_dim,
        num_layers,
        lstm_hidden_size,
        dropout_lstm,
        input_dropout,
        linear_hidden_size,
):
    metadatas = [
        TickMetadata(subdivision=4),
    ]
    dataset_manager = DatasetManager()
    chorale_dataset_kwargs = {
        'voice_ids': [0],
        'metadatas': metadatas,
        'sequences_size': 20,
        'subdivision': 4
    }
    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    global model
    model = AnticipationRNN(
        chorale_dataset=bach_chorales_dataset,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
    )
    model.load()
    model.cuda()

    # launch the script
    # accessible only locally:
    app.run()
def build_folk(dataset_manager, batch_size, subdivision, sequences_size):
    metadatas = [
        BeatMarkerMetadata(subdivision=subdivision),
        TickMetadata(subdivision=subdivision)
    ]
    folk_dataset_kwargs = {
        'metadatas': metadatas,
        'sequences_size': sequences_size
    }
    folk_dataset: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars',
        **folk_dataset_kwargs
    )
    (train_dataloader,
     val_dataloader,
     test_dataloader) = folk_dataset.data_loaders(
        batch_size=batch_size,
        split=(0.7, 0.2)
    )
    print('Num Train Batches: ', len(train_dataloader))
    print('Num Valid Batches: ', len(val_dataloader))
    print('Num Test Batches: ', len(test_dataloader))
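# Hedged usage sketch (not part of the original scripts): one way the two
# dataset-loading helpers above might be driven. The batch sizes and sequence
# lengths below are illustrative assumptions; only DatasetManager,
# build_bach_beat and build_folk come from the surrounding code.
if __name__ == '__main__':
    from DatasetManager.dataset_manager import DatasetManager

    dataset_manager = DatasetManager()
    # Bach chorales: 4 ticks per quarter note, 8-beat sequences (assumed values)
    build_bach_beat(dataset_manager, batch_size=128, subdivision=4,
                    sequences_size=8, test_bool=False)
    # Folk tunes: 6 ticks per beat, 32-tick sequences (assumed values)
    build_folk(dataset_manager, batch_size=128, subdivision=6,
               sequences_size=32)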
def get_dataset(dataset_manager, dataset_type, subdivision, sequence_size,
                velocity_quantization, max_transposition, num_heads,
                per_head_dim, local_position_embedding_dim, block_attention,
                group_instrument_per_section, nade, cpc_config_name,
                double_conditioning, instrument_presence_in_encoder):
    if dataset_type == 'bach':
        if nade:
            raise Exception(
                'The nade path looks unreliable in the data processor; '
                'check before using')
        metadatas = [
            FermataMetadata(),
            TickMetadata(subdivision=subdivision),
            KeyMetadata()
        ]
        voices_ids = [0, 1, 2, 3]
        if cpc_config_name is not None:
            # Note: to compute the first cpc code, we need to waste block_size tokens
            cpc_model = init_cpc_model(cpc_config_name)
            block_size = cpc_model.dataloader_generator.num_tokens_per_block // (
                subdivision * len(voices_ids))
            sequence_size += block_size
        chorale_dataset_kwargs = {
            'voice_ids': voices_ids,
            'metadatas': metadatas,
            'sequences_size': sequence_size,
            'subdivision': subdivision,
        }
        dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
            name='bach_chorales_beats', **chorale_dataset_kwargs)
        if cpc_config_name is None:
            processor_encoder = BachBeatsDataProcessor(
                dataset=dataset, embedding_dim=512 - 8, reducer_input_dim=512,
                local_position_embedding_dim=8, encoder_flag=True,
                monophonic_flag=False, nade_flag=nade)
            processor_decoder = BachBeatsDataProcessor(
                dataset=dataset, embedding_dim=512 - 8, reducer_input_dim=512,
                local_position_embedding_dim=8, encoder_flag=False,
                monophonic_flag=False, nade_flag=nade)
        else:
            processor_encoder = BachBeatsCPCDataProcessor(
                dataset=dataset, embedding_dim=512 - 8, reducer_input_dim=512,
                local_position_embedding_dim=8, encoder_flag=True,
                monophonic_flag=False, nade_flag=nade, cpc_model=cpc_model)
            processor_decoder = BachBeatsCPCDataProcessor(
                dataset=dataset, embedding_dim=512 - 8, reducer_input_dim=512,
                local_position_embedding_dim=8, encoder_flag=False,
                monophonic_flag=False, nade_flag=nade, cpc_model=cpc_model)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'bach_small':
        metadatas = [
            FermataMetadata(),
            TickMetadata(subdivision=subdivision),
            KeyMetadata()
        ]
        voices_ids = [0, 1, 2, 3]
        if cpc_config_name is not None:
            # Note: to compute the first cpc code, we need to waste block_size tokens
            cpc_model = init_cpc_model(cpc_config_name)
            num_tokens_per_block = cpc_model.dataloader_generator.num_tokens_per_block // (
                subdivision * len(voices_ids))
            sequence_size += num_tokens_per_block
        chorale_dataset_kwargs = {
            'voice_ids': voices_ids,
            'metadatas': metadatas,
            'sequences_size': sequence_size,
            'subdivision': subdivision,
        }
        dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
            name='bach_chorales_beats_test', **chorale_dataset_kwargs)
        if cpc_config_name is None:
            processor_encoder = BachBeatsDataProcessor(
                dataset=dataset, embedding_dim=512 - 8, reducer_input_dim=512,
                local_position_embedding_dim=8, encoder_flag=True,
                monophonic_flag=False, nade_flag=nade)
            processor_decoder = BachBeatsDataProcessor(
                dataset=dataset, embedding_dim=512 - 8, reducer_input_dim=512,
                local_position_embedding_dim=8, encoder_flag=False,
                monophonic_flag=False, nade_flag=nade)
        else:
            processor_encoder = BachBeatsCPCDataProcessor(
                dataset=dataset, embedding_dim=512 - 8, reducer_input_dim=512,
                local_position_embedding_dim=8, encoder_flag=True,
                monophonic_flag=False, nade_flag=nade, cpc_model=cpc_model)
            processor_decoder = BachBeatsCPCDataProcessor(
                dataset=dataset, embedding_dim=512 - 8, reducer_input_dim=512,
                local_position_embedding_dim=8, encoder_flag=False,
                monophonic_flag=False, nade_flag=nade, cpc_model=cpc_model)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'lsdb':
        # leadsheet_dataset_kwargs = {
        #     'sequences_size': 24,
        # }
        # leadsheet_dataset_kwargs = {
        #     'sequences_size': 32,
        # }
        leadsheet_dataset_kwargs = {
            'sequences_size': 12,
        }
        dataset: LsdbDataset = dataset_manager.get_dataset(
            name='lsdb', **leadsheet_dataset_kwargs)
        processor_encoder = LsdbDataProcessor(
            dataset=dataset, embedding_dim=512 - 8, reducer_input_dim=512,
            local_position_embedding_dim=8)
        processor_decoder = LsdbDataProcessor(
            dataset=dataset, embedding_dim=512 - 8, reducer_input_dim=512,
            local_position_embedding_dim=8)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'reduction':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention)
        processor_decoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'reduction_large':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement_large', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention)
        processor_decoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'reduction_small':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement_small', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention)
        processor_decoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'arrangement':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_decoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_encodencoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'arrangement_small':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement_small', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_decoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_encodencoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'arrangement_midiPiano':
        # For now just try a small value; an exception is raised anyway if too small
        mean_number_messages_per_time_frame = 14
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame': mean_number_messages_per_time_frame,
            'integrate_discretization': True,
            'alignement_type': 'complete',
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_decoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_encodencoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'arrangement_midiPiano_small':
        mean_number_messages_per_time_frame = 14
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame': mean_number_messages_per_time_frame,
            'integrate_discretization': True,
            'alignement_type': 'complete'
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano_small', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_decoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_encodencoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'arrangement_voice':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False,
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_voice', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_decoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_encodencoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'arrangement_voice_small':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False,
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_voice_small', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_decoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        processor_encodencoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments', block_attention=block_attention, nade=nade,
            double_conditioning=double_conditioning)
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    # elif dataset_type == 'arrangement_minimal':
    #     arrangement_dataset_kwargs = {
    #         'transpose_to_sounding_pitch': True,
    #         'subdivision': subdivision,
    #         'sequence_size': sequence_size,
    #         'velocity_quantization': velocity_quantization,
    #         'max_transposition': max_transposition,
    #         'compute_statistics_flag': False
    #     }
    #     dataset: ArrangementDataset = dataset_manager.get_dataset(
    #         name='arrangement', **arrangement_dataset_kwargs)
    #
    #     reducer_input_dim = num_heads * per_head_dim
    #
    #     processor_encoder = ArrangementDataProcessorMinimal(
    #         dataset=dataset,
    #         embedding_dim=reducer_input_dim - local_position_embedding_dim,
    #         reducer_input_dim=reducer_input_dim,
    #         local_position_embedding_dim=local_position_embedding_dim,
    #         flag_orchestra=False, block_attention=block_attention)
    #
    #     processor_decoder = ArrangementDataProcessorMinimal(
    #         dataset=dataset,
    #         embedding_dim=reducer_input_dim - local_position_embedding_dim,
    #         reducer_input_dim=reducer_input_dim,
    #         local_position_embedding_dim=local_position_embedding_dim,
    #         flag_orchestra=True, block_attention=block_attention)
    #
    #     processor_encodencoder = None
    #
    #     return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'ar':
        dataset: ARDataset = ARDataset(phis=[0.9], length=128, c=0)
        # todo create BachTransformer and put BachBeats data processor in it
        processor_encoder = ARDataProcessor(dataset=dataset)
        processor_decoder = ARDataProcessor(dataset=dataset)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'reduction_categorical':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'group_instrument_per_section': group_instrument_per_section
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_categorical', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention)
        processor_decoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'reduction_categorical_small':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'group_instrument_per_section': group_instrument_per_section
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_categorical_small', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention)
        processor_decoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'reduction_midiPiano':
        # For now just try a small value; an exception is raised anyway if too small
        mean_number_messages_per_time_frame = 14
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame': mean_number_messages_per_time_frame,
            'integrate_discretization': True
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention)
        processor_decoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    elif dataset_type == 'reduction_midiPiano_small':
        # TODO: compute this value beforehand?
        # For now just try a small value; an exception is raised anyway if too small
        mean_number_messages_per_time_frame = 14
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame': mean_number_messages_per_time_frame,
            'integrate_discretization': True
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano_small', **arrangement_dataset_kwargs)
        reducer_input_dim = num_heads * per_head_dim
        processor_encoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra', block_attention=block_attention)
        processor_decoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano', block_attention=block_attention)
        processor_encodencoder = None
        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    else:
        raise NotImplementedError
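# Hedged usage sketch (not part of the original module): how get_dataset might
# be called for the plain 'bach' configuration. All literal values below
# (number of heads, per-head dimension, sequence size, ...) are illustrative
# assumptions, not values taken from the original experiment configs.
if __name__ == '__main__':
    dataset_manager = DatasetManager()
    dataset, processor_decoder, processor_encoder, processor_encodencoder = \
        get_dataset(dataset_manager=dataset_manager,
                    dataset_type='bach',
                    subdivision=4,
                    sequence_size=8,
                    velocity_quantization=2,
                    max_transposition=12,
                    num_heads=8,
                    per_head_dim=64,
                    local_position_embedding_dim=8,
                    block_attention=False,
                    group_instrument_per_section=False,
                    nade=False,
                    cpc_config_name=None,
                    double_conditioning=False,
                    instrument_presence_in_encoder=False)
    print(dataset)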
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, num_layers, lstm_hidden_size, dropout_lstm,
         input_dropout, linear_hidden_size, batch_size, num_target, num_models):
    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)
    # init vae model
    vae_model = MeasureVAE(
        dataset=folk_dataset_vae,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_encoder_layers=num_encoder_layers,
        encoder_hidden_size=encoder_hidden_size,
        encoder_dropout_prob=encoder_dropout_prob,
        latent_space_dim=latent_space_dim,
        num_decoder_layers=num_decoder_layers,
        decoder_hidden_size=decoder_hidden_size,
        decoder_dropout_prob=decoder_dropout_prob,
        has_metadata=has_metadata)
    vae_model.load()  # VAE model must be pre-trained
    if torch.cuda.is_available():
        vae_model.cuda()
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs)
    # init latent_rnn model and latent_rnn_tester
    latent_rnn_model = LatentRNN(
        dataset=folk_dataset_train,
        vae_model=vae_model,
        num_rnn_layers=num_latent_rnn_layers,
        rnn_hidden_size=latent_rnn_hidden_size,
        dropout=latent_rnn_dropout_prob,
        rnn_class=torch.nn.GRU,
        auto_reg=False,
        teacher_forcing=True)
    latent_rnn_model.load()  # latent_rnn model must be pre-trained
    if torch.cuda.is_available():
        latent_rnn_model.cuda()
    latent_rnn_tester = LatentRNNTester(dataset=folk_dataset_test,
                                        model=latent_rnn_model)
    # init arnn model and arnn_tester
    arnn_model = ConstraintModelGaussianReg(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_model.load()  # ARNN model must be pre-trained
    if torch.cuda.is_available():
        arnn_model.cuda()
    arnn_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                        model=arnn_model)
    arnn_baseline_model = AnticipationRNNBaseline(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_baseline_model.load()  # ARNN baseline model must be pre-trained
    if torch.cuda.is_available():
        arnn_baseline_model.cuda()
    arnn_baseline_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                                 model=arnn_baseline_model)
    # create test dataloader
    (_, _, test_dataloader) = folk_dataset_test.data_loaders(
        batch_size=batch_size, split=(0.01, 0.01))
    # test
    print('Num Test Batches: ', len(test_dataloader))
    latent_rnn_mean_loss, latent_rnn_mean_accuracy, \
        arnn_mean_loss, arnn_mean_accuracy, \
        arnn_baseline_mean_loss, arnn_baseline_mean_accuracy = loss_and_acc_test(
            data_loader=test_dataloader,
            latent_rnn_tester=latent_rnn_tester,
            arnn_tester=arnn_tester,
            arnn_baseline_tester=arnn_baseline_tester,
            num_target_measures=num_target,
            num_models=num_models)
    print('Test Epoch:')
    print('latent_rnn Test Loss: ', latent_rnn_mean_loss, '\n'
          'latent_rnn Test Accuracy: ', latent_rnn_mean_accuracy * 100, '\n'
          'ARNN Test Loss: ', arnn_mean_loss, '\n'
          'ARNN Test Accuracy: ', arnn_mean_accuracy * 100, '\n'
          'ARNN Baseline Test Loss: ', arnn_baseline_mean_loss, '\n'
          'ARNN Baseline Test Accuracy: ', arnn_baseline_mean_accuracy * 100, '\n')
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, num_layers, lstm_hidden_size, dropout_lstm,
         input_dropout, linear_hidden_size, batch_size, num_target, num_models):
    random.seed(0)
    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)
    # init vae model
    vae_model = MeasureVAE(
        dataset=folk_dataset_vae,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_encoder_layers=num_encoder_layers,
        encoder_hidden_size=encoder_hidden_size,
        encoder_dropout_prob=encoder_dropout_prob,
        latent_space_dim=latent_space_dim,
        num_decoder_layers=num_decoder_layers,
        decoder_hidden_size=decoder_hidden_size,
        decoder_dropout_prob=decoder_dropout_prob,
        has_metadata=has_metadata)
    vae_model.load()  # VAE model must be pre-trained
    if torch.cuda.is_available():
        vae_model.cuda()
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs)
    # Initialize stuff
    test_filenames = folk_dataset_test.dataset_filenames
    num_melodies = 32
    num_measures = 16
    req_length = num_measures * 4 * 6
    num_past = 6
    num_future = 6
    num_target = 4
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    save_folder = 'saved_midi/'

    # First, save original data
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        # save original scores
        save_filename = os.path.join(cur_dir, save_folder + f_id + '_original.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        metadata_tensor = metadata_tensor[:, :req_length, :]
        trunc_score = folk_dataset_test.tensor_to_score(score_tensor)
        trunc_score.write('midi', fp=save_filename)

    # Initialize models and testers
    latent_rnn_model = LatentRNN(
        dataset=folk_dataset_train,
        vae_model=vae_model,
        num_rnn_layers=num_latent_rnn_layers,
        rnn_hidden_size=latent_rnn_hidden_size,
        dropout=latent_rnn_dropout_prob,
        rnn_class=torch.nn.GRU,
        auto_reg=False,
        teacher_forcing=True)
    latent_rnn_model.load()  # Latent RNN model must be pre-trained
    if torch.cuda.is_available():
        latent_rnn_model.cuda()
    latent_rnn_tester = LatentRNNTester(dataset=folk_dataset_test,
                                        model=latent_rnn_model)

    def process_latent_rnn_batch(score_tensor, num_past=6, num_future=6,
                                 num_target=4):
        assert (num_past + num_future + num_target == 16)
        score_tensor = score_tensor.unsqueeze(0)
        score_tensor = LatentRNNTrainer.split_to_measures(score_tensor, 24)
        tensor_past, tensor_future, tensor_target = LatentRNNTrainer.split_score(
            score_tensor=score_tensor,
            num_past=num_past,
            num_future=num_future,
            num_target=num_target,
            measure_seq_len=24)
        return tensor_past, tensor_future, tensor_target

    # Second, save latent_rnn generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        save_filename = os.path.join(cur_dir, save_folder + f_id + '_latent_rnn.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        # metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        # metadata_tensor = metadata_tensor[:, :req_length, :]
        # save regeneration using latent_rnn
        tensor_past, tensor_future, tensor_target = process_latent_rnn_batch(
            score_tensor, num_past, num_future, num_target)
        # forward pass through latent_rnn
        weights, gen_target, _ = latent_rnn_tester.model(
            past_context=tensor_past,
            future_context=tensor_future,
            target=tensor_target,
            measures_to_generate=num_target,
            train=False,
        )
        # convert to score
        batch_size, _, _ = gen_target.size()
        gen_target = gen_target.view(batch_size, num_target, 24)
        gen_score_tensor = torch.cat((tensor_past, gen_target, tensor_future), 1)
        latent_rnn_score = folk_dataset_test.tensor_to_score(
            gen_score_tensor.cpu())
        latent_rnn_score.write('midi', fp=save_filename)

    # Initialize arnn model and arnn_tester
    arnn_model = ConstraintModelGaussianReg(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_model.load()  # ARNN model must be pre-trained
    if torch.cuda.is_available():
        arnn_model.cuda()
    arnn_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                        model=arnn_model)

    def process_arnn_batch(score_tensor, metadata_tensor, arnn_tester,
                           num_past=6, num_target=4):
        score_tensor = score_tensor.unsqueeze(0)
        metadata_tensor = metadata_tensor.unsqueeze(0)
        tensor_score = to_cuda_variable_long(score_tensor)
        tensor_metadata = to_cuda_variable_long(metadata_tensor)
        constraints_location, start_tick, end_tick = \
            arnn_tester.get_constraints_location(
                tensor_score,
                is_stochastic=False,
                start_measure=num_past,
                num_measures=num_target)
        arnn_batch = (tensor_score, tensor_metadata, constraints_location,
                      start_tick, end_tick)
        return arnn_batch

    # Third, save ARNN-Reg generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        save_filename = os.path.join(cur_dir, save_folder + f_id + '_arnn_reg.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        metadata_tensor = metadata_tensor[:, :req_length, :]
        # save regeneration using the ARNN-Reg model
        tensor_score, tensor_metadata, constraints_location, start_tick, end_tick = \
            process_arnn_batch(score_tensor, metadata_tensor, arnn_tester,
                               num_past, num_target)
        # forward pass through arnn
        _, gen_target = arnn_tester.model.forward_inpaint(
            score_tensor=tensor_score,
            metadata_tensor=tensor_metadata,
            constraints_loc=constraints_location,
            start_tick=start_tick,
            end_tick=end_tick,
        )
        # convert to score
        arnn_score = folk_dataset_test.tensor_to_score(gen_target.cpu())
        arnn_score.write('midi', fp=save_filename)

    # Initialize arnn-baseline model and arnn_tester
    arnn_baseline_model = AnticipationRNNBaseline(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_baseline_model.load()  # ARNN baseline model must be pre-trained
    if torch.cuda.is_available():
        arnn_baseline_model.cuda()
    arnn_baseline_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                                 model=arnn_baseline_model)

    # Fourth, save ARNN-Baseline generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        save_filename = os.path.join(cur_dir,
                                     save_folder + f_id + '_arnn_baseline.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        metadata_tensor = metadata_tensor[:, :req_length, :]
        # save regeneration using the ARNN-Baseline model
        tensor_score, tensor_metadata, constraints_location, start_tick, end_tick = \
            process_arnn_batch(score_tensor, metadata_tensor,
                               arnn_baseline_tester, num_past, num_target)
        # forward pass through arnn baseline
        _, gen_target = arnn_baseline_tester.model.forward_inpaint(
            score_tensor=tensor_score,
            metadata_tensor=tensor_metadata,
            constraints_loc=constraints_location,
            start_tick=start_tick,
            end_tick=end_tick,
        )
        # convert to score
        arnn_baseline_score = folk_dataset_test.tensor_to_score(
            gen_target.cpu())
        arnn_baseline_score.write('midi', fp=save_filename)
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, batch_size, num_epochs, train, lr, plot,
         log, auto_reg, teacher_forcing, early_stop):
    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train',
        **mvae_train_kwargs
    )
    # init vae model
    vae_model = MeasureVAE(
        dataset=folk_dataset_vae,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_encoder_layers=num_encoder_layers,
        encoder_hidden_size=encoder_hidden_size,
        encoder_dropout_prob=encoder_dropout_prob,
        latent_space_dim=latent_space_dim,
        num_decoder_layers=num_decoder_layers,
        decoder_hidden_size=decoder_hidden_size,
        decoder_dropout_prob=decoder_dropout_prob,
        has_metadata=has_metadata
    )
    vae_model.load()  # VAE model must be pre-trained
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train',
        **folk_train_kwargs
    )
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train',
        **folk_test_kwargs
    )
    # init latent_rnn model
    model = LatentRNN(
        dataset=folk_dataset_train,
        vae_model=vae_model,
        num_rnn_layers=num_latent_rnn_layers,
        rnn_hidden_size=latent_rnn_hidden_size,
        dropout=latent_rnn_dropout_prob,
        rnn_class=torch.nn.GRU,
        auto_reg=auto_reg,
        teacher_forcing=teacher_forcing
    )
    if train:
        if torch.cuda.is_available():
            model.cuda()
        trainer = LatentRNNTrainer(
            dataset=folk_dataset_train,
            model=model,
            lr=lr,
            early_stopping=early_stop
        )
        trainer.train_model(
            batch_size=batch_size,
            num_epochs=num_epochs,
            plot=plot,
            log=log
        )
    else:
        model.load()
        model.cuda()
        model.eval()
    tester = LatentRNNTester(
        dataset=folk_dataset_test,
        model=model
    )
    tester.test_model(
        batch_size=batch_size
    )
    gen_score, score, original_score = tester.generation_random(
        tensor_score=None,
        start_measure=8,
        num_measures_gen=2
    )
    print(" --- score --- ")
    print(score)
    gen_score.show()
    original_score.show()
    gen_score2, score, original_score2 = tester.generation_test()
    gen_score2.show()
    original_score2.show()
    print(" --- score --- ")
    print(score)
def main(note_embedding_dim, meta_embedding_dim, num_layers, lstm_hidden_size,
         dropout_lstm, linear_hidden_size, batch_size, num_epochs, train,
         update, num_iterations, sequence_length_ticks, model_id,
         include_transpositions, update_iterations, generations_per_iteration,
         num_generations, score_chorales, write_scores):
    print('step 1/3: prepare dataset')
    dataset_manager = DatasetManager()
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4,
        'include_transpositions': include_transpositions,
    }
    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    dataset = bach_chorales_dataset
    load_or_pickle_distributions(dataset)

    print('step 2/3: prepare model')
    print(f'Model ID: {model_id}')
    deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size,
        model_id=model_id,
    )
    if train:
        print('step 2a/3: train base model')
        deepbach.train(batch_size=batch_size,
                       num_epochs=num_epochs,
                       split=[0.85, 0.15])
    else:
        print('step 2a/3: load model')
        deepbach.load()
        deepbach.cuda()

    if update:
        print(f'step 2b/3: update base model over {update_iterations} iterations')
        thres = get_threshold('data/chorale_scores.csv', col=-1)
        print(f'Threshold for selection: {thres}')
        update_file = open('data/update_scores.csv', 'w')
        csv_writer = csv.writer(update_file)
        csv_writer.writerow(['iteration', 'chorale ID', 'score'])
        for i in range(update_iterations):
            print(f'----------- Iteration {i} -----------')
            picked_chorales = []
            num_picked_chorales = 0
            ensure_dir(f'generations/{model_id}/{i}')
            for j in tqdm(range(generations_per_iteration)):
                chorale, tensor_chorale, tensor_metadata = deepbach.generation(
                    num_iterations=num_iterations,
                    sequence_length_ticks=sequence_length_ticks,
                )
                score = score_chorale(chorale, dataset)
                # write data to csv file: iteration, generation #, score
                csv_writer.writerow([i, j, score])
                # threshold: worst Bach chorale score rounded up to nearest .01
                if score > thres:
                    print(f'Picked chorale {j} with score {score}')
                    picked_chorales.append(chorale)
                    num_picked_chorales += 1
                    chorale.write('midi', f'generations/{model_id}/{i}/c{j}.mid')
            print(f'Number of picked chorales for iteration {i}: '
                  f'{num_picked_chorales}')
            if num_picked_chorales == 0:
                continue
            all_datasets.update({
                f'generated_chorales_{i}': {
                    'dataset_class_name': ChoraleDataset,
                    'corpus_it_gen': GeneratedChoraleIteratorGen(picked_chorales)
                }
            })
            generated_dataset: ChoraleDataset = dataset_manager.get_dataset(
                name=f'generated_chorales_{i}',
                index2note_dicts=dataset.index2note_dicts,
                note2index_dicts=dataset.note2index_dicts,
                voice_ranges=dataset.voice_ranges,
                **chorale_dataset_kwargs)
            deepbach.dataset = generated_dataset
            deepbach.train(
                batch_size=batch_size,
                num_epochs=2,
                split=[1, 0],  # use all selected chorales for training
                early_stopping=False)

    # generate chorales
    if score_chorales:
        chorale_scores = {}
        print('Scoring real chorales')
        for chorale_id, chorale in tqdm(enumerate(dataset.iterator_gen()),
                                        total=num_generations):
            score = score_chorale(chorale, dataset)
            chorale_scores[chorale_id] = score
            if chorale_id == num_generations:
                break
        # write scores to file
        if write_scores:
            with open('data/chorale_scores.csv', 'w') as chorale_file:
                csv_writer = csv.writer(chorale_file)
                csv_writer.writerow(['', 'score'] + list(weights.keys()))
                for id, value in chorale_scores.items():
                    csv_writer.writerow([id, value])

    if num_generations != 0:
        generation_scores = {}
        print('Generating and scoring generated chorales')
        ensure_dir(f'generations/{model_id}')
        for i in range(num_generations):
            chorale, tensor_chorale, tensor_metadata = deepbach.generation(
                num_iterations=num_iterations,
                sequence_length_ticks=sequence_length_ticks,
            )
            chorale.write('midi', f'generations/{model_id}/c{i}.mid')
            score = score_chorale(chorale, dataset)
            generation_scores[i] = score
        # write scores to file
        if write_scores:
            with open(f'data/model{model_id}_scores.csv', 'w') as generation_file:
                csv_writer = csv.writer(generation_file)
                csv_writer.writerow(['', 'score'] + list(weights.keys()))
                for id, value in generation_scores.items():
                    csv_writer.writerow([id, value])
        num_datapoints, 1, length, num_metadata)
    dataset = TensorDataset(score_tensor_dataset,
                            metadata_tensor_dataset)
    print(f'Sizes: {score_tensor_dataset.size()}')
    print(f'Sizes: {metadata_tensor_dataset.size()}')
    return dataset


if __name__ == '__main__':
    from DatasetManager.dataset_manager import DatasetManager
    from DatasetManager.metadata import BeatMarkerMetadata, TickMetadata

    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    folk_dataset_kwargs = {'metadatas': metadatas, 'sequences_size': 32}
    folk_dataset: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4measures_test', **folk_dataset_kwargs)
    (train_dataloader,
     val_dataloader,
     test_dataloader) = folk_dataset.data_loaders(batch_size=100,
                                                  split=(0.7, 0.2))
    print('Num Train Batches: ', len(train_dataloader))
    print('Num Valid Batches: ', len(val_dataloader))
    print('Num Test Batches: ', len(test_dataloader))
    for sample_id, (score, _) in tqdm(enumerate(train_dataloader)):
        score = score.long()
        if torch.cuda.is_available():
            score = torch.autograd.Variable(score.cuda())
from grader.grader import score_chorale
from DatasetManager.chorale_dataset import ChoraleDataset
from DatasetManager.dataset_manager import DatasetManager, all_datasets
from DatasetManager.metadata import FermataMetadata, TickMetadata, KeyMetadata
from DatasetManager.helpers import GeneratedChoraleIteratorGen
from DeepBach.model_manager import DeepBach
from DeepBach.helpers import *

print('step 1/3: prepare dataset')
dataset_manager = DatasetManager()
metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
chorale_dataset_kwargs = {
    'voice_ids': [1, 1, 2, 3],
    'metadatas': metadatas,
    'sequences_size': 8,
    'subdivision': 4,
    'include_transpositions': False,
}
bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
    name='bach_chorales', **chorale_dataset_kwargs)
dataset = bach_chorales_dataset
load_or_pickle_distributions(dataset)
print(dataset.gaussian.covariances_)

# chorale = converter.parse('generations/6/c187.mid')
# score = score_chorale(chorale, dataset)
# print(score)
# Piano score
_piano = None
# Orchestra init (before even the first pass of the model,
# i.e. filled with MASK and REST symbols)
_orchestra_init = None
_orchestra_silenced_instruments = None

# TODO use this parameter or extract it from the metadata somehow
timesignature = music21.meter.TimeSignature('4/4')

# generation parameters
# todo put in click?
batch_size_per_voice = 8

metadatas = [
    FermataMetadata(),
    TickMetadata(subdivision=_subdivision),
    KeyMetadata()
]


# def get_fermatas_tensor(metadata_tensor: torch.Tensor) -> torch.Tensor:
#     """
#     Extract the fermatas tensor from a metadata tensor
#     """
#     fermatas_index = [m.__class__ for m in metadatas].index(
#         FermataMetadata().__class__)
#
#     # fermatas are shared across all voices so we only consider the first voice
#     soprano_voice_metadata = metadata_tensor[0]
#
#     # `soprano_voice_metadata` has shape
#     # `(sequence_duration, len(metadatas) + 1)` (accounting for the voice
#     # index metadata)
deepbach = None
_num_iterations = None
_sequence_length_ticks = None
_ticks_per_quarter = None

# TODO use this parameter or extract it from the metadata somehow
timesignature = music21.meter.TimeSignature('4/4')

# generation parameters
# todo put in click?
batch_size_per_voice = 8

metadatas = [
    FermataMetadata(),
    TickMetadata(subdivision=_ticks_per_quarter),
    KeyMetadata()
]


def get_fermatas_tensor(metadata_tensor: torch.Tensor) -> torch.Tensor:
    """
    Extract the fermatas tensor from a metadata tensor
    """
    fermatas_index = [m.__class__ for m in metadatas].index(
        FermataMetadata().__class__)

    # fermatas are shared across all voices so we only consider the first voice
    soprano_voice_metadata = metadata_tensor[0]

    # `soprano_voice_metadata` has shape
    # `(sequence_duration, len(metadatas) + 1)` (accounting for the voice
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, num_layers, lstm_hidden_size, dropout_lstm,
         input_dropout, linear_hidden_size, batch_size, num_target, num_models):
    random.seed(0)
    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)
    # init vae model
    vae_model = MeasureVAE(
        dataset=folk_dataset_vae,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_encoder_layers=num_encoder_layers,
        encoder_hidden_size=encoder_hidden_size,
        encoder_dropout_prob=encoder_dropout_prob,
        latent_space_dim=latent_space_dim,
        num_decoder_layers=num_decoder_layers,
        decoder_hidden_size=decoder_hidden_size,
        decoder_dropout_prob=decoder_dropout_prob,
        has_metadata=has_metadata)
    vae_model.load()  # VAE model must be pre-trained
    if torch.cuda.is_available():
        vae_model.cuda()
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs)
    # Initialize stuff
    test_filenames = folk_dataset_test.dataset_filenames
    num_melodies = 32
    num_measures = 16
    req_length = num_measures * 4 * 6
    num_past = 6
    num_future = 6
    num_target = 4
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    save_folder = 'saved_midi/'

    # Initialize models and testers
    latent_rnn_model = LatentRNN(
        dataset=folk_dataset_train,
        vae_model=vae_model,
        num_rnn_layers=num_latent_rnn_layers,
        rnn_hidden_size=latent_rnn_hidden_size,
        dropout=latent_rnn_dropout_prob,
        rnn_class=torch.nn.GRU,
        auto_reg=False,
        teacher_forcing=True)
    latent_rnn_model.load()  # latent_rnn model must be pre-trained
    if torch.cuda.is_available():
        latent_rnn_model.cuda()
    latent_rnn_tester = LatentRNNTester(dataset=folk_dataset_test,
                                        model=latent_rnn_model)

    def process_latent_rnn_batch(score_tensor, num_past=6, num_future=6,
                                 num_target=4):
        assert (num_past + num_future + num_target == 16)
        score_tensor = score_tensor.unsqueeze(0)
        score_tensor = LatentRNNTrainer.split_to_measures(score_tensor, 24)
        tensor_past, tensor_future, tensor_target = LatentRNNTrainer.split_score(
            score_tensor=score_tensor,
            num_past=num_past,
            num_future=num_future,
            num_target=num_target,
            measure_seq_len=24)
        return tensor_past, tensor_future, tensor_target

    # Second, save latent_rnn generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        if f_id == 'tune_16154':
            for j in range(15):
                save_filename = os.path.join(
                    cur_dir,
                    save_folder + f_id + '_' + str(j) + '_latent_rnn.mid')
                f = os.path.join(
                    folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
                score = folk_dataset_test.corpus_it_gen.get_score_from_path(
                    f, fix_and_expand=True)
                score_tensor = folk_dataset_test.get_score_tensor(score)
                # ignore scores with less than 16 measures
                if score_tensor.size(1) < req_length:
                    continue
                score_tensor = score_tensor[:, :req_length]
                # metadata_tensor = metadata_tensor[:, :req_length, :]
                # save regeneration using latent_rnn
                tensor_past, tensor_future, tensor_target = process_latent_rnn_batch(
                    score_tensor, num_past, num_future, num_target)
                # forward pass through latent_rnn
                weights, gen_target, _ = latent_rnn_tester.model(
                    past_context=tensor_past,
                    future_context=tensor_future,
                    target=tensor_target,
                    measures_to_generate=num_target,
                    train=False,
                )
                # convert to score
                batch_size, _, _ = gen_target.size()
                gen_target = gen_target.view(batch_size, num_target, 24)
                gen_score_tensor = torch.cat(
                    (tensor_past, gen_target, tensor_future), 1)
                latent_rnn_score = folk_dataset_test.tensor_to_score(
                    gen_score_tensor.cpu())
                latent_rnn_score.write('midi', fp=save_filename)