def build_bach_beat(dataset_manager, batch_size, subdivision, sequences_size,
                    test_bool):
    metadatas = [
        TickMetadata(subdivision=subdivision),
        FermataMetadata(),
        KeyMetadata()
    ]
    name = 'bach_chorales'
    if test_bool:
        name += '_test'
    bach_chorales_dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
        name=name,
        voice_ids=[0, 1, 2, 3],
        metadatas=metadatas,
        sequences_size=sequences_size,
        subdivision=subdivision
    )
    (train_dataloader,
     val_dataloader,
     test_dataloader) = bach_chorales_dataset.data_loaders(
        batch_size=batch_size,
        cache_dir=dataset_manager.cache_dir,
        split=(0.85, 0.10)
    )
    print('Num Train Batches: ', len(train_dataloader))
    print('Num Valid Batches: ', len(val_dataloader))
    print('Num Test Batches: ', len(test_dataloader))
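
# A minimal usage sketch for build_bach_beat, assuming DatasetManager is
# importable from DatasetManager.dataset_manager and the chorale dataset has
# already been built and cached; the parameter values below are illustrative
# assumptions, not values prescribed by this module.
from DatasetManager.dataset_manager import DatasetManager

dataset_manager = DatasetManager()
build_bach_beat(
    dataset_manager=dataset_manager,
    batch_size=128,       # illustrative batch size
    subdivision=4,        # sixteenth-note grid
    sequences_size=8,     # sequence length in beats
    test_bool=False,      # set True to load the small '_test' dataset
)
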
def main(note_embedding_dim, meta_embedding_dim, num_layers, lstm_hidden_size,
         dropout_lstm, linear_hidden_size, batch_size, num_epochs, train,
         num_iterations, sequence_length_ticks):
    dataset_manager = DatasetManager()

    metadatas = [
        FermataMetadata(),
        TickMetadata(subdivision=4),
        KeyMetadata()
    ]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4
    }
    bach_chorales_dataset = dataset_manager.get_dataset(
        name='bach_chorales',
        **chorale_dataset_kwargs
    )
    dataset = bach_chorales_dataset

    deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size
    )

    if train:
        deepbach.train(batch_size=batch_size, num_epochs=num_epochs)
    else:
        deepbach.load()
        deepbach.cuda()

    print('Generation')
    score, tensor_chorale, tensor_metadata = deepbach.generation(
        num_iterations=num_iterations,
        length=sequence_length_ticks,
    )
    score.write('midi', fp='test.mid')
def get_fermatas_tensor(metadata_tensor: torch.Tensor) -> torch.Tensor:
    """
    Extract the fermatas tensor from a metadata tensor
    """
    fermatas_index = [m.__class__ for m in metadatas].index(
        FermataMetadata().__class__)

    # fermatas are shared across all voices, so we only consider the first voice
    soprano_voice_metadata = metadata_tensor[0]

    # `soprano_voice_metadata` has shape
    # `(sequence_duration, len(metadatas) + 1)` (accounting for the voice
    # index metadata)
    # Extract fermatas for all steps
    return soprano_voice_metadata[:, fermatas_index]
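
# A small sketch of how get_fermatas_tensor slices its input, using a
# synthetic tensor. The layout (num_voices, sequence_duration,
# len(metadatas) + 1) and the assumption that FermataMetadata() is the first
# entry of the module-level `metadatas` list are taken from the surrounding
# code, not guaranteed by this function alone.
import torch

num_voices, sequence_duration = 4, 16
dummy_metadata = torch.zeros(num_voices, sequence_duration, 4, dtype=torch.long)
dummy_metadata[:, -1, 0] = 1   # pretend there is a fermata on the last step

fermatas = get_fermatas_tensor(dummy_metadata)
print(fermatas.shape)  # torch.Size([16])
print(fermatas[-1])    # tensor(1)
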
def main(include_transpositions):
    dataset_manager = DatasetManager()

    print('step 1/3: prepare dataset')
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4,
        'include_transpositions': include_transpositions,
    }
    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    dataset = bach_chorales_dataset

    get_pairs(dataset, model_ids=[5, 9])
def setup(self):
    """Load the model"""
    # music21.environment.set("musicxmlPath", "/bin/true")
    note_embedding_dim = 20
    meta_embedding_dim = 20
    num_layers = 2
    lstm_hidden_size = 256
    dropout_lstm = 0.5
    linear_hidden_size = 256
    batch_size = 256
    num_epochs = 5
    train = False
    num_iterations = 500
    sequence_length_ticks = 64

    dataset_manager = DatasetManager()
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        "voice_ids": [0, 1, 2, 3],
        "metadatas": metadatas,
        "sequences_size": 8,
        "subdivision": 4,
    }

    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name="bach_chorales", **chorale_dataset_kwargs
    )
    dataset = bach_chorales_dataset

    self.deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size,
    )
    self.deepbach.load()

    # load FluidSynth for MIDI-to-audio conversion
    self.fs = FluidSynth()
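
# A hedged follow-up sketch (an assumed method of the same class as setup()):
# generate a chorale with the loaded model, write it to MIDI, then render
# audio, assuming `FluidSynth` comes from the midi2audio package. The
# generation() keyword names mirror the local variables prepared in setup()
# and may differ across model versions; the 'deepbach_sample.*' paths and the
# method name generate_sample_audio are illustrative, not part of the codebase.
def generate_sample_audio(self, num_iterations=500, sequence_length_ticks=64):
    score, tensor_chorale, tensor_metadata = self.deepbach.generation(
        num_iterations=num_iterations,
        sequence_length_ticks=sequence_length_ticks,
    )
    midi_path = 'deepbach_sample.mid'
    score.write('midi', fp=midi_path)
    # render the MIDI file to WAV with the FluidSynth helper loaded in setup()
    self.fs.midi_to_audio(midi_path, 'deepbach_sample.wav')
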
mp3_response_headers = {"Content-Type": "audio/mpeg3"} deepbach = None _num_iterations = None _sequence_length_ticks = None _ticks_per_quarter = None # TODO use this parameter or extract it from the metadata somehow timesignature = music21.meter.TimeSignature('4/4') # generation parameters # todo put in click? batch_size_per_voice = 8 metadatas = [ FermataMetadata(), TickMetadata(subdivision=_ticks_per_quarter), KeyMetadata() ] def get_fermatas_tensor(metadata_tensor: torch.Tensor) -> torch.Tensor: """ Extract the fermatas tensor from a metadata tensor """ fermatas_index = [m.__class__ for m in metadatas].index(FermataMetadata().__class__) # fermatas are shared across all voices so we only consider the first voice soprano_voice_metadata = metadata_tensor[0] # `soprano_voice_metadata` has shape
def get_dataset(dataset_manager, dataset_type, subdivision, sequence_size,
                velocity_quantization, max_transposition, num_heads,
                per_head_dim, local_position_embedding_dim, block_attention,
                group_instrument_per_section, nade, cpc_config_name,
                double_conditioning, instrument_presence_in_encoder):
    if dataset_type == 'bach':
        if nade:
            raise Exception(
                'it looks like the nade handling in the data processor is '
                'broken; check before using'
            )
        metadatas = [
            FermataMetadata(),
            TickMetadata(subdivision=subdivision),
            KeyMetadata()
        ]

        voices_ids = [0, 1, 2, 3]

        if cpc_config_name is not None:
            # note: to compute the first CPC code, we need to waste block_size tokens
            cpc_model = init_cpc_model(cpc_config_name)
            block_size = cpc_model.dataloader_generator.num_tokens_per_block // (
                subdivision * len(voices_ids))
            sequence_size += block_size

        chorale_dataset_kwargs = {
            'voice_ids': voices_ids,
            'metadatas': metadatas,
            'sequences_size': sequence_size,
            'subdivision': subdivision,
        }

        dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
            name='bach_chorales_beats', **chorale_dataset_kwargs)

        if cpc_config_name is None:
            processor_encoder = BachBeatsDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=True,
                monophonic_flag=False,
                nade_flag=nade)

            processor_decoder = BachBeatsDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=False,
                monophonic_flag=False,
                nade_flag=nade)
        else:
            processor_encoder = BachBeatsCPCDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=True,
                monophonic_flag=False,
                nade_flag=nade,
                cpc_model=cpc_model)

            processor_decoder = BachBeatsCPCDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=False,
                monophonic_flag=False,
                nade_flag=nade,
                cpc_model=cpc_model)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'bach_small':
        metadatas = [
            FermataMetadata(),
            TickMetadata(subdivision=subdivision),
            KeyMetadata()
        ]

        voices_ids = [0, 1, 2, 3]

        if cpc_config_name is not None:
            # note: to compute the first CPC code, we need to waste block_size tokens
            cpc_model = init_cpc_model(cpc_config_name)
            num_tokens_per_block = cpc_model.dataloader_generator.num_tokens_per_block // (
                subdivision * len(voices_ids))
            sequence_size += num_tokens_per_block

        chorale_dataset_kwargs = {
            'voice_ids': voices_ids,
            'metadatas': metadatas,
            'sequences_size': sequence_size,
            'subdivision': subdivision,
        }

        dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
            name='bach_chorales_beats_test', **chorale_dataset_kwargs)

        if cpc_config_name is None:
            processor_encoder = BachBeatsDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=True,
                monophonic_flag=False,
                nade_flag=nade)

            processor_decoder = BachBeatsDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=False,
                monophonic_flag=False,
                nade_flag=nade)
        else:
            processor_encoder = BachBeatsCPCDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=True,
                monophonic_flag=False,
                nade_flag=nade,
                cpc_model=cpc_model)

            processor_decoder = BachBeatsCPCDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=False,
                monophonic_flag=False,
                nade_flag=nade,
                cpc_model=cpc_model)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'lsdb':
        # leadsheet_dataset_kwargs = {
        #     'sequences_size': 24,
        # }
        # leadsheet_dataset_kwargs = {
        #     'sequences_size': 32,
        # }
        leadsheet_dataset_kwargs = {
            'sequences_size': 12,
        }
        dataset: LsdbDataset = dataset_manager.get_dataset(
            name='lsdb', **leadsheet_dataset_kwargs)

        processor_encoder = LsdbDataProcessor(
            dataset=dataset,
            embedding_dim=512 - 8,
            reducer_input_dim=512,
            local_position_embedding_dim=8)

        processor_decoder = LsdbDataProcessor(
            dataset=dataset,
            embedding_dim=512 - 8,
            reducer_input_dim=512,
            local_position_embedding_dim=8)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_large':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement_large', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_small':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement_small':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement_midiPiano':
        # For now just try a small value; an exception is raised if it is too small
        mean_number_messages_per_time_frame = 14
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame': mean_number_messages_per_time_frame,
            'integrate_discretization': True,
            'alignement_type': 'complete',
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement_midiPiano_small':
        mean_number_messages_per_time_frame = 14
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame': mean_number_messages_per_time_frame,
            'integrate_discretization': True,
            'alignement_type': 'complete'
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement_voice':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False,
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_voice', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement_voice_small':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False,
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_voice_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    # elif dataset_type == 'arrangement_minimal':
    #     arrangement_dataset_kwargs = {
    #         'transpose_to_sounding_pitch': True,
    #         'subdivision': subdivision,
    #         'sequence_size': sequence_size,
    #         'velocity_quantization': velocity_quantization,
    #         'max_transposition': max_transposition,
    #         'compute_statistics_flag': False
    #     }
    #     dataset: ArrangementDataset = dataset_manager.get_dataset(
    #         name='arrangement',
    #         **arrangement_dataset_kwargs
    #     )
    #
    #     reducer_input_dim = num_heads * per_head_dim
    #
    #     processor_encoder = ArrangementDataProcessorMinimal(
    #         dataset=dataset,
    #         embedding_dim=reducer_input_dim - local_position_embedding_dim,
    #         reducer_input_dim=reducer_input_dim,
    #         local_position_embedding_dim=local_position_embedding_dim,
    #         flag_orchestra=False,
    #         block_attention=block_attention)
    #
    #     processor_decoder = ArrangementDataProcessorMinimal(
    #         dataset=dataset,
    #         embedding_dim=reducer_input_dim - local_position_embedding_dim,
    #         reducer_input_dim=reducer_input_dim,
    #         local_position_embedding_dim=local_position_embedding_dim,
    #         flag_orchestra=True,
    #         block_attention=block_attention)
    #
    #     processor_encodencoder = None
    #
    #     return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'ar':
        dataset: ARDataset = ARDataset(phis=[0.9], length=128, c=0)

        # TODO create BachTransformer and put BachBeats data processor in it
        processor_encoder = ARDataProcessor(dataset=dataset)
        processor_decoder = ARDataProcessor(dataset=dataset)
        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_categorical':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'group_instrument_per_section': group_instrument_per_section
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_categorical', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_categorical_small':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'group_instrument_per_section': group_instrument_per_section
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_categorical_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_midiPiano':
        # For now just try a small value; an exception is raised if it is too small
        mean_number_messages_per_time_frame = 14
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame': mean_number_messages_per_time_frame,
            'integrate_discretization': True
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_midiPiano_small':
        # TODO compute this value beforehand?
        # For now just try a small value; an exception is raised if it is too small
        mean_number_messages_per_time_frame = 14
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame': mean_number_messages_per_time_frame,
            'integrate_discretization': True
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    else:
        raise NotImplementedError
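
# A hedged sketch of how get_dataset might be called for the 'bach'
# configuration; every argument value below (8 heads of 64 dimensions,
# subdivision 4, and so on) is an illustrative assumption, not a default
# taken from this code.
dataset_manager = DatasetManager()
dataset, processor_decoder, processor_encoder, processor_encodencoder = get_dataset(
    dataset_manager=dataset_manager,
    dataset_type='bach',
    subdivision=4,
    sequence_size=8,
    velocity_quantization=2,
    max_transposition=12,
    num_heads=8,
    per_head_dim=64,
    local_position_embedding_dim=8,
    block_attention=False,
    group_instrument_per_section=False,
    nade=False,
    cpc_config_name=None,   # set to a config name to prepend CPC codes
    double_conditioning=None,
    instrument_presence_in_encoder=False,
)
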
def main(note_embedding_dim, meta_embedding_dim, num_layers, lstm_hidden_size,
         dropout_lstm, linear_hidden_size, batch_size, num_epochs, train,
         update, num_iterations, sequence_length_ticks, model_id,
         include_transpositions, update_iterations, generations_per_iteration,
         num_generations, score_chorales, write_scores):
    print('step 1/3: prepare dataset')
    dataset_manager = DatasetManager()
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4,
        'include_transpositions': include_transpositions,
    }

    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    dataset = bach_chorales_dataset
    load_or_pickle_distributions(dataset)

    print('step 2/3: prepare model')
    print(f'Model ID: {model_id}')
    deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size,
        model_id=model_id,
    )

    if train:
        print('step 2a/3: train base model')
        deepbach.train(batch_size=batch_size,
                       num_epochs=num_epochs,
                       split=[0.85, 0.15])
    else:
        print('step 2a/3: load model')
        deepbach.load()
        deepbach.cuda()

    if update:
        print(f'step 2b/3: update base model over {update_iterations} iterations')
        thres = get_threshold('data/chorale_scores.csv', col=-1)
        print(f'Threshold for selection: {thres}')
        update_file = open('data/update_scores.csv', 'w')
        writer = csv.writer(update_file)
        writer.writerow(['iteration', 'chorale ID', 'score'])

        for i in range(update_iterations):
            print(f'----------- Iteration {i} -----------')
            picked_chorales = []
            num_picked_chorales = 0
            ensure_dir(f'generations/{model_id}/{i}')
            for j in tqdm(range(generations_per_iteration)):
                chorale, tensor_chorale, tensor_metadata = deepbach.generation(
                    num_iterations=num_iterations,
                    sequence_length_ticks=sequence_length_ticks,
                )

                score = score_chorale(chorale, dataset)

                # write data to csv file
                writer.writerow([i, j, score])  # iteration, generation #, score

                # threshold: worst Bach chorale score rounded up to nearest .01
                if score > thres:
                    print(f'Picked chorale {j} with score {score}')
                    picked_chorales.append(chorale)
                    num_picked_chorales += 1

                chorale.write('midi', f'generations/{model_id}/{i}/c{j}.mid')

            print(f'Number of picked chorales for iteration {i}: '
                  f'{num_picked_chorales}')

            if num_picked_chorales == 0:
                continue

            all_datasets.update({
                f'generated_chorales_{i}': {
                    'dataset_class_name': ChoraleDataset,
                    'corpus_it_gen': GeneratedChoraleIteratorGen(picked_chorales)
                }
            })
            generated_dataset: ChoraleDataset = dataset_manager.get_dataset(
                name=f'generated_chorales_{i}',
                index2note_dicts=dataset.index2note_dicts,
                note2index_dicts=dataset.note2index_dicts,
                voice_ranges=dataset.voice_ranges,
                **chorale_dataset_kwargs)
            deepbach.dataset = generated_dataset
            deepbach.train(
                batch_size=batch_size,
                num_epochs=2,
                split=[1, 0],  # use all selected chorales for training
                early_stopping=False)

    # generate chorales
    if score_chorales:
        chorale_scores = {}
        print('Scoring real chorales')
        for chorale_id, chorale in tqdm(enumerate(dataset.iterator_gen()),
                                        total=num_generations):
            score = score_chorale(chorale, dataset)
            chorale_scores[chorale_id] = score
            if chorale_id == num_generations:
                break

        # write scores to file
        if write_scores:
            with open('data/chorale_scores.csv', 'w') as chorale_file:
                writer = csv.writer(chorale_file)
                writer.writerow(['', 'score'] + list(weights.keys()))
                for id, value in chorale_scores.items():
                    writer.writerow([id, value])

    if num_generations != 0:
        generation_scores = {}
        print('Generating and scoring generated chorales')
        ensure_dir(f'generations/{model_id}')
        for i in range(num_generations):
            chorale, tensor_chorale, tensor_metadata = deepbach.generation(
                num_iterations=num_iterations,
                sequence_length_ticks=sequence_length_ticks,
            )
            chorale.write('midi', f'generations/{model_id}/c{i}.mid')
            score = score_chorale(chorale, dataset)
            generation_scores[i] = score

        # write scores to file
        if write_scores:
            with open(f'data/model{model_id}_scores.csv', 'w') as generation_file:
                writer = csv.writer(generation_file)
                writer.writerow(['', 'score'] + list(weights.keys()))
                for id, value in generation_scores.items():
                    writer.writerow([id, value])
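
# The update step above relies on get_threshold from DeepBach.helpers. Below
# is a plausible sketch of such a helper under the assumptions visible here
# (a CSV of chorale scores with a header row, the relevant score in the
# requested column, and a cutoff at the worst Bach score rounded up to the
# nearest 0.01). The real implementation may differ.
import csv
import math

def get_threshold(filepath, col=-1):
    """Hypothetical sketch: return the minimum score in column `col` of the
    CSV at `filepath`, rounded up to the nearest 0.01."""
    with open(filepath, 'r') as f:
        rows = list(csv.reader(f))
    scores = []
    for row in rows[1:]:  # skip the header row
        try:
            scores.append(float(row[col]))
        except (ValueError, IndexError):
            continue
    return math.ceil(min(scores) * 100) / 100
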
from grader.grader import score_chorale
from DatasetManager.chorale_dataset import ChoraleDataset
from DatasetManager.dataset_manager import DatasetManager, all_datasets
from DatasetManager.metadata import FermataMetadata, TickMetadata, KeyMetadata
from DatasetManager.helpers import GeneratedChoraleIteratorGen
from DeepBach.model_manager import DeepBach
from DeepBach.helpers import *

print('step 1/3: prepare dataset')
dataset_manager = DatasetManager()
metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
chorale_dataset_kwargs = {
    'voice_ids': [0, 1, 2, 3],
    'metadatas': metadatas,
    'sequences_size': 8,
    'subdivision': 4,
    'include_transpositions': False,
}

bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
    name='bach_chorales', **chorale_dataset_kwargs)
dataset = bach_chorales_dataset
load_or_pickle_distributions(dataset)

print(dataset.gaussian.covariances_)

# chorale = converter.parse('generations/6/c187.mid')
# score = score_chorale(chorale, dataset)
# print(score)
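
# Building on the commented-out lines above, a hedged sketch of scoring a
# directory of previously generated MIDI files with score_chorale; the
# 'generations/6' path and the glob pattern are illustrative assumptions.
import glob
from music21 import converter

for midi_path in sorted(glob.glob('generations/6/*.mid')):
    chorale = converter.parse(midi_path)
    score = score_chorale(chorale, dataset)
    print(f'{midi_path}: {score}')
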