def main(note_embedding_dim, metadata_embedding_dim, num_layers,
         lstm_hidden_size, dropout_lstm, input_dropout, linear_hidden_size,
         batch_size, num_epochs, train, log, lr, plot, teacher_forcing,
         early_stop):
    """Train or evaluate a ConstraintModelGaussianReg on the Folk dataset.

    When ``train`` is truthy the model is trained from scratch; otherwise a
    pre-trained checkpoint is loaded and evaluated on the test split.
    """
    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs)
    model = ConstraintModelGaussianReg(
        dataset=folk_dataset,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=teacher_forcing)
    if train:
        if torch.cuda.is_available():
            model.cuda()
        trainer = AnticipationRNNGaussianRegTrainer(
            dataset=folk_dataset,
            model=model,
            lr=lr,
            early_stopping=early_stop)
        trainer.train_model(
            batch_size=batch_size,
            num_epochs=num_epochs,
            plot=plot,
            log=log)
    else:
        model.load()
        # FIX: guard the GPU transfer (the original called model.cuda()
        # unconditionally here, crashing on CPU-only hosts) and switch to
        # eval mode before testing, mirroring the MeasureVAE test path.
        if torch.cuda.is_available():
            model.cuda()
        model.eval()
        tester = AnticipationRNNTester(
            dataset=folk_dataset_test, model=model)
        tester.test_model(batch_size=512)
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, batch_size, num_epochs, train, plot, log, lr):
    """Train or evaluate a MeasureVAE on the Folk dataset.

    When ``train`` is truthy the VAE is trained from scratch; otherwise a
    pre-trained checkpoint is loaded and run through the tester.
    """
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    mvae_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_test_kwargs)
    model = MeasureVAE(
        dataset=folk_dataset,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_encoder_layers=num_encoder_layers,
        encoder_hidden_size=encoder_hidden_size,
        encoder_dropout_prob=encoder_dropout_prob,
        latent_space_dim=latent_space_dim,
        num_decoder_layers=num_decoder_layers,
        decoder_hidden_size=decoder_hidden_size,
        decoder_dropout_prob=decoder_dropout_prob,
        has_metadata=has_metadata)
    if train:
        if torch.cuda.is_available():
            model.cuda()
        trainer = VAETrainer(
            dataset=folk_dataset,
            model=model,
            lr=lr)
        trainer.train_model(
            batch_size=batch_size,
            num_epochs=num_epochs,
            plot=plot,
            log=log)
    else:
        model.load()
        # FIX: only move to GPU when CUDA is present (the original called
        # model.cuda() unconditionally, crashing on CPU-only machines).
        if torch.cuda.is_available():
            model.cuda()
        model.eval()
        tester = VAETester(dataset=folk_dataset_test, model=model)
        tester.test_model()
def block_dataloader(self, batch_size):
    """Build per-split iterators over fixed-size chorale blocks.

    :param batch_size: number of blocks per batch
    :return: list with one generator per data split; each generator yields
        dicts ``{'x': score_tensor}`` — the metadata half of every batch
        tuple is discarded
    """
    dataset_manager = DatasetManager()
    # NOTE(review): `subdivision`, `num_voices` and `metadatas` are not
    # defined in this method — presumably module-level globals; verify.
    num_tokens_per_beat = subdivision * num_voices
    # Positive dataset
    num_beats = self.num_tokens_per_block // num_tokens_per_beat
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': num_beats,
        'subdivision': subdivision,
    }
    dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
        name='bach_chorales_beats', **chorale_dataset_kwargs)
    return [
        ({'x': t[0]}  # discard metadata
         for t in dataloader)
        for dataloader in dataset.data_loaders(batch_size)
    ]
def main(note_embedding_dim, meta_embedding_dim, num_layers, lstm_hidden_size,
         dropout_lstm, linear_hidden_size, batch_size, num_epochs, train,
         num_iterations, sequence_length_ticks):
    """Train DeepBach on the Bach chorales, or load it and generate one chorale.

    When ``train`` is falsy a pre-trained model is loaded; in both cases a
    chorale of ``sequence_length_ticks`` ticks is then generated with
    ``num_iterations`` sampling iterations and written to ``test.mid``.
    """
    dataset_manager = DatasetManager()
    metadatas = [
        FermataMetadata(),
        TickMetadata(subdivision=4),
        KeyMetadata()
    ]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4
    }
    bach_chorales_dataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs
    )
    dataset = bach_chorales_dataset
    deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size
    )
    if train:
        deepbach.train(batch_size=batch_size, num_epochs=num_epochs)
    else:
        deepbach.load()
        # FIX: guard the GPU transfer so generation also works on CPU-only
        # machines (the original called deepbach.cuda() unconditionally).
        if torch.cuda.is_available():
            deepbach.cuda()
    print('Generation')
    score, tensor_chorale, tensor_metadata = deepbach.generation(
        num_iterations=num_iterations,
        length=sequence_length_ticks,
    )
    # PEP 8: no spaces around '=' in keyword arguments.
    score.write('midi', fp='test.mid')
def main(
        note_embedding_dim,
        meta_embedding_dim,
        num_layers,
        lstm_hidden_size,
        dropout_lstm,
        input_dropout,
        linear_hidden_size,
        batch_size,
        num_epochs,
        train,
        no_metadata,
):
    """Train an AnticipationRNN on Bach soprano lines, or load a pre-trained
    one and fill a constrained score.
    """
    metadatas = [
        TickMetadata(subdivision=4),
    ]
    dataset_manager = DatasetManager()
    chorale_dataset_kwargs = {
        'voice_ids': [0],
        'metadatas': metadatas,
        'sequences_size': 20,
        'subdivision': 4
    }
    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    model = AnticipationRNN(
        chorale_dataset=bach_chorales_dataset,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        no_metadata=no_metadata,
    )
    # FIX: guard the GPU transfers with torch.cuda.is_available() — the
    # original called model.cuda() unconditionally in both branches,
    # crashing on CPU-only machines.
    if train:
        if torch.cuda.is_available():
            model.cuda()
        model.train_model(batch_size=batch_size, num_epochs=num_epochs)
    else:
        model.load()
        if torch.cuda.is_available():
            model.cuda()
        print('Fill')
        # NOTE(review): C3 is not defined in this function — presumably a
        # module-level constraint constant; verify at file level.
        score, _, _ = model.fill(C3)
        score.show()
def _dataset(self):
    """Build the positive/negative ChoraleBeatsDataset pair for evaluation.

    :return: tuple ``(dataset_positive, dataset_negative)``; the positive
        dataset uses sequences spanning all left+right blocks, the negative
        one sequences spanning a single block
    :raises NotImplementedError: for any ``negative_sampling_method`` other
        than ``'random'``
    """
    if self.negative_sampling_method == 'random':
        dataset_manager = DatasetManager()
        self.cache_dir = dataset_manager.cache_dir
        # NOTE(review): `subdivision`, `num_voices` and `metadatas` are not
        # defined locally — presumably module-level globals; verify.
        num_tokens_per_beat = subdivision * num_voices
        num_tokens = self.num_tokens_per_block * (
                self.num_blocks_left + self.num_blocks_right)
        assert num_tokens % num_tokens_per_beat == 0
        # Positive dataset: sequences covering the whole context window.
        num_beats_positive = num_tokens // num_tokens_per_beat
        chorale_dataset_positive_kwargs = {
            'voice_ids': [0, 1, 2, 3],
            'metadatas': metadatas,
            'sequences_size': num_beats_positive,
            'subdivision': subdivision,
        }
        dataset_positive: ChoraleBeatsDataset = dataset_manager.get_dataset(
            name='bach_chorales_beats_test',
            **chorale_dataset_positive_kwargs)
        # Negative dataset: sequences covering a single block.
        # FIX: dropped the redundant second computation of
        # num_tokens_per_beat — it was identical to the value above.
        num_beats_negative = self.num_tokens_per_block // num_tokens_per_beat
        chorale_dataset_negative_kwargs = {
            'voice_ids': [0, 1, 2, 3],
            'metadatas': metadatas,
            'sequences_size': num_beats_negative,
            'subdivision': subdivision,
        }
        dataset_negative: ChoraleBeatsDataset = dataset_manager.get_dataset(
            name='bach_chorales_beats_test',
            **chorale_dataset_negative_kwargs)
        return dataset_positive, dataset_negative
    else:
        raise NotImplementedError
def init_app(note_embedding_dim, meta_embedding_dim, num_layers,
             lstm_hidden_size, dropout_lstm, linear_hidden_size,
             num_iterations, sequence_length_ticks, ticks_per_quarter, port):
    """Load a pre-trained DeepBach into module globals and start the Flask app.

    Generation settings are stored in module-level globals consumed by the
    request handlers; ``port`` is used only when serving on all interfaces.
    """
    global metadatas
    global _sequence_length_ticks
    global _num_iterations
    global _ticks_per_quarter
    global bach_chorales_dataset
    _ticks_per_quarter = ticks_per_quarter
    _sequence_length_ticks = sequence_length_ticks
    _num_iterations = num_iterations
    dataset_manager = DatasetManager()
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4
    }
    _bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    bach_chorales_dataset = _bach_chorales_dataset
    # the requested generation length must be a whole number of subdivisions
    assert sequence_length_ticks % bach_chorales_dataset.subdivision == 0
    global deepbach
    deepbach = DeepBach(dataset=bach_chorales_dataset,
                        note_embedding_dim=note_embedding_dim,
                        meta_embedding_dim=meta_embedding_dim,
                        num_layers=num_layers,
                        lstm_hidden_size=lstm_hidden_size,
                        dropout_lstm=dropout_lstm,
                        linear_hidden_size=linear_hidden_size)
    deepbach.load()
    # FIX: only move the model to GPU when CUDA is available (the original
    # called deepbach.cuda() unconditionally, crashing on CPU-only hosts).
    if torch.cuda.is_available():
        deepbach.cuda()
    # launch the script
    # use threaded=True to fix Chrome/Chromium engine hanging on requests
    # [https://stackoverflow.com/a/30670626]
    local_only = False
    if local_only:
        # accessible only locally:
        app.run(threaded=True)
    else:
        # accessible from outside:
        app.run(host='0.0.0.0', port=port, threaded=True)
def main(include_transpositions):
    """Prepare the Bach chorales dataset, then compute pairs for the
    configured model ids."""
    manager = DatasetManager()
    print('step 1/3: prepare dataset')
    metadatas = [
        FermataMetadata(),
        TickMetadata(subdivision=4),
        KeyMetadata(),
    ]
    dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4,
        'include_transpositions': include_transpositions,
    }
    chorales: ChoraleDataset = manager.get_dataset(
        name='bach_chorales', **dataset_kwargs)
    get_pairs(chorales, model_ids=[5, 9])
def setup(self):
    """Load the pre-trained DeepBach model and the FluidSynth converter."""
    # music21.environment.set("musicxmlPath", "/bin/true")
    # Model hyper-parameters (must match the saved checkpoint).
    # FIX: removed unused locals (batch_size, num_epochs, train,
    # num_iterations, sequence_length_ticks) — assigned but never read here.
    note_embedding_dim = 20
    meta_embedding_dim = 20
    num_layers = 2
    lstm_hidden_size = 256
    dropout_lstm = 0.5
    linear_hidden_size = 256
    dataset_manager = DatasetManager()
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        "voice_ids": [0, 1, 2, 3],
        "metadatas": metadatas,
        "sequences_size": 8,
        "subdivision": 4,
    }
    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name="bach_chorales", **chorale_dataset_kwargs
    )
    dataset = bach_chorales_dataset
    self.deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size,
    )
    self.deepbach.load()
    # load fluidsynth for midi-to-audio conversion
    self.fs = FluidSynth()
def init_app(
        note_embedding_dim,
        meta_embedding_dim,
        num_layers,
        lstm_hidden_size,
        dropout_lstm,
        input_dropout,
        linear_hidden_size,
):
    """Load a pre-trained AnticipationRNN into the module-level ``model``
    global and start the (local-only) Flask server.
    """
    metadatas = [
        TickMetadata(subdivision=4),
    ]
    dataset_manager = DatasetManager()
    chorale_dataset_kwargs = {
        'voice_ids': [0],
        'metadatas': metadatas,
        'sequences_size': 20,
        'subdivision': 4
    }
    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    global model
    model = AnticipationRNN(
        chorale_dataset=bach_chorales_dataset,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
    )
    model.load()
    # FIX: guard the GPU transfer (the original called model.cuda()
    # unconditionally, crashing on CPU-only machines).
    if torch.cuda.is_available():
        model.cuda()
    # launch the script
    # accessible only locally:
    app.run()
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, num_layers, lstm_hidden_size, dropout_lstm,
         input_dropout, linear_hidden_size, batch_size, num_target,
         num_models):
    """Compare three pre-trained inpainting models on the Folk test split.

    Loads a pre-trained MeasureVAE, LatentRNN, ARNN-Reg and ARNN-Baseline,
    runs all three inpainters over the same test dataloader via
    ``loss_and_acc_test`` and prints mean loss / accuracy for each.
    All checkpoints must already exist — nothing is trained here.
    """
    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)
    # init vae model
    vae_model = MeasureVAE(dataset=folk_dataset_vae,
                           note_embedding_dim=note_embedding_dim,
                           metadata_embedding_dim=metadata_embedding_dim,
                           num_encoder_layers=num_encoder_layers,
                           encoder_hidden_size=encoder_hidden_size,
                           encoder_dropout_prob=encoder_dropout_prob,
                           latent_space_dim=latent_space_dim,
                           num_decoder_layers=num_decoder_layers,
                           decoder_hidden_size=decoder_hidden_size,
                           decoder_dropout_prob=decoder_dropout_prob,
                           has_metadata=has_metadata)
    vae_model.load()  # VAE model must be pre-trained
    if torch.cuda.is_available():
        vae_model.cuda()
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs)
    # init latent_rnn model and latent_rnn_tester
    latent_rnn_model = LatentRNN(dataset=folk_dataset_train,
                                 vae_model=vae_model,
                                 num_rnn_layers=num_latent_rnn_layers,
                                 rnn_hidden_size=latent_rnn_hidden_size,
                                 dropout=latent_rnn_dropout_prob,
                                 rnn_class=torch.nn.GRU,
                                 auto_reg=False,
                                 teacher_forcing=True)
    latent_rnn_model.load()  # latent_rnn model must be pre-trained
    if torch.cuda.is_available():
        latent_rnn_model.cuda()
    latent_rnn_tester = LatentRNNTester(dataset=folk_dataset_test,
                                        model=latent_rnn_model)
    # init arnn model and arnn_tester
    arnn_model = ConstraintModelGaussianReg(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_model.load()  # ARNN model must be pre-trained
    if torch.cuda.is_available():
        arnn_model.cuda()
    arnn_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                        model=arnn_model)
    # init arnn-baseline model and its tester (same hyper-parameters)
    arnn_baseline_model = AnticipationRNNBaseline(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_baseline_model.load()  # ARNN baseline model must be pre-trained
    if torch.cuda.is_available():
        arnn_baseline_model.cuda()
    arnn_baseline_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                                 model=arnn_baseline_model)
    # create test dataloader
    # NOTE(review): split=(0.01, 0.01) leaves ~98% of the data in the third
    # (test) loader — presumably intentional for evaluation; verify.
    (_, _, test_dataloader) = folk_dataset_test.data_loaders(
        batch_size=batch_size,
        split=(0.01, 0.01))
    # test
    print('Num Test Batches: ', len(test_dataloader))
    latent_rnn_mean_loss, latent_rnn_mean_accuracy, \
        arnn_mean_loss, arnn_mean_accuracy, \
        arnn_baseline_mean_loss, arnn_baseline_mean_accuracy = loss_and_acc_test(
            data_loader=test_dataloader,
            latent_rnn_tester=latent_rnn_tester,
            arnn_tester=arnn_tester,
            arnn_baseline_tester=arnn_baseline_tester,
            num_target_measures=num_target,
            num_models=num_models)
    print('Test Epoch:')
    print('latent_rnn Test Loss: ', latent_rnn_mean_loss, '\n'
          'latent_rnn Test Accuracy: ', latent_rnn_mean_accuracy * 100, '\n'
          'ARNN Test Loss: ', arnn_mean_loss, '\n'
          'ARNN Test Accuracy: ', arnn_mean_accuracy * 100, '\n'
          'ARNN Baseline Test Loss: ', arnn_baseline_mean_loss, '\n'
          'ARNN Baseline Test Accuracy: ', arnn_baseline_mean_accuracy * 100, '\n')
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, num_layers, lstm_hidden_size, dropout_lstm,
         input_dropout, linear_hidden_size, batch_size, num_target,
         num_models):
    """Write MIDI files comparing test melodies with model inpaintings.

    For each of the first 32 test melodies this saves up to four files in
    ``saved_midi/``: the truncated original, a LatentRNN regeneration, an
    ARNN-Reg regeneration and an ARNN-Baseline regeneration. All models
    (including the underlying MeasureVAE) must be pre-trained; melodies
    shorter than 16 measures and already-existing output files are skipped.

    NOTE(review): the ``num_target``, ``batch_size`` and ``num_models``
    parameters are shadowed or unused in this body (``num_target`` is
    re-assigned to 4 below, ``batch_size`` is rebound from a tensor size) —
    confirm this is intended.
    """
    random.seed(0)
    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)
    # init vae model
    vae_model = MeasureVAE(dataset=folk_dataset_vae,
                           note_embedding_dim=note_embedding_dim,
                           metadata_embedding_dim=metadata_embedding_dim,
                           num_encoder_layers=num_encoder_layers,
                           encoder_hidden_size=encoder_hidden_size,
                           encoder_dropout_prob=encoder_dropout_prob,
                           latent_space_dim=latent_space_dim,
                           num_decoder_layers=num_decoder_layers,
                           decoder_hidden_size=decoder_hidden_size,
                           decoder_dropout_prob=decoder_dropout_prob,
                           has_metadata=has_metadata)
    vae_model.load()  # VAE model must be pre-trained
    if torch.cuda.is_available():
        vae_model.cuda()
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs)
    # Initialize stuff
    test_filenames = folk_dataset_test.dataset_filenames
    num_melodies = 32
    num_measures = 16
    # 16 measures of 4 beats at 6 ticks per beat
    req_length = num_measures * 4 * 6
    num_past = 6
    num_future = 6
    num_target = 4
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    save_folder = 'saved_midi/'
    # First save original data
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]  # strip the file extension
        # save original scores
        save_filename = os.path.join(cur_dir,
                                     save_folder + f_id + '_original.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        metadata_tensor = metadata_tensor[:, :req_length, :]
        trunc_score = folk_dataset_test.tensor_to_score(score_tensor)
        trunc_score.write('midi', fp=save_filename)
    # Initialize models and testers
    latent_rnn_model = LatentRNN(dataset=folk_dataset_train,
                                 vae_model=vae_model,
                                 num_rnn_layers=num_latent_rnn_layers,
                                 rnn_hidden_size=latent_rnn_hidden_size,
                                 dropout=latent_rnn_dropout_prob,
                                 rnn_class=torch.nn.GRU,
                                 auto_reg=False,
                                 teacher_forcing=True)
    latent_rnn_model.load()  # Latent RNN model must be pre-trained
    if torch.cuda.is_available():
        latent_rnn_model.cuda()
    latent_rnn_tester = LatentRNNTester(dataset=folk_dataset_test,
                                        model=latent_rnn_model)

    def process_latent_rnn_batch(score_tensor, num_past=6, num_future=6,
                                 num_target=4):
        # Split a full 16-measure score into past/future/target contexts
        # (24 ticks per measure).
        assert (num_past + num_future + num_target == 16)
        score_tensor = score_tensor.unsqueeze(0)
        score_tensor = LatentRNNTrainer.split_to_measures(score_tensor, 24)
        tensor_past, tensor_future, tensor_target = LatentRNNTrainer.split_score(
            score_tensor=score_tensor,
            num_past=num_past,
            num_future=num_future,
            num_target=num_target,
            measure_seq_len=24)
        return tensor_past, tensor_future, tensor_target

    # Second save latent_rnn generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        save_filename = os.path.join(cur_dir,
                                     save_folder + f_id + '_latent_rnn.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        # metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        # metadata_tensor = metadata_tensor[:, :req_length, :]
        # save regeneration using latent_rnn
        tensor_past, tensor_future, tensor_target = process_latent_rnn_batch(
            score_tensor, num_past, num_future, num_target)
        # forward pass through latent_rnn
        weights, gen_target, _ = latent_rnn_tester.model(
            past_context=tensor_past,
            future_context=tensor_future,
            target=tensor_target,
            measures_to_generate=num_target,
            train=False,
        )
        # convert to score
        batch_size, _, _ = gen_target.size()
        gen_target = gen_target.view(batch_size, num_target, 24)
        gen_score_tensor = torch.cat((tensor_past, gen_target, tensor_future), 1)
        latent_rnn_score = folk_dataset_test.tensor_to_score(
            gen_score_tensor.cpu())
        latent_rnn_score.write('midi', fp=save_filename)
    # Initialize arnn model and arnn_tester
    arnn_model = ConstraintModelGaussianReg(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_model.load()  # ARNN model must be pre-trained
    if torch.cuda.is_available():
        arnn_model.cuda()
    arnn_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                        model=arnn_model)

    def process_arnn_batch(score_tensor, metadata_tensor, arnn_tester,
                           num_past=6, num_target=4):
        # Build the (score, metadata, constraints, ticks) tuple expected by
        # the ARNN inpainting forward pass.
        score_tensor = score_tensor.unsqueeze(0)
        metadata_tensor = metadata_tensor.unsqueeze(0)
        tensor_score = to_cuda_variable_long(score_tensor)
        tensor_metadata = to_cuda_variable_long(metadata_tensor)
        constraints_location, start_tick, end_tick = arnn_tester.get_constraints_location(
            tensor_score,
            is_stochastic=False,
            start_measure=num_past,
            num_measures=num_target)
        arnn_batch = (tensor_score, tensor_metadata, constraints_location,
                      start_tick, end_tick)
        return arnn_batch

    # Third save ARNN-Reg generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        save_filename = os.path.join(cur_dir,
                                     save_folder + f_id + '_arnn_reg.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        metadata_tensor = metadata_tensor[:, :req_length, :]
        # save regeneration using arnn-reg
        tensor_score, tensor_metadata, constraints_location, start_tick, end_tick = \
            process_arnn_batch(score_tensor, metadata_tensor, arnn_tester,
                               num_past, num_target)
        # forward pass through arnn-reg
        _, gen_target = arnn_tester.model.forward_inpaint(
            score_tensor=tensor_score,
            metadata_tensor=tensor_metadata,
            constraints_loc=constraints_location,
            start_tick=start_tick,
            end_tick=end_tick,
        )
        # convert to score
        arnn_score = folk_dataset_test.tensor_to_score(gen_target.cpu())
        arnn_score.write('midi', fp=save_filename)
    # Initialize arnn-baseline model and its tester
    arnn_baseline_model = AnticipationRNNBaseline(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_baseline_model.load()  # ARNN baseline model must be pre-trained
    if torch.cuda.is_available():
        arnn_baseline_model.cuda()
    arnn_baseline_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                                 model=arnn_baseline_model)
    # Fourth save ARNN-Baseline generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        save_filename = os.path.join(cur_dir,
                                     save_folder + f_id + '_arnn_baseline.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        metadata_tensor = metadata_tensor[:, :req_length, :]
        # save regeneration using arnn-baseline
        tensor_score, tensor_metadata, constraints_location, start_tick, end_tick = \
            process_arnn_batch(score_tensor, metadata_tensor,
                               arnn_baseline_tester, num_past, num_target)
        # forward pass through arnn-baseline
        _, gen_target = arnn_baseline_tester.model.forward_inpaint(
            score_tensor=tensor_score,
            metadata_tensor=tensor_metadata,
            constraints_loc=constraints_location,
            start_tick=start_tick,
            end_tick=end_tick,
        )
        # convert to score
        arnn_baseline_score = folk_dataset_test.tensor_to_score(
            gen_target.cpu())
        arnn_baseline_score.write('midi', fp=save_filename)
def __init__(self, writing_dir, corpus_it_gen, subdivision_model=2,
             subdivision_read=4, sequence_size=3, velocity_quantization=2,
             temperature=1.2):
    """Build a reduction Transformer wrapper for generation.

    :param writing_dir: directory where generated output will be written
    :param corpus_it_gen: iterator/generator over the corpus to process
    :param subdivision_model: sixteenth-note subdivision the model's
        training dataset was built with
    :param subdivision_read: subdivision used when reading input scores
    :param sequence_size: sequence size passed to the dataset/model
    :param velocity_quantization: number of velocity quantization bins
    :param temperature: sampling temperature kept for generation time
    """
    self.subdivision_read = subdivision_read
    self.sequence_size = sequence_size
    self.velocity_quantization = velocity_quantization
    self.writing_dir = writing_dir
    #################################################################
    # Need the old db used to train the model (yes it sucks...)
    dataset_manager = DatasetManager()
    arrangement_dataset_kwargs = {
        'transpose_to_sounding_pitch': True,
        'subdivision': subdivision_model,
        'sequence_size': sequence_size,
        'velocity_quantization': velocity_quantization,
        'max_transposition': 12,
        'compute_statistics_flag': False
    }
    dataset = dataset_manager.get_dataset(name='arrangement_large',
                                          **arrangement_dataset_kwargs)
    # Model params (need to know them :))
    # NOTE(review): these hyper-parameters are hard-coded and must match
    # the checkpoint being loaded — verify against the training config.
    num_heads = 8
    per_head_dim = 64
    local_position_embedding_dim = 8
    position_ff_dim = 1024
    hierarchical = False
    block_attention = False
    nade = False
    conditioning = True
    double_conditioning = False
    num_layers = 2
    suffix = 'TEST'
    reducer_input_dim = num_heads * per_head_dim
    processor_encoder = ReductionDataProcessor(
        dataset=dataset,
        embedding_dim=reducer_input_dim - local_position_embedding_dim,
        reducer_input_dim=reducer_input_dim,
        local_position_embedding_dim=local_position_embedding_dim,
        flag='orchestra',
        block_attention=False)
    processor_decoder = ReductionDataProcessor(
        dataset=dataset,
        embedding_dim=reducer_input_dim - local_position_embedding_dim,
        reducer_input_dim=reducer_input_dim,
        local_position_embedding_dim=local_position_embedding_dim,
        flag='piano',
        block_attention=block_attention)
    processor_encodencoder = None
    #################################################################
    #################################################################
    # Init model
    # Use all gpus available
    gpu_ids = [int(gpu) for gpu in range(torch.cuda.device_count())]
    print(gpu_ids)
    self.model = Transformer(
        dataset=dataset,
        data_processor_encodencoder=processor_encodencoder,
        data_processor_encoder=processor_encoder,
        data_processor_decoder=processor_decoder,
        num_heads=num_heads,
        per_head_dim=per_head_dim,
        position_ff_dim=position_ff_dim,
        hierarchical_encoding=hierarchical,
        block_attention=block_attention,
        nade=nade,
        conditioning=conditioning,
        double_conditioning=double_conditioning,
        num_layers=num_layers,
        dropout=0.1,
        input_dropout=0.2,
        reduction_flag=True,
        lr=1e-4,
        gpu_ids=gpu_ids,
        suffix=suffix)
    #################################################################
    self.corpus_it_gen = corpus_it_gen
    self.temperature = temperature
    return
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, batch_size, num_epochs, train, lr, plot,
         log, auto_reg, teacher_forcing, early_stop):
    """Train or test a LatentRNN inpainter built on a pre-trained MeasureVAE.

    When ``train`` is truthy the LatentRNN is trained; otherwise it is
    loaded, evaluated on the test split and two sample generations are
    rendered/printed.
    """
    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs
    )
    # init vae model
    vae_model = MeasureVAE(
        dataset=folk_dataset_vae,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_encoder_layers=num_encoder_layers,
        encoder_hidden_size=encoder_hidden_size,
        encoder_dropout_prob=encoder_dropout_prob,
        latent_space_dim=latent_space_dim,
        num_decoder_layers=num_decoder_layers,
        decoder_hidden_size=decoder_hidden_size,
        decoder_dropout_prob=decoder_dropout_prob,
        has_metadata=has_metadata
    )
    vae_model.load()  # VAE model must be pre-trained
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs
    )
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs
    )
    # init latent_rnn model
    model = LatentRNN(
        dataset=folk_dataset_train,
        vae_model=vae_model,
        num_rnn_layers=num_latent_rnn_layers,
        rnn_hidden_size=latent_rnn_hidden_size,
        dropout=latent_rnn_dropout_prob,
        rnn_class=torch.nn.GRU,
        auto_reg=auto_reg,
        teacher_forcing=teacher_forcing
    )
    if train:
        if torch.cuda.is_available():
            model.cuda()
        trainer = LatentRNNTrainer(
            dataset=folk_dataset_train,
            model=model,
            lr=lr,
            early_stopping=early_stop
        )
        trainer.train_model(
            batch_size=batch_size,
            num_epochs=num_epochs,
            plot=plot,
            log=log
        )
    else:
        model.load()
        # FIX: guard the GPU transfer (the original called model.cuda()
        # unconditionally here, crashing on CPU-only machines).
        if torch.cuda.is_available():
            model.cuda()
        model.eval()
        tester = LatentRNNTester(
            dataset=folk_dataset_test,
            model=model
        )
        tester.test_model(
            batch_size=batch_size
        )
        gen_score, score, original_score = tester.generation_random(
            tensor_score=None,
            start_measure=8,
            num_measures_gen=2
        )
        print(" --- score --- ")
        print(score)
        gen_score.show()
        original_score.show()
        gen_score2, score, original_score2 = tester.generation_test()
        gen_score2.show()
        original_score2.show()
        print(" --- score --- ")
        print(score)
def main(note_embedding_dim, meta_embedding_dim, num_layers, lstm_hidden_size,
         dropout_lstm, linear_hidden_size, batch_size, num_epochs, train,
         update, num_iterations, sequence_length_ticks, model_id,
         include_transpositions, update_iterations, generations_per_iteration,
         num_generations, score_chorales, write_scores):
    """Train/update a DeepBach model and optionally score/generate chorales.

    Steps:
      1. build the Bach chorale dataset,
      2. train (or load) the base DeepBach model; optionally run
         ``update_iterations`` rounds of self-improvement in which generated
         chorales scoring above a threshold are fed back as training data,
      3. optionally score real chorales and/or generate and score new ones,
         writing CSV reports when ``write_scores`` is truthy.
    """
    print('step 1/3: prepare dataset')
    dataset_manager = DatasetManager()
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4,
        'include_transpositions': include_transpositions,
    }
    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    dataset = bach_chorales_dataset
    load_or_pickle_distributions(dataset)

    print('step 2/3: prepare model')
    print(f'Model ID: {model_id}')
    deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size,
        model_id=model_id,
    )

    if train:
        print('step 2a/3: train base model')
        deepbach.train(batch_size=batch_size,
                       num_epochs=num_epochs,
                       split=[0.85, 0.15])
    else:
        print('step 2a/3: load model')
        deepbach.load()
        deepbach.cuda()

    if update:
        print(
            f'step 2b/3: update base model over {update_iterations} iterations'
        )
        # worst Bach chorale score rounded up to nearest .01
        thres = get_threshold('data/chorale_scores.csv', col=-1)
        print(f'Threshold for selection: {thres}')
        # FIX: this file was previously opened without ever being closed;
        # newline='' is the csv-module convention (avoids blank rows on
        # Windows).
        with open('data/update_scores.csv', 'w', newline='') as update_file:
            writer = csv.writer(update_file)  # was misleadingly named 'reader'
            writer.writerow(['iteration', 'chorale ID', 'score'])
            for i in range(update_iterations):
                print(f'----------- Iteration {i} -----------')
                picked_chorales = []
                num_picked_chorales = 0
                ensure_dir(f'generations/{model_id}/{i}')
                for j in tqdm(range(generations_per_iteration)):
                    chorale, tensor_chorale, tensor_metadata = deepbach.generation(
                        num_iterations=num_iterations,
                        sequence_length_ticks=sequence_length_ticks,
                    )
                    score = score_chorale(chorale, dataset)
                    # write data to csv file: iteration, generation #, score
                    writer.writerow([i, j, score])
                    if score > thres:
                        print(f'Picked chorale {j} with score {score}')
                        picked_chorales.append(chorale)
                        num_picked_chorales += 1
                        chorale.write(
                            'midi', f'generations/{model_id}/{i}/c{j}.mid')
                print(
                    f'Number of picked chorales for iteration {i}: {num_picked_chorales}'
                )
                if num_picked_chorales == 0:
                    continue
                # register the picked chorales as a new dataset and fine-tune
                # the model on them
                all_datasets.update({
                    f'generated_chorales_{i}': {
                        'dataset_class_name': ChoraleDataset,
                        'corpus_it_gen': GeneratedChoraleIteratorGen(picked_chorales)
                    }
                })
                generated_dataset: ChoraleDataset = dataset_manager.get_dataset(
                    name=f'generated_chorales_{i}',
                    index2note_dicts=dataset.index2note_dicts,
                    note2index_dicts=dataset.note2index_dicts,
                    voice_ranges=dataset.voice_ranges,
                    **chorale_dataset_kwargs)
                deepbach.dataset = generated_dataset
                deepbach.train(
                    batch_size=batch_size,
                    num_epochs=2,
                    split=[1, 0],  # use all selected chorales for training
                    early_stopping=False)

    # step 3: score real chorales and/or generate new ones
    if score_chorales:
        chorale_scores = {}
        print('Scoring real chorales')
        for chorale_id, chorale in tqdm(enumerate(dataset.iterator_gen()),
                                        total=num_generations):
            score = score_chorale(chorale, dataset)
            chorale_scores[chorale_id] = score
            if chorale_id == num_generations:
                break
        # write scores to file
        if write_scores:
            with open('data/chorale_scores.csv', 'w', newline='') as chorale_file:
                writer = csv.writer(chorale_file)
                writer.writerow(['', 'score'] + list(weights.keys()))
                # loop variable renamed from 'id' (shadowed the builtin)
                for chorale_id, value in chorale_scores.items():
                    writer.writerow([chorale_id, value])

    if num_generations != 0:
        generation_scores = {}
        print('Generating and scoring generated chorales')
        ensure_dir(f'generations/{model_id}')
        for i in range(num_generations):
            chorale, tensor_chorale, tensor_metadata = deepbach.generation(
                num_iterations=num_iterations,
                sequence_length_ticks=sequence_length_ticks,
            )
            chorale.write('midi', f'generations/{model_id}/c{i}.mid')
            score = score_chorale(chorale, dataset)
            generation_scores[i] = score
        # write scores to file
        if write_scores:
            with open(f'data/model{model_id}_scores.csv', 'w',
                      newline='') as generation_file:
                writer = csv.writer(generation_file)
                writer.writerow(['', 'score'] + list(weights.keys()))
                for gen_id, value in generation_scores.items():
                    writer.writerow([gen_id, value])
from DatasetManager.dataset_manager import DatasetManager
from DatasetManager.metadata import BeatMarkerMetadata, TickMetadata

# Smoke-test script: load the chord-annotated folk dataset and report how
# many batches each data-loader split contains, then re-fetch the held-out
# (test) variant of the same dataset.
dataset_manager = DatasetManager()
metadatas = [
    BeatMarkerMetadata(subdivision=6),
    TickMetadata(subdivision=6),
]
folk_dataset_kwargs = dict(
    metadatas=metadatas,
    sequences_size=32,
    num_bars=1,
    train=True,
)
folk_dataset: FolkDataset = dataset_manager.get_dataset(
    name='folk_4by4nbars_chords', **folk_dataset_kwargs)
loaders = folk_dataset.data_loaders(batch_size=100, split=(0.7, 0.2))
train_dataloader, val_dataloader, test_dataloader = loaders
for label, loader in (('Num Train Batches: ', train_dataloader),
                      ('Num Valid Batches: ', val_dataloader),
                      ('Num Test Batches: ', test_dataloader)):
    print(label, len(loader))

# Same dataset, but the test split this time.
folk_dataset_kwargs = dict(
    metadatas=metadatas,
    sequences_size=32,
    num_bars=1,
    train=False,
)
folk_dataset: FolkDataset = dataset_manager.get_dataset(
    name='folk_4by4nbars_chords', **folk_dataset_kwargs)
print(f'Sizes: {metadata_tensor_dataset.size()}') return dataset if __name__ == '__main__': from DatasetManager.dataset_manager import DatasetManager from DatasetManager.metadata import BeatMarkerMetadata, TickMetadata dataset_manager = DatasetManager() metadatas = [ BeatMarkerMetadata(subdivision=6), TickMetadata(subdivision=6) ] folk_dataset_kwargs = {'metadatas': metadatas, 'sequences_size': 32} folk_dataset: FolkDataset = dataset_manager.get_dataset( name='folk_4by4measures_test', **folk_dataset_kwargs) (train_dataloader, val_dataloader, test_dataloader) = folk_dataset.data_loaders(batch_size=100, split=(0.7, 0.2)) print('Num Train Batches: ', len(train_dataloader)) print('Num Valid Batches: ', len(val_dataloader)) print('Num Test Batches: ', len(test_dataloader)) for sample_id, (score, _) in tqdm(enumerate(train_dataloader)): score = score.long() if torch.cuda.is_available(): score = torch.autograd.Variable(score.cuda()) else: score = torch.autograd.Variable(score) beat_str = folk_dataset.get_beat_strength(score) rhy_ent = folk_dataset.get_rhythmic_entropy(score)
from grader.grader import score_chorale
from DatasetManager.chorale_dataset import ChoraleDataset
from DatasetManager.dataset_manager import DatasetManager, all_datasets
from DatasetManager.metadata import FermataMetadata, TickMetadata, KeyMetadata
from DatasetManager.helpers import GeneratedChoraleIteratorGen
from DeepBach.model_manager import DeepBach
from DeepBach.helpers import *

# Debug script: build the Bach chorale dataset, fit/load the grading
# distributions, and print the Gaussian covariance matrices used by the
# chorale grader.
print('step 1/3: prepare dataset')
dataset_manager = DatasetManager()
metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
chorale_dataset_kwargs = {
    # FIX: was [1, 1, 2, 3] — a duplicated voice id that omits voice 0;
    # every other script in this project uses the four voices [0, 1, 2, 3].
    'voice_ids': [0, 1, 2, 3],
    'metadatas': metadatas,
    'sequences_size': 8,
    'subdivision': 4,
    'include_transpositions': False,
}
bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
    name='bach_chorales', **chorale_dataset_kwargs)
dataset = bach_chorales_dataset
load_or_pickle_distributions(dataset)
print(dataset.gaussian.covariances_)

# Example usage for scoring a single generated chorale:
# chorale = converter.parse('generations/6/c187.mid')
# score = score_chorale(chorale, dataset)
# print(score)
def _dataset(self):
    """
    Loads the appropriate dataset depending on the sampling method.

    :return: ChoraleBeatsDataset for 'random_bad' / 'same_sequence', or
        tuple(ChoraleBeatsDataset, ChoraleBeatsDataset) of
        (positive, negative) datasets for 'random'
    :raise NotImplementedError: for any other sampling method
    """
    method = self.negative_sampling_method
    # Fail early on unsupported methods, before any dataset work happens
    # (preserves the original behavior of raising without side effects).
    if method not in ('random_bad', 'same_sequence', 'random'):
        raise NotImplementedError

    dataset_manager = DatasetManager()
    self.cache_dir = dataset_manager.cache_dir

    # NOTE(review): subdivision, num_voices and metadatas are free names,
    # presumably module-level in this file — confirm.
    num_tokens_per_beat = subdivision * num_voices
    num_tokens = self.num_tokens_per_block * (self.num_blocks_left
                                              + self.num_blocks_right)
    # The full context must cover a whole number of beats.
    assert num_tokens % num_tokens_per_beat == 0
    num_beats_positive = num_tokens // num_tokens_per_beat

    def _beats_dataset(sequences_size):
        # Shared builder: the three branches previously duplicated this
        # construction verbatim.
        return dataset_manager.get_dataset(
            name='bach_chorales_beats',
            voice_ids=[0, 1, 2, 3],
            metadatas=metadatas,
            sequences_size=sequences_size,
            subdivision=subdivision,
        )

    # Positive dataset (needed by every supported method)
    dataset_positive: ChoraleBeatsDataset = _beats_dataset(num_beats_positive)

    # FIXME for the moment, 'same_sequence' is exactly the same as
    # 'random_bad': both return only the positive dataset.
    if method in ('random_bad', 'same_sequence'):
        return dataset_positive

    # 'random': additionally build a negative dataset sized to one block.
    num_beats_negative = self.num_tokens_per_block // num_tokens_per_beat
    dataset_negative: ChoraleBeatsDataset = _beats_dataset(num_beats_negative)
    return dataset_positive, dataset_negative
chord_name_index = chord_name_index.item() chord_desc = index2chordroot[chord_root_index] + \ index2chordname[chord_name_index] chord_list.append(chord_desc) return score, chord_list if __name__ == '__main__': from DatasetManager.dataset_manager import DatasetManager dataset_manager = DatasetManager() leadsheet_dataset_kwargs = { 'sequences_size': 64, } lsdb_dataset: LsdbDataset = dataset_manager.get_dataset( name='lsdb_test', **leadsheet_dataset_kwargs) dl, _, _ = lsdb_dataset.data_loaders(1) tensor_lead, tensor_chord_root, tensor_chord_name = next(dl.__iter__()) print(tensor_lead[0].size(), tensor_chord_root[0].size(), tensor_chord_name[0].size()) tensor_chord = (tensor_chord_root[0], tensor_chord_name[0]) score, chord_list = lsdb_dataset.tensor_leadsheet_to_score_and_chord_list( tensor_lead[0], tensor_chord) score.show() print(chord_list) # leadsheet_path = '/home/ashis/Documents/AnticipationRNNFolkDataset/DatasetManager/DatasetManager/lsdb/xml/4_4_all/52nd Street Theme.xml' # leadsheet = music21.converter.parse(leadsheet_path) # lead, chord_root, chord_name = lsdb_dataset.get_score_tensor(leadsheet) # chord = (chord_root, chord_name) # score = lsdb_dataset.tensor_to_score( # lead,
def init_app(block_attention, hierarchical, nade, num_layers, dropout,
             input_dropout, per_head_dim, num_heads,
             local_position_embedding_dim, position_ff_dim, suffix,
             subdivision, sequence_size, velocity_quantization,
             max_transposition, port):
    """Load the arrangement Transformer and start the Flask server.

    Builds the arrangement dataset and its encoder/decoder data processors,
    loads a pre-trained (overfit) Transformer onto the GPU, initialises the
    generation state from a hard-coded piano score, then blocks in
    ``app.run`` until the server is stopped.
    """
    # Generation state shared with the request handlers via module globals.
    global metadatas
    global _subdivision
    global _batch_size
    global _banned_instruments
    global _temperature
    global _lowest_entropy_first
    global _context_size
    _subdivision = subdivision
    _batch_size = 1
    _banned_instruments = []
    _temperature = 1.2
    _lowest_entropy_first = True

    # FIX: range() already yields ints — the per-element int() call was
    # redundant.
    gpu_ids = list(range(torch.cuda.device_count()))
    print(gpu_ids)

    dataset_manager = DatasetManager()
    arrangement_dataset_kwargs = {
        'transpose_to_sounding_pitch': True,
        'subdivision': subdivision,
        'sequence_size': sequence_size,
        'velocity_quantization': velocity_quantization,
        'max_transposition': max_transposition,
        'compute_statistics_flag': False
    }
    dataset: ArrangementDataset = dataset_manager.get_dataset(
        name='arrangement', **arrangement_dataset_kwargs)

    reducer_input_dim = num_heads * per_head_dim
    # Piano (encoder) side never uses block attention.
    processor_encoder = ArrangementDataProcessor(
        dataset=dataset,
        embedding_dim=reducer_input_dim - local_position_embedding_dim,
        reducer_input_dim=reducer_input_dim,
        local_position_embedding_dim=local_position_embedding_dim,
        flag_orchestra=False,
        block_attention=False)
    processor_decoder = ArrangementDataProcessor(
        dataset=dataset,
        embedding_dim=reducer_input_dim - local_position_embedding_dim,
        reducer_input_dim=reducer_input_dim,
        local_position_embedding_dim=local_position_embedding_dim,
        flag_orchestra=True,
        block_attention=block_attention)

    _context_size = processor_decoder.num_frames_orchestra - 1

    global model
    model = Transformer(
        dataset=dataset,
        data_processor_encoder=processor_encoder,
        data_processor_decoder=processor_decoder,
        num_heads=num_heads,
        per_head_dim=per_head_dim,
        position_ff_dim=position_ff_dim,
        hierarchical_encoding=hierarchical,
        block_attention=block_attention,
        nade=nade,
        num_layers=num_layers,
        dropout=dropout,
        input_dropout=input_dropout,
        conditioning=True,
        lr=0,  # inference only — no optimisation happens here
        gpu_ids=gpu_ids,
        suffix=suffix,
    )
    model.load_overfit()
    model.cuda()

    # TODO : piano should be modifiable (by dropping mxml file ?)
    filepath = "/home/leo/Recherche/Databases/Orchestration/arrangement_mxml/source_for_generation/chopin_Prel_Op28_20_xs.xml"

    global _piano
    global _rhythm_piano
    global _orchestra
    global _orchestra_silenced_instruments
    _piano, _rhythm_piano, _orchestra, _orchestra_silenced_instruments = \
        model.data_processor_encoder.init_generation_filepath(
            _batch_size, filepath,
            banned_instruments=_banned_instruments,
            subdivision=_subdivision)

    # launch the script
    # use threaded=True to fix Chrome/Chromium engine hanging on requests
    # [https://stackoverflow.com/a/30670626]
    local_only = False
    if local_only:
        # accessible only locally:
        app.run(threaded=True)
    else:
        # accessible from outside:
        app.run(host='0.0.0.0', port=port, threaded=True)
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, num_layers, lstm_hidden_size, dropout_lstm,
         input_dropout, linear_hidden_size, batch_size, num_target,
         num_models):
    """Regenerate the middle measures of one fixed test tune ('tune_16154')
    with a pre-trained MeasureVAE + LatentRNN and save the results as MIDI.

    NOTE(review): num_layers, lstm_hidden_size, dropout_lstm, input_dropout,
    linear_hidden_size and num_models are accepted but unused in the visible
    body, and num_target / batch_size are rebound locally so their passed-in
    values are ignored — presumably kept for CLI compatibility; confirm.
    """
    random.seed(0)  # deterministic run
    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train',
        **mvae_train_kwargs)
    # init vae model
    vae_model = MeasureVAE(dataset=folk_dataset_vae,
                           note_embedding_dim=note_embedding_dim,
                           metadata_embedding_dim=metadata_embedding_dim,
                           num_encoder_layers=num_encoder_layers,
                           encoder_hidden_size=encoder_hidden_size,
                           encoder_dropout_prob=encoder_dropout_prob,
                           latent_space_dim=latent_space_dim,
                           num_decoder_layers=num_decoder_layers,
                           decoder_hidden_size=decoder_hidden_size,
                           decoder_dropout_prob=decoder_dropout_prob,
                           has_metadata=has_metadata)
    vae_model.load()  # VAE model must be pre-trained
    if torch.cuda.is_available():
        vae_model.cuda()
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train',
        **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train',
        **folk_test_kwargs)
    # Initialize stuff
    test_filenames = folk_dataset_test.dataset_filenames
    num_melodies = 32  # number of test filenames scanned below
    num_measures = 16
    # 16 measures * 4 beats * 6 ticks — assumes 4/4 at subdivision 6
    req_length = num_measures * 4 * 6
    num_past = 6
    num_future = 6
    # NOTE(review): this silently overrides the num_target parameter —
    # confirm intended
    num_target = 4
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    save_folder = 'saved_midi/'
    # Initialize models and testers
    latent_rnn_model = LatentRNN(dataset=folk_dataset_train,
                                 vae_model=vae_model,
                                 num_rnn_layers=num_latent_rnn_layers,
                                 rnn_hidden_size=latent_rnn_hidden_size,
                                 dropout=latent_rnn_dropout_prob,
                                 rnn_class=torch.nn.GRU,
                                 auto_reg=False,
                                 teacher_forcing=True)
    latent_rnn_model.load()  # latent_rnn model must be pre-trained
    if torch.cuda.is_available():
        latent_rnn_model.cuda()
    latent_rnn_tester = LatentRNNTester(dataset=folk_dataset_test,
                                        model=latent_rnn_model)

    def process_latent_rnn_batch(score_tensor, num_past=6, num_future=6,
                                 num_target=4):
        # Split a full 16-measure score tensor into past / future context
        # and the target measures the LatentRNN must regenerate.
        assert (num_past + num_future + num_target == 16)
        score_tensor = score_tensor.unsqueeze(0)  # add batch dimension
        score_tensor = LatentRNNTrainer.split_to_measures(score_tensor, 24)
        tensor_past, tensor_future, tensor_target = LatentRNNTrainer.split_score(
            score_tensor=score_tensor,
            num_past=num_past,
            num_future=num_future,
            num_target=num_target,
            measure_seq_len=24)
        return tensor_past, tensor_future, tensor_target

    # Second save latent_rnn generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]  # strip the 4-character file extension
        if f_id == 'tune_16154':  # only this one tune is regenerated
            for j in range(15):
                save_filename = os.path.join(
                    cur_dir,
                    save_folder + f_id + '_' + str(j) + '_latent_rnn.mid')
                # NOTE(review): f is re-joined on every j iteration; this is
                # harmless only if raw_dataset_dir yields an absolute path —
                # confirm
                f = os.path.join(
                    folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
                score = folk_dataset_test.corpus_it_gen.get_score_from_path(
                    f, fix_and_expand=True)
                score_tensor = folk_dataset_test.get_score_tensor(score)
                # ignore scores with less than 16 measures
                if score_tensor.size(1) < req_length:
                    continue
                score_tensor = score_tensor[:, :req_length]
                # metadata_tensor = metadata_tensor[:, :req_length, :]
                # save regeneration using latent_rnn
                tensor_past, tensor_future, tensor_target = process_latent_rnn_batch(
                    score_tensor, num_past, num_future, num_target)
                # forward pass through latent_rnn
                weights, gen_target, _ = latent_rnn_tester.model(
                    past_context=tensor_past,
                    future_context=tensor_future,
                    target=tensor_target,
                    measures_to_generate=num_target,
                    train=False,
                )
                # convert to score
                # NOTE(review): rebinds the batch_size parameter — confirm
                batch_size, _, _ = gen_target.size()
                gen_target = gen_target.view(batch_size, num_target, 24)
                gen_score_tensor = torch.cat(
                    (tensor_past, gen_target, tensor_future), 1)
                latent_rnn_score = folk_dataset_test.tensor_to_score(
                    gen_score_tensor.cpu())
                latent_rnn_score.write('midi', fp=save_filename)