Example #1
    def block_dataloader(self, batch_size):
        """

            :return: torch Dataloader, returns batches of
            (batch_size, num_blocks=1, num_tokens_per_block)
            }

        """
        dataset_manager = DatasetManager()
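        # NOTE: subdivision, num_voices and metadatas are not defined in this
        # snippet; they are presumably module-level constants in the source file.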
        num_tokens_per_beat = subdivision * num_voices

        # number of beats per block
        num_beats = self.num_tokens_per_block // num_tokens_per_beat
        chorale_dataset_kwargs = {
            'voice_ids': [0, 1, 2, 3],
            'metadatas': metadatas,
            'sequences_size': num_beats,
            'subdivision': subdivision,
        }

        dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
            name='bach_chorales_beats', **chorale_dataset_kwargs)
        return [
            ({'x': t[0]} for t in dataloader)  # discard metadata
            for dataloader in dataset.data_loaders(batch_size)
        ]
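A minimal consumption sketch for the value returned above (not from the original source; it assumes data_loaders(batch_size) yields train, validation and test loaders as in Example #12, and `helper` is a hypothetical name for whatever object defines block_dataloader):

# sketch: `helper` stands for the object exposing block_dataloader above
train_gen, val_gen, test_gen = helper.block_dataloader(batch_size=32)
batch = next(train_gen)  # each item is a dict with key 'x'; metadata was discarded
print(batch['x'].shape)  # expected: (batch_size, num_blocks=1, num_tokens_per_block)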
Example #2
def main(note_embedding_dim, metadata_embedding_dim, num_layers,
         lstm_hidden_size, dropout_lstm, input_dropout, linear_hidden_size,
         batch_size, num_epochs, train, log, lr, plot, teacher_forcing,
         early_stop):
    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs)

    model = ConstraintModelGaussianReg(
        dataset=folk_dataset,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=teacher_forcing)

    if train:
        if torch.cuda.is_available():
            model.cuda()
        trainer = AnticipationRNNGaussianRegTrainer(dataset=folk_dataset,
                                                    model=model,
                                                    lr=lr,
                                                    early_stopping=early_stop)
        trainer.train_model(batch_size=batch_size,
                            num_epochs=num_epochs,
                            plot=plot,
                            log=log)
    else:
        model.load()
        model.cuda()
    tester = AnticipationRNNTester(dataset=folk_dataset_test, model=model)
    tester.test_model(batch_size=512)
Example #3
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, batch_size, num_epochs, train, plot, log, lr):

    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    mvae_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)

    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_test_kwargs)

    model = MeasureVAE(dataset=folk_dataset,
                       note_embedding_dim=note_embedding_dim,
                       metadata_embedding_dim=metadata_embedding_dim,
                       num_encoder_layers=num_encoder_layers,
                       encoder_hidden_size=encoder_hidden_size,
                       encoder_dropout_prob=encoder_dropout_prob,
                       latent_space_dim=latent_space_dim,
                       num_decoder_layers=num_decoder_layers,
                       decoder_hidden_size=decoder_hidden_size,
                       decoder_dropout_prob=decoder_dropout_prob,
                       has_metadata=has_metadata)

    if train:
        if torch.cuda.is_available():
            model.cuda()
        trainer = VAETrainer(dataset=folk_dataset, model=model, lr=lr)
        trainer.train_model(batch_size=batch_size,
                            num_epochs=num_epochs,
                            plot=plot,
                            log=log)
    else:
        model.load()
        model.cuda()
        model.eval()

    tester = VAETester(dataset=folk_dataset_test, model=model)
    tester.test_model()
Example #4
def main(note_embedding_dim,
         meta_embedding_dim,
         num_layers,
         lstm_hidden_size,
         dropout_lstm,
         linear_hidden_size,
         batch_size,
         num_epochs,
         train,
         num_iterations,
         sequence_length_ticks):
    dataset_manager = DatasetManager()

    metadatas = [
       FermataMetadata(),
       TickMetadata(subdivision=4),
       KeyMetadata()
    ]
    chorale_dataset_kwargs = {
        'voice_ids':      [0, 1, 2, 3],
        'metadatas':      metadatas,
        'sequences_size': 8,
        'subdivision':    4
    }
    bach_chorales_dataset = dataset_manager.get_dataset(
        name='bach_chorales',
        **chorale_dataset_kwargs
        )
    dataset = bach_chorales_dataset

    deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size
    )

    if train:
        deepbach.train(batch_size=batch_size,
                       num_epochs=num_epochs)
    else:
        deepbach.load()
        deepbach.cuda()

    print('Generation')
    score, tensor_chorale, tensor_metadata = deepbach.generation(
        num_iterations=num_iterations,
        length=sequence_length_ticks,
    )
    score.write('midi', fp='test.mid')
Example #5
def main(
    note_embedding_dim,
    meta_embedding_dim,
    num_layers,
    lstm_hidden_size,
    dropout_lstm,
    input_dropout,
    linear_hidden_size,
    batch_size,
    num_epochs,
    train,
    no_metadata,
):
    metadatas = [
        TickMetadata(subdivision=4),
    ]

    dataset_manager = DatasetManager()
    chorale_dataset_kwargs = {
        'voice_ids': [0],
        'metadatas': metadatas,
        'sequences_size': 20,
        'subdivision': 4
    }

    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)

    model = AnticipationRNN(
        chorale_dataset=bach_chorales_dataset,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        no_metadata=no_metadata,
    )

    if train:
        model.cuda()
        model.train_model(batch_size=batch_size, num_epochs=num_epochs)
    else:
        model.load()
        model.cuda()

    print('Fill')
    score, _, _ = model.fill(C3)
    score.show()
Example #6
def init_app(note_embedding_dim, meta_embedding_dim, num_layers,
             lstm_hidden_size, dropout_lstm, linear_hidden_size,
             num_iterations, sequence_length_ticks, ticks_per_quarter, port):
    global metadatas
    global _sequence_length_ticks
    global _num_iterations
    global _ticks_per_quarter
    global bach_chorales_dataset

    _ticks_per_quarter = ticks_per_quarter
    _sequence_length_ticks = sequence_length_ticks
    _num_iterations = num_iterations

    dataset_manager = DatasetManager()
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4
    }

    _bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    bach_chorales_dataset = _bach_chorales_dataset

    assert sequence_length_ticks % bach_chorales_dataset.subdivision == 0

    global deepbach
    deepbach = DeepBach(dataset=bach_chorales_dataset,
                        note_embedding_dim=note_embedding_dim,
                        meta_embedding_dim=meta_embedding_dim,
                        num_layers=num_layers,
                        lstm_hidden_size=lstm_hidden_size,
                        dropout_lstm=dropout_lstm,
                        linear_hidden_size=linear_hidden_size)
    deepbach.load()
    deepbach.cuda()

    # launch the script
    # use threaded=True to fix Chrome/Chromium engine hanging on requests
    # [https://stackoverflow.com/a/30670626]
    local_only = False
    if local_only:
        # accessible only locally:
        app.run(threaded=True)
    else:
        # accessible from outside:
        app.run(host='0.0.0.0', port=port, threaded=True)
Example #7
def main(include_transpositions):
    dataset_manager = DatasetManager()

    print('step 1/3: prepare dataset')
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4,
        'include_transpositions': include_transpositions,
    }

    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    dataset = bach_chorales_dataset
    get_pairs(dataset, model_ids=[5, 9])
Example #8
    def setup(self):
        """Load the model"""

        # music21.environment.set("musicxmlPath", "/bin/true")

        note_embedding_dim = 20
        meta_embedding_dim = 20
        num_layers = 2
        lstm_hidden_size = 256
        dropout_lstm = 0.5
        linear_hidden_size = 256
        batch_size = 256
        num_epochs = 5
        train = False
        num_iterations = 500
        sequence_length_ticks = 64

        dataset_manager = DatasetManager()

        metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
        chorale_dataset_kwargs = {
            "voice_ids": [0, 1, 2, 3],
            "metadatas": metadatas,
            "sequences_size": 8,
            "subdivision": 4,
        }
        bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
            name="bach_chorales", **chorale_dataset_kwargs
        )
        dataset = bach_chorales_dataset

        self.deepbach = DeepBach(
            dataset=dataset,
            note_embedding_dim=note_embedding_dim,
            meta_embedding_dim=meta_embedding_dim,
            num_layers=num_layers,
            lstm_hidden_size=lstm_hidden_size,
            dropout_lstm=dropout_lstm,
            linear_hidden_size=linear_hidden_size,
        )

        self.deepbach.load()

        # load FluidSynth for MIDI-to-audio conversion
        self.fs = FluidSynth()
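A possible follow-up method after this setup (a hedged sketch, not in the original snippet; it reuses the DeepBach.generation signature shown in Example #4, the hyperparameter values defined in setup above, and midi2audio's FluidSynth.midi_to_audio):

    def predict(self):
        """Generate a chorale and render it to audio (sketch only)."""
        # sketch: assumes generation() as in Example #4 and midi2audio's FluidSynth
        score, tensor_chorale, tensor_metadata = self.deepbach.generation(
            num_iterations=500,       # num_iterations from setup above
            length=64,                # sequence_length_ticks from setup above
        )
        score.write('midi', fp='generation.mid')
        # render the generated MIDI file to audio with FluidSynth
        self.fs.midi_to_audio('generation.mid', 'generation.wav')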
Example #9
def init_app(
    note_embedding_dim,
    meta_embedding_dim,
    num_layers,
    lstm_hidden_size,
    dropout_lstm,
    input_dropout,
    linear_hidden_size,
):
    metadatas = [
        TickMetadata(subdivision=4),
    ]

    dataset_manager = DatasetManager()
    chorale_dataset_kwargs = {
        'voice_ids': [0],
        'metadatas': metadatas,
        'sequences_size': 20,
        'subdivision': 4
    }

    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)

    global model
    model = AnticipationRNN(
        chorale_dataset=bach_chorales_dataset,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
    )
    model.load()
    model.cuda()

    # launch the script
    # accessible only locally:
    app.run()
Example #10
    def _dataset(self):
        if self.negative_sampling_method == 'random':
            dataset_manager = DatasetManager()
            self.cache_dir = dataset_manager.cache_dir
            num_tokens_per_beat = subdivision * num_voices
            num_tokens = self.num_tokens_per_block * (self.num_blocks_left +
                                                      self.num_blocks_right)

            assert num_tokens % num_tokens_per_beat == 0

            # Positive dataset
            num_beats_positive = num_tokens // num_tokens_per_beat
            chorale_dataset_positive_kwargs = {
                'voice_ids': [0, 1, 2, 3],
                'metadatas': metadatas,
                'sequences_size': num_beats_positive,
                'subdivision': subdivision,
            }

            dataset_positive: ChoraleBeatsDataset = dataset_manager.get_dataset(
                name='bach_chorales_beats_test',
                **chorale_dataset_positive_kwargs)
            num_tokens_per_beat = subdivision * num_voices
            num_beats_negative = self.num_tokens_per_block // num_tokens_per_beat
            chorale_dataset_negative_kwargs = {
                'voice_ids': [0, 1, 2, 3],
                'metadatas': metadatas,
                'sequences_size': num_beats_negative,
                'subdivision': subdivision,
            }

            dataset_negative: ChoraleBeatsDataset = dataset_manager.get_dataset(
                name='bach_chorales_beats_test',
                **chorale_dataset_negative_kwargs)
            return dataset_positive, dataset_negative
        else:
            raise NotImplementedError
Example #11
        for chord_root_index, chord_name_index \
                in zip(tensor_chords_root_np,
                       tensor_chords_name_np
                       ):
            chord_root_index = chord_root_index.item()
            chord_name_index = chord_name_index.item()
            chord_desc = index2chordroot[chord_root_index] + \
                         index2chordname[chord_name_index]
            chord_list.append(chord_desc)
        return score, chord_list


if __name__ == '__main__':
    from DatasetManager.dataset_manager import DatasetManager

    dataset_manager = DatasetManager()
    leadsheet_dataset_kwargs = {
        'sequences_size': 64,
    }

    lsdb_dataset: LsdbDataset = dataset_manager.get_dataset(
        name='lsdb_test', **leadsheet_dataset_kwargs)
    dl, _, _ = lsdb_dataset.data_loaders(1)
    tensor_lead, tensor_chord_root, tensor_chord_name = next(dl.__iter__())
    print(tensor_lead[0].size(), tensor_chord_root[0].size(),
          tensor_chord_name[0].size())
    tensor_chord = (tensor_chord_root[0], tensor_chord_name[0])
    score, chord_list = lsdb_dataset.tensor_leadsheet_to_score_and_chord_list(
        tensor_lead[0], tensor_chord)
    score.show()
    print(chord_list)
Example #12
        metadata_tensor_dataset = torch.cat(metadata_tensor_dataset, 0)
        num_datapoints, length, num_metadata = metadata_tensor_dataset.size()
        metadata_tensor_dataset = metadata_tensor_dataset.view(
            num_datapoints, 1, length, num_metadata)
        dataset = TensorDataset(score_tensor_dataset, metadata_tensor_dataset)
        print(f'Sizes: {score_tensor_dataset.size()}')
        print(f'Sizes: {metadata_tensor_dataset.size()}')
        return dataset


if __name__ == '__main__':

    from DatasetManager.dataset_manager import DatasetManager
    from DatasetManager.metadata import BeatMarkerMetadata, TickMetadata

    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    folk_dataset_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 1,
        'train': True
    }
    folk_dataset: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_chords', **folk_dataset_kwargs)
    (train_dataloader, val_dataloader,
     test_dataloader) = folk_dataset.data_loaders(batch_size=100,
                                                  split=(0.7, 0.2))
Example #13
    def _dataset(self):
        """
        Loads the appropriate dataset depending on the sampling method
        :return: ChoraleDataset or tuple(ChoraleDataset)
        """

        dataset_manager = DatasetManager()
        self.cache_dir = dataset_manager.cache_dir

        if self.negative_sampling_method == 'random_bad':
            num_tokens_per_beat = subdivision * num_voices
            num_tokens = self.num_tokens_per_block * (self.num_blocks_left +
                                                      self.num_blocks_right)

            assert num_tokens % num_tokens_per_beat == 0

            # Positive dataset
            num_beats_positive = num_tokens // num_tokens_per_beat
            chorale_dataset_positive_kwargs = {
                'voice_ids': [0, 1, 2, 3],
                'metadatas': metadatas,
                'sequences_size': num_beats_positive,
                'subdivision': subdivision,
            }

            dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
                name='bach_chorales_beats', **chorale_dataset_positive_kwargs)

            return dataset
        elif self.negative_sampling_method == 'same_sequence':
            # FIXME for the moment, exactly the same as 'random' _dataset
            dataset_manager = DatasetManager()
            num_tokens_per_beat = subdivision * num_voices
            num_tokens = self.num_tokens_per_block * (self.num_blocks_left +
                                                      self.num_blocks_right)

            assert num_tokens % num_tokens_per_beat == 0

            # Positive dataset
            num_beats_positive = num_tokens // num_tokens_per_beat
            chorale_dataset_positive_kwargs = {
                'voice_ids': [0, 1, 2, 3],
                'metadatas': metadatas,
                'sequences_size': num_beats_positive,
                'subdivision': subdivision,
            }

            dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
                name='bach_chorales_beats', **chorale_dataset_positive_kwargs)

            return dataset
        elif self.negative_sampling_method == 'random':
            dataset_manager = DatasetManager()
            num_tokens_per_beat = subdivision * num_voices
            num_tokens = self.num_tokens_per_block * (self.num_blocks_left +
                                                      self.num_blocks_right)

            assert num_tokens % num_tokens_per_beat == 0

            # Positive dataset
            num_beats_positive = num_tokens // num_tokens_per_beat
            chorale_dataset_positive_kwargs = {
                'voice_ids': [0, 1, 2, 3],
                'metadatas': metadatas,
                'sequences_size': num_beats_positive,
                'subdivision': subdivision,
            }

            dataset_positive: ChoraleBeatsDataset = dataset_manager.get_dataset(
                name='bach_chorales_beats', **chorale_dataset_positive_kwargs)
            num_tokens_per_beat = subdivision * num_voices
            num_beats_negative = self.num_tokens_per_block // num_tokens_per_beat
            chorale_dataset_negative_kwargs = {
                'voice_ids': [0, 1, 2, 3],
                'metadatas': metadatas,
                'sequences_size': num_beats_negative,
                'subdivision': subdivision,
            }

            dataset_negative: ChoraleBeatsDataset = dataset_manager.get_dataset(
                name='bach_chorales_beats', **chorale_dataset_negative_kwargs)
            return dataset_positive, dataset_negative
        else:
            raise NotImplementedError
Example #14
def main(block_attention, hierarchical, nade, num_layers, dropout,
         input_dropout, input_dropout_token, per_head_dim, num_heads,
         local_position_embedding_dim, position_ff_dim, enc_dec_conditioning,
         lr, batch_size, num_epochs, action, loss_on_last_frame, mixup,
         midi_input, temperature, num_batches, label_smoothing,
         scheduled_training, dataset_type, conditioning, double_conditioning,
         instrument_presence_in_encoder, cpc_config_name, num_examples_sampled,
         suffix, subdivision, sequence_size, velocity_quantization,
         max_transposition, group_instrument_per_section):
    # Use all available GPUs
    gpu_ids = [int(gpu) for gpu in range(torch.cuda.device_count())]
    print(gpu_ids)

    config = get_config()

    num_layers_l = [2, 3, 4, 5, 6]
    enc_dec_conditionings_l = ['split', 'single']
    sequence_sizes_l = [3, 5, 7]
    grid_search = False
    if grid_search:
        configs = list(
            itertools.product(
                *[num_layers_l, enc_dec_conditionings_l, sequence_sizes_l]))
        write_suffix = True
    else:
        configs = [(num_layers, enc_dec_conditioning, sequence_size)]
        write_suffix = False

    for this_config in configs:
        num_layers, enc_dec_conditioning, sequence_size = this_config
        if write_suffix:
            this_suffix = f'{suffix}_{num_layers}_{enc_dec_conditioning}_{sequence_size}'
        else:
            this_suffix = suffix

        # Get dataset
        dataset_manager = DatasetManager()
        dataset, processor_decoder, processor_encoder, processor_encodencoder = \
            dataset_import.get_dataset(dataset_manager, dataset_type, subdivision, sequence_size, velocity_quantization,
                                       max_transposition,
                                       num_heads, per_head_dim, local_position_embedding_dim, block_attention,
                                       group_instrument_per_section, nade, cpc_config_name, double_conditioning,
                                       instrument_presence_in_encoder)

        reduction_flag = dataset_type in [
            'reduction', 'reduction_small', 'reduction_large',
            'reduction_categorical', 'reduction_categorical_small',
            'reduction_midiPiano', 'reduction_midiPiano_small'
        ]

        if not conditioning:
            print("NO CONDITIONING ????!!!!!!!!!!!!")

        model = Transformer(dataset=dataset,
                            data_processor_encodencoder=processor_encodencoder,
                            data_processor_encoder=processor_encoder,
                            data_processor_decoder=processor_decoder,
                            num_heads=num_heads,
                            per_head_dim=per_head_dim,
                            position_ff_dim=position_ff_dim,
                            enc_dec_conditioning=enc_dec_conditioning,
                            hierarchical_encoding=hierarchical,
                            block_attention=block_attention,
                            nade=nade,
                            conditioning=conditioning,
                            double_conditioning=double_conditioning,
                            num_layers=num_layers,
                            dropout=dropout,
                            input_dropout=input_dropout,
                            input_dropout_token=input_dropout_token,
                            lr=lr,
                            reduction_flag=reduction_flag,
                            gpu_ids=gpu_ids,
                            suffix=this_suffix,
                            mixup=mixup,
                            scheduled_training=scheduled_training)

        if action in ['generate', 'visualize']:
            model.load()
            overfit_flag = False
        elif action in [
                'generate_overfit', 'train_from_checkpoint',
                'visualize_overfit'
        ]:
            model.load_overfit()
            overfit_flag = True

        model.cuda()

        if action in ['train', 'train_from_checkpoint']:
            print(f"Train the model on gpus {gpu_ids}")
            model.train_model(cache_dir=dataset_manager.cache_dir,
                              batch_size=batch_size,
                              num_epochs=num_epochs,
                              num_batches=num_batches,
                              label_smoothing=label_smoothing,
                              loss_on_last_frame=loss_on_last_frame)
            overfit_flag = True

        if action in ['generate', 'generate_overfit']:
            print('Generation')
            ascii_melody = MARIO_MELODY
            # score, tensor_chorale, tensor_metadata = model.generation_from_ascii(
            #     ascii_melody=ascii_melody
            # )
            # score.show()
            # score, tensor_chorale, tensor_metadata = model.generation(
            #     num_tokens_per_beat=8,
            #     num_beats=64 * 4,
            #     temperature=1.
            # )
            # score, tensor_chorale, tensor_metadata = model.generation(
            #     num_tokens_per_beat=8,
            #     num_beats=64 * 4,
            #     num_experiments=4,
            #     link_experiments=False,
            #     temperature=1.2
            # )
            # score, tensor_chorale, tensor_metadata = model.plot_attentions()
            # score, tensor_chorale, tensor_metadata = model.unconstrained_generation(
            #     num_tokens_per_beat=8,
            #     num_beats=64 * 4)

            if dataset_type in [
                    'arrangement', 'arrangement_small',
                    'arrangement_midiPiano', 'arrangement_midiPiano_small',
                    'arrangement_voice', 'arrangement_voice_small'
            ]:
                # (opposite to standard) increasing the temperature reduces agitation:
                # cold means all events eventually get almost the same probability,
                # hot accentuates spikes

                # Number of complete passes over all time frames in non-autoregressive sampling schemes
                number_sampling_steps = 1
                #  Allows overriding the dataset quantization for generation
                subdivision_generation = subdivision
                # banned_instruments = ["Violin_1", "Violin_2", "Violoncello", "Contrabass", "Viola"]
                banned_instruments = []
                # Used for instruments_presence model
                unknown_instruments = []
                source_folder = f"{config['datapath']}/source_for_generation/"
                sources = [
                    {
                        "source_path":
                        source_folder + "mouss_tableaux_small.xml",
                        "writing_name": "mouss_tableaux_small",
                        "writing_tempo": "adagio",
                    },
                    # {"source_path": source_folder + "guillaume_1.mid",
                    #  "writing_name": "guillaume_1",
                    #  "writing_tempo": "adagio"
                    #  },
                    # {"source_path": source_folder + "guillaume_2.xml",
                    #  "writing_name": "guillaume_2",
                    #  "writing_tempo": "adagio"
                    #  },
                    {
                        "source_path":
                        source_folder + "chopin_Prel_Op28_20.xml",
                        "writing_name": "chopin_Prel_Op28_20",
                        "writing_tempo": "largo"
                    },
                    {
                        "source_path": source_folder + "b_1_1.xml",
                        "writing_name": "b_1_1",
                        "writing_tempo": "adagio"
                    },
                    {
                        "source_path": source_folder + "b_3_3.xml",
                        "writing_name": "b_3_3",
                        "writing_tempo": "adagio"
                    },
                    {
                        "source_path": source_folder + "b_3_4.xml",
                        "writing_name": "b_3_4",
                        "writing_tempo": "adagio"
                    },
                    {
                        "source_path": source_folder + "b_7_2.xml",
                        "writing_name": "b_7_2",
                        "writing_tempo": "adagio"
                    },
                    {
                        "source_path": source_folder + "testpiano.xml",
                        "writing_name": "testpiano",
                        "writing_tempo": "adagio"
                    },
                    {
                        "source_path": source_folder + "schubert_21_1.xml",
                        "writing_name": "schubert_21_1",
                        "writing_tempo": "adagio"
                    },
                    {
                        "source_path": source_folder + "schubert_20_1.xml",
                        "writing_name": "schubert_20_1",
                        "writing_tempo": "adagio"
                    },
                    {
                        "source_path": source_folder + "Mozart_Nachtmusik.xml",
                        "writing_name": "Mozart_Nachtmusik",
                        "writing_tempo": "adagio"
                    },
                ]
                if overfit_flag:
                    write_dir = model.log_dir_overfitted
                else:
                    write_dir = model.log_dir

                if midi_input is not None:
                    sources = [{
                        'source_path': f'midi_inputs/{midi_input}',
                        'writing_name': f'{midi_input}',
                        'writing_tempo': 'adagio'
                    }]
                    write_dir = 'midi_inputs'

                for source in sources:
                    generation_from_file(
                        model=model,
                        temperature=temperature,
                        batch_size=num_examples_sampled,
                        filepath=source["source_path"],
                        write_dir=write_dir,
                        write_name=source["writing_name"],
                        banned_instruments=banned_instruments,
                        unknown_instruments=unknown_instruments,
                        writing_tempo=source["writing_tempo"],
                        subdivision=subdivision_generation,
                        number_sampling_steps=number_sampling_steps)

            elif dataset_type in [
                    'reduction', 'reduction_large', 'reduction_small',
                    'reduction_categorical', 'reduction_categorical_small'
            ]:
                #  Allows overriding the dataset quantization for generation
                subdivision_generation = 8
                source_folder = f"{config['datapath']}/source_for_generation/"
                sources = [
                    {
                        "source_path": source_folder + "b_7_2_orch.xml",
                        "writing_name": "b_7_2_orch",
                        "writing_tempo": "adagio"
                    },
                    # {"source_path": source_folder + "mouss_tableaux_orch.xml",
                    #  "writing_name": "mouss_tableaux_orch",
                    #  "writing_tempo": "adagio"
                    #  },
                    # {"source_path": source_folder + "Debussy_SuiteBergam_Passepied_orch.xml",
                    #  "writing_name": "Debussy_SuiteBergam_Passepied_orch",
                    #  "writing_tempo": "adagio"
                    #  },
                    # {
                    #     "source_path": source_folder + "Romantic Concert Piece for Brass Orchestra_orch.xml",
                    #     "writing_name": "Romantic Concert Piece for Brass Orchestra_orch",
                    #     "writing_tempo": "adagio"
                    #  },
                    # {
                    #     "source_path": source_folder + "mozart_25_1.xml",
                    #     "writing_name": "mozart_25_1",
                    #     "writing_tempo": "adagio"
                    # },
                    # {
                    #     "source_path": source_folder + "mozart_25_2.xml",
                    #     "writing_name": "mozart_25_2",
                    #     "writing_tempo": "adagio"
                    # },
                    # {
                    #     "source_path": source_folder + "mozart_25_3.xml",
                    #     "writing_name": "mozart_25_3",
                    #     "writing_tempo": "adagio"
                    # },
                    {
                        "source_path":
                        source_folder + "brahms_symphony_2_1.xml",
                        "writing_name": "brahms_symphony_2_1",
                        "writing_tempo": "adagio"
                    },
                    {
                        "source_path":
                        source_folder + "haydn_symphony_91_1.xml",
                        "writing_name": "haydn_symphony_91_1",
                        "writing_tempo": "adagio"
                    },
                    {
                        "source_path":
                        source_folder + "mozart_symphony_183_4.xml",
                        "writing_name": "mozart_symphony_183_4",
                        "writing_tempo": "adagio"
                    },
                    {
                        "source_path":
                        source_folder + "mozart_symphony_183_2.xml",
                        "writing_name": "mozart_symphony_183_2",
                        "writing_tempo": "adagio"
                    },
                ]
                for source in sources:
                    reduction_from_file(model=model,
                                        temperature=temperature,
                                        batch_size=num_examples_sampled,
                                        filepath=source["source_path"],
                                        write_name=source["writing_name"],
                                        overfit_flag=overfit_flag,
                                        writing_tempo=source["writing_tempo"],
                                        subdivision=subdivision_generation)

            elif dataset_type == "lsdb":
                score, tensor_chorale, tensor_metadata = model.generation()
                score.write('xml', 'results/test.xml')

            elif dataset_type in ['bach', 'bach_small']:
                if nade and (not conditioning):
                    scores = generation_bach_nade(
                        model=model,
                        temperature=temperature,
                        ascii_melody=ascii_melody,
                        batch_size=num_examples_sampled,
                        force_melody=False,
                    )
                else:
                    scores = generation_bach(model=model,
                                             temperature=temperature,
                                             ascii_melody=ascii_melody,
                                             batch_size=num_examples_sampled,
                                             force_melody=False)

                if overfit_flag:
                    writing_dir = model.log_dir_overfitted
                else:
                    writing_dir = model.log_dir

                for batch_index, score in enumerate(scores):
                    score.write('xml', f'{writing_dir}/{batch_index}.xml')
        elif action in ['visualize', 'visualize_overfit']:
            log_dir = model.log_dir if action == 'visualize' else model.log_dir_overfitted
            visualize_arrangement(model, batch_size, log_dir)
    return
Example #15
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, num_layers, lstm_hidden_size, dropout_lstm,
         input_dropout, linear_hidden_size, batch_size, num_target,
         num_models):

    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)
    # init vae model
    vae_model = MeasureVAE(dataset=folk_dataset_vae,
                           note_embedding_dim=note_embedding_dim,
                           metadata_embedding_dim=metadata_embedding_dim,
                           num_encoder_layers=num_encoder_layers,
                           encoder_hidden_size=encoder_hidden_size,
                           encoder_dropout_prob=encoder_dropout_prob,
                           latent_space_dim=latent_space_dim,
                           num_decoder_layers=num_decoder_layers,
                           decoder_hidden_size=decoder_hidden_size,
                           decoder_dropout_prob=decoder_dropout_prob,
                           has_metadata=has_metadata)
    vae_model.load()  # VAE model must be pre-trained
    if torch.cuda.is_available():
        vae_model.cuda()
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs)

    # init latent_rnn model and latent_rnn_tester
    latent_rnn_model = LatentRNN(dataset=folk_dataset_train,
                                 vae_model=vae_model,
                                 num_rnn_layers=num_latent_rnn_layers,
                                 rnn_hidden_size=latent_rnn_hidden_size,
                                 dropout=latent_rnn_dropout_prob,
                                 rnn_class=torch.nn.GRU,
                                 auto_reg=False,
                                 teacher_forcing=True)
    latent_rnn_model.load()  # latent_rnn model must be pre-trained
    if torch.cuda.is_available():
        latent_rnn_model.cuda()
    latent_rnn_tester = LatentRNNTester(dataset=folk_dataset_test,
                                        model=latent_rnn_model)

    # init arnn model and arnn_tester
    arnn_model = ConstraintModelGaussianReg(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_model.load()  # ARNN model must be pre-trained
    if torch.cuda.is_available():
        arnn_model.cuda()
    arnn_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                        model=arnn_model)

    arnn_baseline_model = AnticipationRNNBaseline(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_baseline_model.load()  # ARNN model must be pre-trained
    if torch.cuda.is_available():
        arnn_baseline_model.cuda()
    arnn_baseline_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                                 model=arnn_baseline_model)

    # create test dataloader
    (_, _,
     test_dataloader) = folk_dataset_test.data_loaders(batch_size=batch_size,
                                                       split=(0.01, 0.01))

    # test
    print('Num Test Batches: ', len(test_dataloader))
    latent_rnn_mean_loss, latent_rnn_mean_accuracy, \
    arnn_mean_loss, arnn_mean_accuracy, \
    arnn_baseline_mean_loss, arnn_baseline_mean_accuracy = loss_and_acc_test(
        data_loader=test_dataloader,
        latent_rnn_tester=latent_rnn_tester,
        arnn_tester=arnn_tester,
        arnn_baseline_tester=arnn_baseline_tester,
        num_target_measures=num_target,
        num_models=num_models
    )
    print('Test Epoch:')
    print('latent_rnn Test Loss: ', latent_rnn_mean_loss, '\n'
          'latent_rnn Test Accuracy: ', latent_rnn_mean_accuracy * 100, '\n'
          'ARNN Test Loss: ', arnn_mean_loss, '\n'
          'ARNN Test Accuracy: ', arnn_mean_accuracy * 100, '\n'
          'ARNN Baseline Test Loss: ', arnn_baseline_mean_loss, '\n'
          'ARNN Baseline Test Accuracy: ', arnn_baseline_mean_accuracy * 100,
          '\n')
Example #16
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, num_layers, lstm_hidden_size, dropout_lstm,
         input_dropout, linear_hidden_size, batch_size, num_target,
         num_models):

    random.seed(0)

    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)
    # init vae model
    vae_model = MeasureVAE(dataset=folk_dataset_vae,
                           note_embedding_dim=note_embedding_dim,
                           metadata_embedding_dim=metadata_embedding_dim,
                           num_encoder_layers=num_encoder_layers,
                           encoder_hidden_size=encoder_hidden_size,
                           encoder_dropout_prob=encoder_dropout_prob,
                           latent_space_dim=latent_space_dim,
                           num_decoder_layers=num_decoder_layers,
                           decoder_hidden_size=decoder_hidden_size,
                           decoder_dropout_prob=decoder_dropout_prob,
                           has_metadata=has_metadata)
    vae_model.load()  # VAE model must be pre-trained
    if torch.cuda.is_available():
        vae_model.cuda()
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs)

    # Initialize generation parameters and output paths
    test_filenames = folk_dataset_test.dataset_filenames
    num_melodies = 32
    num_measures = 16
    req_length = num_measures * 4 * 6
    num_past = 6
    num_future = 6
    num_target = 4
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    save_folder = 'saved_midi/'

    # First save original data
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        # save original scores
        save_filename = os.path.join(cur_dir,
                                     save_folder + f_id + '_original.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        metadata_tensor = metadata_tensor[:, :req_length, :]
        trunc_score = folk_dataset_test.tensor_to_score(score_tensor)
        trunc_score.write('midi', fp=save_filename)

    # Initialize models and testers
    latent_rnn_model = LatentRNN(dataset=folk_dataset_train,
                                 vae_model=vae_model,
                                 num_rnn_layers=num_latent_rnn_layers,
                                 rnn_hidden_size=latent_rnn_hidden_size,
                                 dropout=latent_rnn_dropout_prob,
                                 rnn_class=torch.nn.GRU,
                                 auto_reg=False,
                                 teacher_forcing=True)
    latent_rnn_model.load()  # Latent RNN model must be pre-trained
    if torch.cuda.is_available():
        latent_rnn_model.cuda()
    latent_rnn_tester = LatentRNNTester(dataset=folk_dataset_test,
                                        model=latent_rnn_model)

    def process_latent_rnn_batch(score_tensor,
                                 num_past=6,
                                 num_future=6,
                                 num_target=4):
        assert (num_past + num_future + num_target == 16)
        score_tensor = score_tensor.unsqueeze(0)
        score_tensor = LatentRNNTrainer.split_to_measures(score_tensor, 24)
        tensor_past, tensor_future, tensor_target = LatentRNNTrainer.split_score(
            score_tensor=score_tensor,
            num_past=num_past,
            num_future=num_future,
            num_target=num_target,
            measure_seq_len=24)
        return tensor_past, tensor_future, tensor_target

    # Second save latent_rnn generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        save_filename = os.path.join(cur_dir,
                                     save_folder + f_id + '_latent_rnn.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        # metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        # metadata_tensor = metadata_tensor[:, :req_length, :]
        # save regeneration using latent_rnn
        tensor_past, tensor_future, tensor_target = process_latent_rnn_batch(
            score_tensor, num_past, num_future, num_target)
        # forward pass through latent_rnn
        weights, gen_target, _ = latent_rnn_tester.model(
            past_context=tensor_past,
            future_context=tensor_future,
            target=tensor_target,
            measures_to_generate=num_target,
            train=False,
        )
        # convert to score
        batch_size, _, _ = gen_target.size()
        gen_target = gen_target.view(batch_size, num_target, 24)
        gen_score_tensor = torch.cat((tensor_past, gen_target, tensor_future),
                                     1)
        latent_rnn_score = folk_dataset_test.tensor_to_score(
            gen_score_tensor.cpu())
        latent_rnn_score.write('midi', fp=save_filename)

    # Initialize arnn model and arnn_tester
    arnn_model = ConstraintModelGaussianReg(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_model.load()  # ARNN model must be pre-trained
    if torch.cuda.is_available():
        arnn_model.cuda()
    arnn_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                        model=arnn_model)

    def process_arnn_batch(score_tensor,
                           metadata_tensor,
                           arnn_tester,
                           num_past=6,
                           num_target=4):
        score_tensor = score_tensor.unsqueeze(0)
        metadata_tensor = metadata_tensor.unsqueeze(0)
        tensor_score = to_cuda_variable_long(score_tensor)
        tensor_metadata = to_cuda_variable_long(metadata_tensor)
        constraints_location, start_tick, end_tick = arnn_tester.get_constraints_location(
            tensor_score,
            is_stochastic=False,
            start_measure=num_past,
            num_measures=num_target)
        arnn_batch = (tensor_score, tensor_metadata, constraints_location,
                      start_tick, end_tick)
        return arnn_batch

    # Third save ARNN-Reg generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        save_filename = os.path.join(cur_dir,
                                     save_folder + f_id + '_arnn_reg.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        metadata_tensor = metadata_tensor[:, :req_length, :]
        # save regeneration using the ARNN-Reg model
        tensor_score, tensor_metadata, constraints_location, start_tick, end_tick = \
            process_arnn_batch(score_tensor, metadata_tensor, arnn_tester, num_past, num_target)
        # forward pass through the ARNN-Reg model
        _, gen_target = arnn_tester.model.forward_inpaint(
            score_tensor=tensor_score,
            metadata_tensor=tensor_metadata,
            constraints_loc=constraints_location,
            start_tick=start_tick,
            end_tick=end_tick,
        )
        # convert to score
        arnn_score = folk_dataset_test.tensor_to_score(gen_target.cpu())
        arnn_score.write('midi', fp=save_filename)

    # Initialize arnn-baseline model and arnn_baseline_tester
    arnn_baseline_model = AnticipationRNNBaseline(
        dataset=folk_dataset_train,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_layers=num_layers,
        num_lstm_constraints_units=lstm_hidden_size,
        num_lstm_generation_units=lstm_hidden_size,
        linear_hidden_size=linear_hidden_size,
        dropout_prob=dropout_lstm,
        dropout_input_prob=input_dropout,
        unary_constraint=True,
        teacher_forcing=True)
    arnn_baseline_model.load()  # ARNN model must be pre-trained
    if torch.cuda.is_available():
        arnn_baseline_model.cuda()
    arnn_baseline_tester = AnticipationRNNTester(dataset=folk_dataset_test,
                                                 model=arnn_baseline_model)
    # Fourth save ARNN-Baseline generations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        save_filename = os.path.join(cur_dir,
                                     save_folder + f_id + '_arnn_baseline.mid')
        if os.path.isfile(save_filename):
            continue
        f = os.path.join(folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
        score = folk_dataset_test.corpus_it_gen.get_score_from_path(
            f, fix_and_expand=True)
        score_tensor = folk_dataset_test.get_score_tensor(score)
        metadata_tensor = folk_dataset_test.get_metadata_tensor(score)
        # ignore scores with less than 16 measures
        if score_tensor.size(1) < req_length:
            continue
        score_tensor = score_tensor[:, :req_length]
        metadata_tensor = metadata_tensor[:, :req_length, :]
        # save regeneration using the ARNN-Baseline model
        tensor_score, tensor_metadata, constraints_location, start_tick, end_tick = \
            process_arnn_batch(score_tensor, metadata_tensor, arnn_baseline_tester, num_past, num_target)
        # forward pass through the ARNN-Baseline model
        _, gen_target = arnn_baseline_tester.model.forward_inpaint(
            score_tensor=tensor_score,
            metadata_tensor=tensor_metadata,
            constraints_loc=constraints_location,
            start_tick=start_tick,
            end_tick=end_tick,
        )
        # convert to score
        arnn_baseline_score = folk_dataset_test.tensor_to_score(
            gen_target.cpu())
        arnn_baseline_score.write('midi', fp=save_filename)
Example #17
from grader.grader import score_chorale
from DatasetManager.chorale_dataset import ChoraleDataset
from DatasetManager.dataset_manager import DatasetManager, all_datasets
from DatasetManager.metadata import FermataMetadata, TickMetadata, KeyMetadata
from DatasetManager.helpers import GeneratedChoraleIteratorGen

from DeepBach.model_manager import DeepBach
from DeepBach.helpers import *

print('step 1/3: prepare dataset')
dataset_manager = DatasetManager()
metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
chorale_dataset_kwargs = {
    'voice_ids': [1, 1, 2, 3],
    'metadatas': metadatas,
    'sequences_size': 8,
    'subdivision': 4,
    'include_transpositions': False,
}

bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
    name='bach_chorales', **chorale_dataset_kwargs)
dataset = bach_chorales_dataset
load_or_pickle_distributions(dataset)

print(dataset.gaussian.covariances_)

# chorale = converter.parse('generations/6/c187.mid')
# score = score_chorale(chorale, dataset)
# print(score)
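A hedged sketch of actually grading a generated chorale with the distributions loaded above, following the commented-out lines (it assumes music21's converter and an existing MIDI file at that path):

# sketch following the commented-out lines above
from music21 import converter

chorale = converter.parse('generations/6/c187.mid')
chorale_score = score_chorale(chorale, dataset)
print(chorale_score)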
Example #18
def main(note_embedding_dim,
         metadata_embedding_dim,
         num_encoder_layers,
         encoder_hidden_size,
         encoder_dropout_prob,
         latent_space_dim,
         num_decoder_layers,
         decoder_hidden_size,
         decoder_dropout_prob,
         has_metadata,
         num_latent_rnn_layers,
         latent_rnn_hidden_size,
         latent_rnn_dropout_prob,
         batch_size,
         num_epochs,
         train,
         lr,
         plot,
         log,
         auto_reg,
         teacher_forcing,
         early_stop
         ):

    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train',
        **mvae_train_kwargs
    )
    # init vae model
    vae_model = MeasureVAE(
        dataset=folk_dataset_vae,
        note_embedding_dim=note_embedding_dim,
        metadata_embedding_dim=metadata_embedding_dim,
        num_encoder_layers=num_encoder_layers,
        encoder_hidden_size=encoder_hidden_size,
        encoder_dropout_prob=encoder_dropout_prob,
        latent_space_dim=latent_space_dim,
        num_decoder_layers=num_decoder_layers,
        decoder_hidden_size=decoder_hidden_size,
        decoder_dropout_prob=decoder_dropout_prob,
        has_metadata=has_metadata
    )
    vae_model.load()  # VAE model must be pre-trained

    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train',
        **folk_train_kwargs
    )
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train',
        **folk_test_kwargs
    )

    # init latent_rnn model
    model = LatentRNN(
        dataset=folk_dataset_train,
        vae_model=vae_model,
        num_rnn_layers=num_latent_rnn_layers,
        rnn_hidden_size=latent_rnn_hidden_size,
        dropout=latent_rnn_dropout_prob,
        rnn_class=torch.nn.GRU,
        auto_reg=auto_reg,
        teacher_forcing=teacher_forcing
    )

    if train:
        if torch.cuda.is_available():
            model.cuda()
        trainer = LatentRNNTrainer(
            dataset=folk_dataset_train,
            model=model,
            lr=lr,
            early_stopping=early_stop
        )
        trainer.train_model(
            batch_size=batch_size,
            num_epochs=num_epochs,
            plot=plot,
            log=log
        )
    else:
        model.load()
        model.cuda()
        model.eval()
    tester = LatentRNNTester(
        dataset=folk_dataset_test,
        model=model
    )
    tester.test_model(
        batch_size=batch_size
    )
    gen_score, score, original_score = tester.generation_random(
        tensor_score=None,
        start_measure=8,
        num_measures_gen=2
    )
    print( " --- score --- " )
    print(  score  )

    gen_score.show()
    original_score.show()
    gen_score2, score, original_score2 = tester.generation_test()
    gen_score2.show()
    original_score2.show()

    print( " --- score --- " )
    print(  score  )
Ejemplo n.º 19
0
    def __init__(self,
                 writing_dir,
                 corpus_it_gen,
                 subdivision_model=2,
                 subdivision_read=4,
                 sequence_size=3,
                 velocity_quantization=2,
                 temperature=1.2):
        """
        :param subdivision: number of sixteenth notes per beat
        """
        self.subdivision_read = subdivision_read
        self.sequence_size = sequence_size
        self.velocity_quantization = velocity_quantization
        self.writing_dir = writing_dir

        #################################################################
        #  Need the original database that was used to train the model
        dataset_manager = DatasetManager()
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision_model,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': 12,
            'compute_statistics_flag': False
        }
        dataset = dataset_manager.get_dataset(name='arrangement_large',
                                              **arrangement_dataset_kwargs)

        #  Model hyperparameters (must match those used at training time)
        num_heads = 8
        per_head_dim = 64
        local_position_embedding_dim = 8
        position_ff_dim = 1024
        hierarchical = False
        block_attention = False
        nade = False
        conditioning = True
        double_conditioning = False
        num_layers = 2
        suffix = 'TEST'

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=False)

        processor_decoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None
        #################################################################

        #################################################################
        # Init model
        # Use all gpus available
        gpu_ids = [int(gpu) for gpu in range(torch.cuda.device_count())]
        print(gpu_ids)

        self.model = Transformer(
            dataset=dataset,
            data_processor_encodencoder=processor_encodencoder,
            data_processor_encoder=processor_encoder,
            data_processor_decoder=processor_decoder,
            num_heads=num_heads,
            per_head_dim=per_head_dim,
            position_ff_dim=position_ff_dim,
            hierarchical_encoding=hierarchical,
            block_attention=block_attention,
            nade=nade,
            conditioning=conditioning,
            double_conditioning=double_conditioning,
            num_layers=num_layers,
            dropout=0.1,
            input_dropout=0.2,
            reduction_flag=True,
            lr=1e-4,
            gpu_ids=gpu_ids,
            suffix=suffix)
        #################################################################

        self.corpus_it_gen = corpus_it_gen

        self.temperature = temperature

        return
Ejemplo n.º 20
0
def main(note_embedding_dim, meta_embedding_dim, num_layers, lstm_hidden_size,
         dropout_lstm, linear_hidden_size, batch_size, num_epochs, train,
         update, num_iterations, sequence_length_ticks, model_id,
         include_transpositions, update_iterations, generations_per_iteration,
         num_generations, score_chorales, write_scores):

    print('step 1/3: prepare dataset')
    dataset_manager = DatasetManager()
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4,
        'include_transpositions': include_transpositions,
    }

    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    dataset = bach_chorales_dataset
    load_or_pickle_distributions(dataset)

    print('step 2/3: prepare model')
    print(f'Model ID: {model_id}')
    deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size,
        model_id=model_id,
    )

    if train:
        print('step 2a/3: train base model')
        deepbach.train(batch_size=batch_size,
                       num_epochs=num_epochs,
                       split=[0.85, 0.15])
    else:
        print('step 2a/3: load model')
        deepbach.load()
        deepbach.cuda()

    if update:
        print(
            f'step 2b/3: update base model over {update_iterations} iterations'
        )
        thres = get_threshold('data/chorale_scores.csv', col=-1)
        print(f'Threshold for selection: {thres}')
        update_file = open('data/update_scores.csv', 'w')
        writer = csv.writer(update_file)
        writer.writerow(['iteration', 'chorale ID', 'score'])
        for i in range(update_iterations):
            print(f'----------- Iteration {i} -----------')
            picked_chorales = []
            num_picked_chorales = 0
            ensure_dir(f'generations/{model_id}/{i}')
            for j in tqdm(range(generations_per_iteration)):
                chorale, tensor_chorale, tensor_metadata = deepbach.generation(
                    num_iterations=num_iterations,
                    sequence_length_ticks=sequence_length_ticks,
                )

                score = score_chorale(chorale, dataset)

                # write data to csv file
                writer.writerow([i, j, score])  # iteration, generation number, score

                # worst Bach chorale score rounded up to nearest .01
                if score > thres:
                    print(f'Picked chorale {j} with score {score}')
                    picked_chorales.append(chorale)
                    num_picked_chorales += 1

                chorale.write('midi', f'generations/{model_id}/{i}/c{j}.mid')

            print(
                f'Number of picked chorales for iteration {i}: {num_picked_chorales}'
            )

            if num_picked_chorales == 0:
                continue

            all_datasets.update({
                f'generated_chorales_{i}': {
                    'dataset_class_name': ChoraleDataset,
                    'corpus_it_gen':
                    GeneratedChoraleIteratorGen(picked_chorales)
                }
            })
            generated_dataset: ChoraleDataset = dataset_manager.get_dataset(
                name=f'generated_chorales_{i}',
                index2note_dicts=dataset.index2note_dicts,
                note2index_dicts=dataset.note2index_dicts,
                voice_ranges=dataset.voice_ranges,
                **chorale_dataset_kwargs)

            deepbach.dataset = generated_dataset
            deepbach.train(
                batch_size=batch_size,
                num_epochs=2,
                split=[1, 0],  # use all selected chorales for training
                early_stopping=False)

    # generate chorales
    if score_chorales:
        chorale_scores = {}
        print('Scoring real chorales')
        for chorale_id, chorale in tqdm(enumerate(dataset.iterator_gen()),
                                        total=num_generations):
            score = score_chorale(chorale, dataset)
            chorale_scores[chorale_id] = score
            if chorale_id == num_generations:
                break

        # write scores to file
        if write_scores:
            with open('data/chorale_scores.csv', 'w') as chorale_file:
                writer = csv.writer(chorale_file)
                writer.writerow(['', 'score'] + list(weights.keys()))
                for chorale_id, value in chorale_scores.items():
                    writer.writerow([chorale_id, value])

    if num_generations != 0:
        generation_scores = {}
        print('Generating and scoring generated chorales')
        ensure_dir(f'generations/{model_id}')
        for i in range(num_generations):
            chorale, tensor_chorale, tensor_metadata = deepbach.generation(
                num_iterations=num_iterations,
                sequence_length_ticks=sequence_length_ticks,
            )
            chorale.write('midi', f'generations/{model_id}/c{i}.mid')
            score = score_chorale(chorale, dataset)
            generation_scores[i] = score

        # write scores to file
        if write_scores:
            with open(f'data/model{model_id}_scores.csv',
                      'w') as generation_file:
                writer = csv.writer(generation_file)
                writer.writerow(['', 'score'] + list(weights.keys()))
                for gen_id, value in generation_scores.items():
                    writer.writerow([gen_id, value])
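
The update loop above uses two helpers, get_threshold and ensure_dir, that are imported from the project's helpers module and not shown in this snippet. The sketch below is only an illustration of plausible implementations: it assumes the scores CSV has a header row and, following the inline comment in the loop, takes the threshold to be the worst real-chorale score rounded up to the nearest 0.01.

import csv
import math
import os


def ensure_dir(path):
    """Hypothetical sketch: create a directory if it does not already exist."""
    os.makedirs(path, exist_ok=True)


def get_threshold(filename, col=-1):
    """Hypothetical sketch: selection threshold from the real-chorale scores CSV."""
    with open(filename, 'r') as f:
        rows = [row for row in csv.reader(f) if row]
    # skip the header row and read the requested column
    scores = [float(row[col]) for row in rows[1:]]
    # worst real-chorale score, rounded up to the nearest 0.01
    return math.ceil(min(scores) * 100) / 100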
Ejemplo n.º 21
0
def main(
    midi_input,
    temperature,
    num_examples_sampled,
    suffix,
):
    # Use all gpus available
    gpu_ids = [int(gpu) for gpu in range(torch.cuda.device_count())]
    print(gpu_ids)

    hierarchical = False
    nade = False
    num_layers = 6
    dropout = 0.
    input_dropout = 0.
    input_dropout_token = 0.
    per_head_dim = 64
    num_heads = 8
    local_position_embedding_dim = 8
    position_ff_dim = 2048
    enc_dec_conditioning = 'split'
    lr = 1
    mixup = None
    scheduled_training = 0
    dataset_type = 'arrangement_voice'
    conditioning = True
    double_conditioning = None
    subdivision = 16
    sequence_size = 7
    velocity_quantization = 2
    max_transposition = 12
    group_instrument_per_section = False
    reduction_flag = False
    instrument_presence_in_encoder = False
    cpc_config_name = None
    block_attention = False

    # Get dataset
    dataset_manager = DatasetManager()
    dataset, processor_decoder, processor_encoder, processor_encodencoder = \
        dataset_import.get_dataset(dataset_manager, dataset_type, subdivision, sequence_size, velocity_quantization,
                                   max_transposition,
                                   num_heads, per_head_dim, local_position_embedding_dim, block_attention,
                                   group_instrument_per_section, nade, cpc_config_name, double_conditioning,
                                   instrument_presence_in_encoder)

    model = Transformer(dataset=dataset,
                        data_processor_encodencoder=processor_encodencoder,
                        data_processor_encoder=processor_encoder,
                        data_processor_decoder=processor_decoder,
                        num_heads=num_heads,
                        per_head_dim=per_head_dim,
                        position_ff_dim=position_ff_dim,
                        enc_dec_conditioning=enc_dec_conditioning,
                        hierarchical_encoding=hierarchical,
                        block_attention=block_attention,
                        nade=nade,
                        conditioning=conditioning,
                        double_conditioning=double_conditioning,
                        num_layers=num_layers,
                        dropout=dropout,
                        input_dropout=input_dropout,
                        input_dropout_token=input_dropout_token,
                        lr=lr,
                        reduction_flag=reduction_flag,
                        gpu_ids=gpu_ids,
                        suffix=suffix,
                        mixup=mixup,
                        scheduled_training=scheduled_training)

    model.load_overfit()
    model.cuda()

    print('Generation')
    #  Allows overriding the dataset quantization for generation
    subdivision_generation = subdivision

    source = {
        'source_path': f'midi_inputs/{midi_input}',
        'writing_name': f'{midi_input}',
        'writing_tempo': 'adagio'
    }

    write_dir = 'midi_inputs'

    generation_from_file(model=model,
                         temperature=temperature,
                         batch_size=num_examples_sampled,
                         filepath=source["source_path"],
                         write_dir=write_dir,
                         write_name=source["writing_name"],
                         banned_instruments=[],
                         unknown_instruments=[],
                         writing_tempo=source["writing_tempo"],
                         subdivision=subdivision_generation,
                         number_sampling_steps=1)
    return
Ejemplo n.º 22
0
group_instrument_per_section = False
reduction_flag = False
lr = 1.
cpc_config_name = None
subdivision = args.subdivision

# Use all gpus available
gpu_ids = [int(gpu) for gpu in range(torch.cuda.device_count())]
print(f'Using GPUs {gpu_ids}')
if len(gpu_ids) == 0:
    device = 'cpu'
else:
    device = 'cuda'

# Get dataset
dataset_manager = DatasetManager()
dataset, processor_decoder, processor_encoder, processor_encodencoder = \
    dataset_import.get_dataset(dataset_manager, args.dataset_type, args.subdivision, args.sequence_size,
                               args.velocity_quantization, args.max_transposition,
                               args.num_heads, args.per_head_dim, args.local_position_embedding_dim,
                               args.block_attention,
                               group_instrument_per_section, args.nade, cpc_config_name, args.double_conditioning,
                               args.instrument_presence_in_encoder)

# Load model
model = Transformer(dataset=dataset,
                    data_processor_encodencoder=processor_encodencoder,
                    data_processor_encoder=processor_encoder,
                    data_processor_decoder=processor_decoder,
                    num_heads=args.num_heads,
                    per_head_dim=args.per_head_dim,
Ejemplo n.º 23
0
        metadata_tensor_dataset = torch.cat(metadata_tensor_dataset, 0)
        num_datapoints, length, num_metadata = metadata_tensor_dataset.size()
        metadata_tensor_dataset = metadata_tensor_dataset.view(
            num_datapoints, 1, length, num_metadata)
        dataset = TensorDataset(score_tensor_dataset, metadata_tensor_dataset)
        print(f'Score tensor size: {score_tensor_dataset.size()}')
        print(f'Metadata tensor size: {metadata_tensor_dataset.size()}')
        return dataset


if __name__ == '__main__':

    from DatasetManager.dataset_manager import DatasetManager
    from DatasetManager.metadata import BeatMarkerMetadata, TickMetadata

    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    folk_dataset_kwargs = {'metadatas': metadatas, 'sequences_size': 32}
    folk_dataset: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4measures_test', **folk_dataset_kwargs)
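    # data_loaders returns (train, validation, test) loaders;
    # split=(0.7, 0.2) presumably leaves the remaining 10% for the test set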
    (train_dataloader, val_dataloader,
     test_dataloader) = folk_dataset.data_loaders(batch_size=100,
                                                  split=(0.7, 0.2))
    print('Num Train Batches: ', len(train_dataloader))
    print('Num Valid Batches: ', len(val_dataloader))
    print('Num Test Batches: ', len(test_dataloader))

    for sample_id, (score, _) in tqdm(enumerate(train_dataloader)):
Ejemplo n.º 24
0
def main(args):
    """

    :param args:
    :return:
    """
    dropout = 0.
    input_dropout = 0.
    input_dropout_token = 0.
    mixup = False
    scheduled_training = 0.
    group_instrument_per_section = False
    reduction_flag = False
    lr = 1.
    cpc_config_name = None
    subdivision = args.subdivision

    # Use all gpus available
    gpu_ids = [int(gpu) for gpu in range(torch.cuda.device_count())]
    print(f'Using GPUs {gpu_ids}')
    if len(gpu_ids) == 0:
        device = 'cpu'
    else:
        device = 'cuda'

    # Get dataset
    dataset_manager = DatasetManager()
    dataset, processor_decoder, processor_encoder, processor_encodencoder = \
        dataset_import.get_dataset(dataset_manager, args.dataset_type, args.subdivision, args.sequence_size,
                                   args.velocity_quantization, args.max_transposition,
                                   args.num_heads, args.per_head_dim, args.local_position_embedding_dim,
                                   args.block_attention,
                                   group_instrument_per_section, args.nade, cpc_config_name, args.double_conditioning,
                                   args.instrument_presence_in_encoder)

    # Load model
    model = Transformer(dataset=dataset,
                        data_processor_encodencoder=processor_encodencoder,
                        data_processor_encoder=processor_encoder,
                        data_processor_decoder=processor_decoder,
                        num_heads=args.num_heads,
                        per_head_dim=args.per_head_dim,
                        position_ff_dim=args.position_ff_dim,
                        enc_dec_conditioning=args.enc_dec_conditioning,
                        hierarchical_encoding=args.hierarchical,
                        block_attention=args.block_attention,
                        nade=args.nade,
                        conditioning=args.conditioning,
                        double_conditioning=args.double_conditioning,
                        num_layers=args.num_layers,
                        dropout=dropout,
                        input_dropout=input_dropout,
                        input_dropout_token=input_dropout_token,
                        lr=lr,
                        reduction_flag=reduction_flag,
                        gpu_ids=gpu_ids,
                        suffix=args.suffix,
                        mixup=mixup,
                        scheduled_training=scheduled_training)

    model.load_overfit(device=device)
    model.to(device)
    model = model.eval()

    # Dir for writing generated files
    writing_dir = f'{os.getcwd()}/generation'
    if not os.path.isdir(writing_dir):
        os.makedirs(writing_dir)

    # Create server
    server_address = (args.ip, args.port)
    server = OrchestraServer(server_address, model, subdivision, writing_dir)
    print(f'[Server listening to {args.ip} on port {args.port}]')
    server.serve_forever()
Ejemplo n.º 25
0
def init_app(block_attention, hierarchical, nade, num_layers, dropout,
             input_dropout, per_head_dim, num_heads,
             local_position_embedding_dim, position_ff_dim, suffix,
             subdivision, sequence_size, velocity_quantization,
             max_transposition, port):
    global metadatas
    global _subdivision
    global _batch_size
    global _banned_instruments
    global _temperature
    global _lowest_entropy_first
    global _context_size

    _subdivision = subdivision
    _batch_size = 1
    _banned_instruments = []
    _temperature = 1.2
    _lowest_entropy_first = True

    gpu_ids = [int(gpu) for gpu in range(torch.cuda.device_count())]
    print(gpu_ids)

    dataset_manager = DatasetManager()
    arrangement_dataset_kwargs = {
        'transpose_to_sounding_pitch': True,
        'subdivision': subdivision,
        'sequence_size': sequence_size,
        'velocity_quantization': velocity_quantization,
        'max_transposition': max_transposition,
        'compute_statistics_flag': False
    }
    dataset: ArrangementDataset = dataset_manager.get_dataset(
        name='arrangement', **arrangement_dataset_kwargs)

    reducer_input_dim = num_heads * per_head_dim

    processor_encoder = ArrangementDataProcessor(
        dataset=dataset,
        embedding_dim=reducer_input_dim - local_position_embedding_dim,
        reducer_input_dim=reducer_input_dim,
        local_position_embedding_dim=local_position_embedding_dim,
        flag_orchestra=False,
        block_attention=False)

    processor_decoder = ArrangementDataProcessor(
        dataset=dataset,
        embedding_dim=reducer_input_dim - local_position_embedding_dim,
        reducer_input_dim=reducer_input_dim,
        local_position_embedding_dim=local_position_embedding_dim,
        flag_orchestra=True,
        block_attention=block_attention)

    _context_size = processor_decoder.num_frames_orchestra - 1

    global model
    model = Transformer(
        dataset=dataset,
        data_processor_encoder=processor_encoder,
        data_processor_decoder=processor_decoder,
        num_heads=num_heads,
        per_head_dim=per_head_dim,
        position_ff_dim=position_ff_dim,
        hierarchical_encoding=hierarchical,
        block_attention=block_attention,
        nade=nade,
        num_layers=num_layers,
        dropout=dropout,
        input_dropout=input_dropout,
        conditioning=True,
        lr=0,
        gpu_ids=gpu_ids,
        suffix=suffix,
    )

    model.load_overfit()
    model.cuda()

    # TODO: the piano input should be modifiable (e.g. by dropping in a MusicXML file)
    filepath = "/home/leo/Recherche/Databases/Orchestration/arrangement_mxml/source_for_generation/chopin_Prel_Op28_20_xs.xml"
    global _piano
    global _rhythm_piano
    global _orchestra
    global _orchestra_silenced_instruments
    _piano, _rhythm_piano, _orchestra, _orchestra_silenced_instruments = \
        model.data_processor_encoder.init_generation_filepath(_batch_size, filepath,
                                                              banned_instruments=_banned_instruments,
                                                              subdivision=_subdivision)

    # launch the script
    # use threaded=True to fix Chrome/Chromium engine hanging on requests
    # [https://stackoverflow.com/a/30670626]
    local_only = False
    if local_only:
        # accessible only locally:
        app.run(threaded=True)
    else:
        # accessible from outside:
        app.run(host='0.0.0.0', port=port, threaded=True)
Ejemplo n.º 26
0
def main(note_embedding_dim, metadata_embedding_dim, num_encoder_layers,
         encoder_hidden_size, encoder_dropout_prob, latent_space_dim,
         num_decoder_layers, decoder_hidden_size, decoder_dropout_prob,
         has_metadata, num_latent_rnn_layers, latent_rnn_hidden_size,
         latent_rnn_dropout_prob, num_layers, lstm_hidden_size, dropout_lstm,
         input_dropout, linear_hidden_size, batch_size, num_target,
         num_models):

    random.seed(0)

    # init dataset
    dataset_manager = DatasetManager()
    metadatas = [
        BeatMarkerMetadata(subdivision=6),
        TickMetadata(subdivision=6)
    ]
    mvae_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_dataset_vae: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **mvae_train_kwargs)
    # init vae model
    vae_model = MeasureVAE(dataset=folk_dataset_vae,
                           note_embedding_dim=note_embedding_dim,
                           metadata_embedding_dim=metadata_embedding_dim,
                           num_encoder_layers=num_encoder_layers,
                           encoder_hidden_size=encoder_hidden_size,
                           encoder_dropout_prob=encoder_dropout_prob,
                           latent_space_dim=latent_space_dim,
                           num_decoder_layers=num_decoder_layers,
                           decoder_hidden_size=decoder_hidden_size,
                           decoder_dropout_prob=decoder_dropout_prob,
                           has_metadata=has_metadata)
    vae_model.load()  # VAE model must be pre-trained
    if torch.cuda.is_available():
        vae_model.cuda()
    folk_train_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': True
    }
    folk_test_kwargs = {
        'metadatas': metadatas,
        'sequences_size': 32,
        'num_bars': 16,
        'train': False
    }
    folk_dataset_train: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_train_kwargs)
    folk_dataset_test: FolkDataset = dataset_manager.get_dataset(
        name='folk_4by4nbars_train', **folk_test_kwargs)

    # Initialize generation settings
    test_filenames = folk_dataset_test.dataset_filenames
    num_melodies = 32
    num_measures = 16
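    # required length in ticks: 16 measures x 4 beats x subdivision 6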
    req_length = num_measures * 4 * 6
    num_past = 6
    num_future = 6
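    # NOTE: overrides the num_target argument passed to main();
    # 6 past + 6 future + 4 target = 16 measures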
    num_target = 4
    cur_dir = os.path.dirname(os.path.realpath(__file__))
    save_folder = 'saved_midi/'

    # Initialize models and testers
    latent_rnn_model = LatentRNN(dataset=folk_dataset_train,
                                 vae_model=vae_model,
                                 num_rnn_layers=num_latent_rnn_layers,
                                 rnn_hidden_size=latent_rnn_hidden_size,
                                 dropout=latent_rnn_dropout_prob,
                                 rnn_class=torch.nn.GRU,
                                 auto_reg=False,
                                 teacher_forcing=True)
    latent_rnn_model.load()  # latent_rnn model must be pre-trained
    if torch.cuda.is_available():
        latent_rnn_model.cuda()
    latent_rnn_tester = LatentRNNTester(dataset=folk_dataset_test,
                                        model=latent_rnn_model)

    def process_latent_rnn_batch(score_tensor,
                                 num_past=6,
                                 num_future=6,
                                 num_target=4):
        assert (num_past + num_future + num_target == 16)
        score_tensor = score_tensor.unsqueeze(0)
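        # split the sequence into measures of 24 ticks each (4 beats at subdivision 6)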
        score_tensor = LatentRNNTrainer.split_to_measures(score_tensor, 24)
        tensor_past, tensor_future, tensor_target = LatentRNNTrainer.split_score(
            score_tensor=score_tensor,
            num_past=num_past,
            num_future=num_future,
            num_target=num_target,
            measure_seq_len=24)
        return tensor_past, tensor_future, tensor_target

    # Second, save the latent_rnn regenerations
    for i in tqdm(range(num_melodies)):
        f = test_filenames[i]
        f_id = f[:-4]
        if f_id == 'tune_16154':
            for j in range(15):
                save_filename = os.path.join(
                    cur_dir,
                    save_folder + f_id + '_' + str(j) + '_latent_rnn.mid')
                f = os.path.join(
                    folk_dataset_test.corpus_it_gen.raw_dataset_dir, f)
                score = folk_dataset_test.corpus_it_gen.get_score_from_path(
                    f, fix_and_expand=True)
                score_tensor = folk_dataset_test.get_score_tensor(score)
                # skip scores shorter than 16 measures
                if score_tensor.size(1) < req_length:
                    continue
                score_tensor = score_tensor[:, :req_length]
                # metadata_tensor = metadata_tensor[:, :req_length, :]
                # save regeneration using latent_rnn
                tensor_past, tensor_future, tensor_target = process_latent_rnn_batch(
                    score_tensor, num_past, num_future, num_target)
                # forward pass through latent_rnn
                weights, gen_target, _ = latent_rnn_tester.model(
                    past_context=tensor_past,
                    future_context=tensor_future,
                    target=tensor_target,
                    measures_to_generate=num_target,
                    train=False,
                )
                # convert to score
                batch_size, _, _ = gen_target.size()
                gen_target = gen_target.view(batch_size, num_target, 24)
                gen_score_tensor = torch.cat(
                    (tensor_past, gen_target, tensor_future), 1)
                latent_rnn_score = folk_dataset_test.tensor_to_score(
                    gen_score_tensor.cpu())
                latent_rnn_score.write('midi', fp=save_filename)