Example #1
def build_bach_beat(dataset_manager, batch_size, subdivision, sequences_size, test_bool):
    metadatas = [
        TickMetadata(subdivision=subdivision),
        FermataMetadata(),
        KeyMetadata()
    ]
    name = 'bach_chorales'
    if test_bool:
        name += '_test'
    bach_chorales_dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
        name=name,
        voice_ids=[0, 1, 2, 3],
        metadatas=metadatas,
        sequences_size=sequences_size,
        subdivision=subdivision
    )
    (train_dataloader,
     val_dataloader,
     test_dataloader) = bach_chorales_dataset.data_loaders(
        batch_size=batch_size,
        cache_dir=dataset_manager.cache_dir,
        split=(0.85, 0.10)
    )
    print('Num Train Batches: ', len(train_dataloader))
    print('Num Valid Batches: ', len(val_dataloader))
    print('Num Test Batches: ', len(test_dataloader))
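A minimal usage sketch for the helper above; `DatasetManager` is imported as in Example #9, and every argument value below is an illustrative assumption rather than a project default:

# Hypothetical driver for build_bach_beat (argument values are placeholders).
from DatasetManager.dataset_manager import DatasetManager

if __name__ == '__main__':
    manager = DatasetManager()
    build_bach_beat(dataset_manager=manager,
                    batch_size=128,
                    subdivision=4,
                    sequences_size=8,
                    test_bool=False)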
Example #2
def main(note_embedding_dim,
         meta_embedding_dim,
         num_layers,
         lstm_hidden_size,
         dropout_lstm,
         linear_hidden_size,
         batch_size,
         num_epochs,
         train,
         num_iterations,
         sequence_length_ticks):
    dataset_manager = DatasetManager()

    metadatas = [
        FermataMetadata(),
        TickMetadata(subdivision=4),
        KeyMetadata()
    ]
    chorale_dataset_kwargs = {
        'voice_ids':      [0, 1, 2, 3],
        'metadatas':      metadatas,
        'sequences_size': 8,
        'subdivision':    4
    }
    bach_chorales_dataset = dataset_manager.get_dataset(
        name='bach_chorales',
        **chorale_dataset_kwargs
    )
    dataset = bach_chorales_dataset

    deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size
    )

    if train:
        deepbach.train(batch_size=batch_size,
                       num_epochs=num_epochs)
    else:
        deepbach.load()
        deepbach.cuda()

    print('Generation')
    score, tensor_chorale, tensor_metadata = deepbach.generation(
        num_iterations=num_iterations,
        length=sequence_length_ticks,
    )
    score.write('midi', fp='test.mid')
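A hedged sketch of invoking this entry point directly; the values mirror the hard-coded hyperparameters of Example #5 and are assumptions, and if `main` is actually wrapped in a click command these would be passed as command-line options instead:

# Hypothetical direct call (values borrowed from Example #5).
main(note_embedding_dim=20,
     meta_embedding_dim=20,
     num_layers=2,
     lstm_hidden_size=256,
     dropout_lstm=0.5,
     linear_hidden_size=256,
     batch_size=256,
     num_epochs=5,
     train=False,
     num_iterations=500,
     sequence_length_ticks=64)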
Example #3
def get_fermatas_tensor(metadata_tensor: torch.Tensor) -> torch.Tensor:
    """
    Extract the fermatas tensor from a metadata tensor
    """
    fermatas_index = [m.__class__
                      for m in metadatas].index(FermataMetadata().__class__)
    # fermatas are shared across all voices so we only consider the first voice
    soprano_voice_metadata = metadata_tensor[0]

    # `soprano_voice_metadata` has shape
    # `(sequence_duration, len(metadatas) + 1)` (accounting for the voice
    # index metadata)
    # Extract fermatas for all steps
    return soprano_voice_metadata[:, fermatas_index]
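A small smoke test for the extractor; the tensor shape (num_voices, sequence_duration, len(metadatas) + 1) and the module-level `metadatas` list from Example #6 are assumptions:

# Hypothetical smoke test; the tensor shape is an assumption.
import torch

dummy_metadata = torch.zeros(4, 64, len(metadatas) + 1, dtype=torch.long)
fermatas = get_fermatas_tensor(dummy_metadata)
assert fermatas.shape == (64,)  # one fermata flag per time step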
Example #4
def main(include_transpositions):
    dataset_manager = DatasetManager()

    print('step 1/3: prepare dataset')
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4,
        'include_transpositions': include_transpositions,
    }

    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    dataset = bach_chorales_dataset
    get_pairs(dataset, model_ids=[5, 9])
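For completeness, a hedged invocation of this entry point; whether it is exposed through click is an assumption, in which case the flag would be passed on the command line instead:

# Hypothetical direct call; include_transpositions is the only parameter.
main(include_transpositions=True)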
Example #5
    def setup(self):
        """Load the model"""

        # music21.environment.set("musicxmlPath", "/bin/true")

        note_embedding_dim = 20
        meta_embedding_dim = 20
        num_layers = 2
        lstm_hidden_size = 256
        dropout_lstm = 0.5
        linear_hidden_size = 256
        batch_size = 256
        num_epochs = 5
        train = False
        num_iterations = 500
        sequence_length_ticks = 64

        dataset_manager = DatasetManager()

        metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
        chorale_dataset_kwargs = {
            "voice_ids": [0, 1, 2, 3],
            "metadatas": metadatas,
            "sequences_size": 8,
            "subdivision": 4,
        }
        bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
            name="bach_chorales", **chorale_dataset_kwargs
        )
        dataset = bach_chorales_dataset

        self.deepbach = DeepBach(
            dataset=dataset,
            note_embedding_dim=note_embedding_dim,
            meta_embedding_dim=meta_embedding_dim,
            num_layers=num_layers,
            lstm_hidden_size=lstm_hidden_size,
            dropout_lstm=dropout_lstm,
            linear_hidden_size=linear_hidden_size,
        )

        self.deepbach.load()

        # load FluidSynth for MIDI-to-audio conversion
        self.fs = FluidSynth()
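The `FluidSynth` instance stored above is presumably midi2audio's wrapper; if so, a previously written MIDI file can be rendered to audio as sketched below (file names are placeholders):

# Hypothetical MIDI-to-audio conversion, assuming midi2audio.FluidSynth.
from midi2audio import FluidSynth

fs = FluidSynth()
fs.midi_to_audio('test.mid', 'test.wav')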
Example #6
mp3_response_headers = {"Content-Type": "audio/mpeg3"}

deepbach = None
_num_iterations = None
_sequence_length_ticks = None
_ticks_per_quarter = None

# TODO use this parameter or extract it from the metadata somehow
timesignature = music21.meter.TimeSignature('4/4')

# generation parameters
# TODO: expose as a click option?
batch_size_per_voice = 8

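# NOTE: `_ticks_per_quarter` is still None when this list is built, so the
# TickMetadata entry below is constructed with subdivision=None.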
metadatas = [
    FermataMetadata(),
    TickMetadata(subdivision=_ticks_per_quarter),
    KeyMetadata()
]


def get_fermatas_tensor(metadata_tensor: torch.Tensor) -> torch.Tensor:
    """
    Extract the fermatas tensor from a metadata tensor
    """
    fermatas_index = [m.__class__
                      for m in metadatas].index(FermataMetadata().__class__)
    # fermatas are shared across all voices so we only consider the first voice
    soprano_voice_metadata = metadata_tensor[0]

    # `soprano_voice_metadata` has shape
    # `(sequence_duration, len(metadatas) + 1)` (accounting for the voice
    # index metadata)
    # Extract fermatas for all steps
    return soprano_voice_metadata[:, fermatas_index]
Example #7
def get_dataset(dataset_manager, dataset_type, subdivision, sequence_size,
                velocity_quantization, max_transposition, num_heads,
                per_head_dim, local_position_embedding_dim, block_attention,
                group_instrument_per_section, nade, cpc_config_name,
                double_conditioning, instrument_presence_in_encoder):
    if dataset_type == 'bach':
        if nade:
            raise Exception(
                'the nade option looks broken in the data processor; check before using'
            )
        metadatas = [
            FermataMetadata(),
            TickMetadata(subdivision=subdivision),
            KeyMetadata()
        ]

        voices_ids = [0, 1, 2, 3]

        if cpc_config_name is not None:
            # note: to compute the first CPC code, we need to waste block_size tokens
            cpc_model = init_cpc_model(cpc_config_name)
            block_size = cpc_model.dataloader_generator.num_tokens_per_block // (
                subdivision * len(voices_ids))
            sequence_size += block_size

        chorale_dataset_kwargs = {
            'voice_ids': voices_ids,
            'metadatas': metadatas,
            'sequences_size': sequence_size,
            'subdivision': subdivision,
        }

        dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
            name='bach_chorales_beats', **chorale_dataset_kwargs)

        if cpc_config_name is None:
            processor_encoder = BachBeatsDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=True,
                monophonic_flag=False,
                nade_flag=nade)

            processor_decoder = BachBeatsDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=False,
                monophonic_flag=False,
                nade_flag=nade)
        else:
            processor_encoder = BachBeatsCPCDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=True,
                monophonic_flag=False,
                nade_flag=nade,
                cpc_model=cpc_model)

            processor_decoder = BachBeatsCPCDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=False,
                monophonic_flag=False,
                nade_flag=nade,
                cpc_model=cpc_model)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'bach_small':
        metadatas = [
            FermataMetadata(),
            TickMetadata(subdivision=subdivision),
            KeyMetadata()
        ]

        voices_ids = [0, 1, 2, 3]

        if cpc_config_name is not None:
            # note: to compute the first CPC code, we need to waste block_size tokens
            cpc_model = init_cpc_model(cpc_config_name)
            num_tokens_per_block = cpc_model.dataloader_generator.num_tokens_per_block // (
                subdivision * len(voices_ids))
            sequence_size += num_tokens_per_block

        chorale_dataset_kwargs = {
            'voice_ids': voices_ids,
            'metadatas': metadatas,
            'sequences_size': sequence_size,
            'subdivision': subdivision,
        }

        dataset: ChoraleBeatsDataset = dataset_manager.get_dataset(
            name='bach_chorales_beats_test', **chorale_dataset_kwargs)

        if cpc_config_name is None:
            processor_encoder = BachBeatsDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=True,
                monophonic_flag=False,
                nade_flag=nade)

            processor_decoder = BachBeatsDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=False,
                monophonic_flag=False,
                nade_flag=nade)
        else:
            processor_encoder = BachBeatsCPCDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=True,
                monophonic_flag=False,
                nade_flag=nade,
                cpc_model=cpc_model)

            processor_decoder = BachBeatsCPCDataProcessor(
                dataset=dataset,
                embedding_dim=512 - 8,
                reducer_input_dim=512,
                local_position_embedding_dim=8,
                encoder_flag=False,
                monophonic_flag=False,
                nade_flag=nade,
                cpc_model=cpc_model)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'lsdb':
        # leadsheet_dataset_kwargs = {
        #     'sequences_size': 24,
        # }
        # leadsheet_dataset_kwargs = {
        #     'sequences_size': 32,
        # }
        leadsheet_dataset_kwargs = {
            'sequences_size': 12,
        }
        dataset: LsdbDataset = dataset_manager.get_dataset(
            name='lsdb', **leadsheet_dataset_kwargs)
        processor_encoder = LsdbDataProcessor(dataset=dataset,
                                              embedding_dim=512 - 8,
                                              reducer_input_dim=512,
                                              local_position_embedding_dim=8)

        processor_decoder = LsdbDataProcessor(dataset=dataset,
                                              embedding_dim=512 - 8,
                                              reducer_input_dim=512,
                                              local_position_embedding_dim=8)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_large':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement_large', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_small':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement_small':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'velocity_quantization': velocity_quantization,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False
        }
        dataset: ArrangementDataset = dataset_manager.get_dataset(
            name='arrangement_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement_midiPiano':
        # For now just try a small value; an exception is raised if it is too small
        mean_number_messages_per_time_frame = 14

        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame':
            mean_number_messages_per_time_frame,
            'integrate_discretization': True,
            'alignement_type': 'complete',
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement_midiPiano_small':

        mean_number_messages_per_time_frame = 14

        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame':
            mean_number_messages_per_time_frame,
            'integrate_discretization': True,
            'alignement_type': 'complete'
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement_voice':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False,
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_voice', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'arrangement_voice_small':

        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'integrate_discretization': True,
            'alignement_type': 'complete',
            'compute_statistics_flag': False,
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_voice_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_decoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        processor_encodencoder = ArrangementVoiceDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='instruments',
            block_attention=block_attention,
            nade=nade,
            double_conditioning=double_conditioning)

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    # elif dataset_type == 'arrangement_minimal':
    #
    #     arrangement_dataset_kwargs = {
    #         'transpose_to_sounding_pitch': True,
    #         'subdivision': subdivision,
    #         'sequence_size': sequence_size,
    #         'velocity_quantization': velocity_quantization,
    #         'max_transposition': max_transposition,
    #         'compute_statistics_flag': False
    #     }
    #     dataset: ArrangementDataset = dataset_manager.get_dataset(
    #         name='arrangement',
    #         **arrangement_dataset_kwargs
    #     )
    #
    #     reducer_input_dim = num_heads * per_head_dim
    #
    #     processor_encoder = ArrangementDataProcessorMinimal(dataset=dataset,
    #                                                         embedding_dim=reducer_input_dim - local_position_embedding_dim,
    #                                                         reducer_input_dim=reducer_input_dim,
    #                                                         local_position_embedding_dim=local_position_embedding_dim,
    #                                                         flag_orchestra=False,
    #                                                         block_attention=block_attention)
    #
    #     processor_decoder = ArrangementDataProcessorMinimal(dataset=dataset,
    #                                                         embedding_dim=reducer_input_dim - local_position_embedding_dim,
    #                                                         reducer_input_dim=reducer_input_dim,
    #                                                         local_position_embedding_dim=local_position_embedding_dim,
    #                                                         flag_orchestra=True,
    #                                                         block_attention=block_attention)
    #
    #     processor_encodencoder = None
    #
    #     return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'ar':
        dataset: ARDataset = ARDataset(phis=[0.9], length=128, c=0)

        # TODO: create BachTransformer and put the BachBeats data processor in it
        processor_encoder = ARDataProcessor(dataset=dataset)

        processor_decoder = ARDataProcessor(dataset=dataset)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_categorical':
        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'group_instrument_per_section': group_instrument_per_section
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_categorical', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_categorical_small':

        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'group_instrument_per_section': group_instrument_per_section
        }
        dataset: ArrangementVoiceDataset = dataset_manager.get_dataset(
            name='arrangement_categorical_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionCategoricalDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_midiPiano':
        # For now just try a small value; an exception is raised if it is too small
        mean_number_messages_per_time_frame = 14

        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame':
            mean_number_messages_per_time_frame,
            'integrate_discretization': True
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder

    elif dataset_type == 'reduction_midiPiano_small':

        # TODO: compute this value beforehand?
        # For now just try a small value; an exception is raised if it is too small
        mean_number_messages_per_time_frame = 14

        arrangement_dataset_kwargs = {
            'transpose_to_sounding_pitch': True,
            'subdivision': subdivision,
            'sequence_size': sequence_size,
            'max_transposition': max_transposition,
            'compute_statistics_flag': False,
            'mean_number_messages_per_time_frame':
            mean_number_messages_per_time_frame,
            'integrate_discretization': True
        }
        dataset: ArrangementMidipianoDataset = dataset_manager.get_dataset(
            name='arrangement_midiPiano_small', **arrangement_dataset_kwargs)

        reducer_input_dim = num_heads * per_head_dim

        processor_encoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='orchestra',
            block_attention=block_attention)

        processor_decoder = ReductionMidiPianoDataProcessor(
            dataset=dataset,
            embedding_dim=reducer_input_dim - local_position_embedding_dim,
            reducer_input_dim=reducer_input_dim,
            local_position_embedding_dim=local_position_embedding_dim,
            flag='piano',
            block_attention=block_attention)

        processor_encodencoder = None

        return dataset, processor_decoder, processor_encoder, processor_encodencoder
    else:
        raise NotImplementedError
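A hedged example of calling this factory for the plain 'bach' configuration; every value below is illustrative, and the arguments that only matter for the arrangement datasets are passed as dummies:

# Hypothetical call for the 'bach' branch (all values are illustrative).
dataset, processor_decoder, processor_encoder, processor_encodencoder = get_dataset(
    dataset_manager=DatasetManager(),
    dataset_type='bach',
    subdivision=4,
    sequence_size=8,
    velocity_quantization=2,            # unused by the 'bach' branch
    max_transposition=3,                # unused by the 'bach' branch
    num_heads=8,
    per_head_dim=64,
    local_position_embedding_dim=8,
    block_attention=False,
    group_instrument_per_section=False,
    nade=False,
    cpc_config_name=None,
    double_conditioning=False,
    instrument_presence_in_encoder=False)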
Example #8
def main(note_embedding_dim, meta_embedding_dim, num_layers, lstm_hidden_size,
         dropout_lstm, linear_hidden_size, batch_size, num_epochs, train,
         update, num_iterations, sequence_length_ticks, model_id,
         include_transpositions, update_iterations, generations_per_iteration,
         num_generations, score_chorales, write_scores):

    print('step 1/3: prepare dataset')
    dataset_manager = DatasetManager()
    metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
    chorale_dataset_kwargs = {
        'voice_ids': [0, 1, 2, 3],
        'metadatas': metadatas,
        'sequences_size': 8,
        'subdivision': 4,
        'include_transpositions': include_transpositions,
    }

    bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
        name='bach_chorales', **chorale_dataset_kwargs)
    dataset = bach_chorales_dataset
    load_or_pickle_distributions(dataset)

    print('step 2/3: prepare model')
    print(f'Model ID: {model_id}')
    deepbach = DeepBach(
        dataset=dataset,
        note_embedding_dim=note_embedding_dim,
        meta_embedding_dim=meta_embedding_dim,
        num_layers=num_layers,
        lstm_hidden_size=lstm_hidden_size,
        dropout_lstm=dropout_lstm,
        linear_hidden_size=linear_hidden_size,
        model_id=model_id,
    )

    if train:
        print('step 2a/3: train base model')
        deepbach.train(batch_size=batch_size,
                       num_epochs=num_epochs,
                       split=[0.85, 0.15])
    else:
        print('step 2a/3: load model')
        deepbach.load()
        deepbach.cuda()

    if update:
        print(
            f'step 2b/3: update base model over {update_iterations} iterations'
        )
        thres = get_threshold('data/chorale_scores.csv', col=-1)
        print(f'Threshold for selection: {thres}')
        update_file = open('data/update_scores.csv', 'w')
        writer = csv.writer(update_file)
        writer.writerow(['iteration', 'chorale ID', 'score'])
        for i in range(update_iterations):
            print(f'----------- Iteration {i} -----------')
            picked_chorales = []
            num_picked_chorales = 0
            ensure_dir(f'generations/{model_id}/{i}')
            for j in tqdm(range(generations_per_iteration)):
                chorale, tensor_chorale, tensor_metadata = deepbach.generation(
                    num_iterations=num_iterations,
                    sequence_length_ticks=sequence_length_ticks,
                )

                score = score_chorale(chorale, dataset)

                # write data to csv file
                writer.writerow([i, j, score])  # iteration, generation #, score

                # `thres` is the worst Bach chorale score, rounded up to the nearest 0.01
                if score > thres:
                    print(f'Picked chorale {j} with score {score}')
                    picked_chorales.append(chorale)
                    num_picked_chorales += 1

                chorale.write('midi', f'generations/{model_id}/{i}/c{j}.mid')

            print(
                f'Number of picked chorales for iteration {i}: {num_picked_chorales}'
            )

            if num_picked_chorales == 0:
                continue

            all_datasets.update({
                f'generated_chorales_{i}': {
                    'dataset_class_name': ChoraleDataset,
                    'corpus_it_gen':
                    GeneratedChoraleIteratorGen(picked_chorales)
                }
            })
            generated_dataset: ChoraleDataset = dataset_manager.get_dataset(
                name=f'generated_chorales_{i}',
                index2note_dicts=dataset.index2note_dicts,
                note2index_dicts=dataset.note2index_dicts,
                voice_ranges=dataset.voice_ranges,
                **chorale_dataset_kwargs)

            deepbach.dataset = generated_dataset
            deepbach.train(
                batch_size=batch_size,
                num_epochs=2,
                split=[1, 0],  # use all selected chorales for training
                early_stopping=False)

    # generate chorales
    if score_chorales:
        chorale_scores = {}
        print('Scoring real chorales')
        for chorale_id, chorale in tqdm(enumerate(dataset.iterator_gen()),
                                        total=num_generations):
            score = score_chorale(chorale, dataset)
            chorale_scores[chorale_id] = score
            if chorale_id == num_generations:
                break

        # write scores to file
        if write_scores:
            with open('data/chorale_scores.csv', 'w') as chorale_file:
                writer = csv.writer(chorale_file)
                writer.writerow(['', 'score'] + list(weights.keys()))
                for chorale_id, value in chorale_scores.items():
                    writer.writerow([chorale_id, value])

    if num_generations != 0:
        generation_scores = {}
        print('Generating and scoring generated chorales')
        ensure_dir(f'generations/{model_id}')
        for i in range(num_generations):
            chorale, tensor_chorale, tensor_metadata = deepbach.generation(
                num_iterations=num_iterations,
                sequence_length_ticks=sequence_length_ticks,
            )
            chorale.write('midi', f'generations/{model_id}/c{i}.mid')
            score = score_chorale(chorale, dataset)
            generation_scores[i] = score

        # write scores to file
        if write_scores:
            with open(f'data/model{model_id}_scores.csv', 'w') as generation_file:
                writer = csv.writer(generation_file)
                writer.writerow(['', 'score'] + list(weights.keys()))
                for generation_id, value in generation_scores.items():
                    writer.writerow([generation_id, value])
Example #9
from grader.grader import score_chorale
from DatasetManager.chorale_dataset import ChoraleDataset
from DatasetManager.dataset_manager import DatasetManager, all_datasets
from DatasetManager.metadata import FermataMetadata, TickMetadata, KeyMetadata
from DatasetManager.helpers import GeneratedChoraleIteratorGen

from DeepBach.model_manager import DeepBach
from DeepBach.helpers import *

print('step 1/3: prepare dataset')
dataset_manager = DatasetManager()
metadatas = [FermataMetadata(), TickMetadata(subdivision=4), KeyMetadata()]
chorale_dataset_kwargs = {
    'voice_ids': [0, 1, 2, 3],
    'metadatas': metadatas,
    'sequences_size': 8,
    'subdivision': 4,
    'include_transpositions': False,
}

bach_chorales_dataset: ChoraleDataset = dataset_manager.get_dataset(
    name='bach_chorales', **chorale_dataset_kwargs)
dataset = bach_chorales_dataset
load_or_pickle_distributions(dataset)

print(dataset.gaussian.covariances_)

# chorale = converter.parse('generations/6/c187.mid')
# score = score_chorale(chorale, dataset)
# print(score)
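The commented lines above hint at grading a single generated file; a hedged version of that snippet, assuming music21's converter and an existing MIDI file at the (illustrative) path:

# Hypothetical scoring of one generated chorale (path is illustrative).
from music21 import converter

chorale = converter.parse('generations/6/c187.mid')
print(score_chorale(chorale, dataset))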