Esempio n. 1
0
def process_directory(midi_directory, note_mapping_config_path,
                      output_file_name):
    """
    Processes all MIDI files found in a given directory (recursively) and converts them into text representations for
    training semantic models.

    Every file whose name ends in ".mid" (case-insensitive) is read into a dataframe with MidiReader. The 'notes'
    values of each 'measure' group are joined with commas and written to the output file, one line per measure.
    A file that fails to convert is logged and skipped; the remaining files are still processed.
    :param midi_directory: the directory to process.
    :param note_mapping_config_path: path to MIDI to text configuration settings.
    :param output_file_name: full file path and name to output file (overwritten if it exists).
    :return: none
    """

    note_mapper = NoteMapper(note_mapping_config_path)

    # The context manager closes the output file on every exit path, including
    # errors raised outside the per-file try block (e.g. while walking the tree).
    with open(output_file_name, "w") as output_file:

        for subdir, _dirs, files in os.walk(midi_directory):

            for f in files:
                if not f.lower().endswith(".mid"):
                    continue

                path = os.path.join(subdir, f)
                print(f)

                try:
                    midi_reader = MidiReader(note_mapper)
                    df = midi_reader.convert_to_dataframe(path)

                    # One comma-joined string of notes per measure.
                    grouped = df.groupby('measure')['notes'].apply(','.join)
                    for _, notes in grouped.items():
                        output_file.write(notes + "\n")

                except Exception:
                    # Best effort: a broken file must not abort the whole export.
                    logger.error("Error creating sequence for file: %s", f,
                                 exc_info=True)
Esempio n. 2
0
    def __init__(self, *args, **kwargs):
        """
        Initialises the test case and stores a MidiReader on ``self.reader``.

        All positional and keyword arguments are forwarded unchanged to the base class constructor.
        """
        super(MidiReaderTests, self).__init__(*args, **kwargs)

        # Prepare tests objects: the reader is built on a note mapper
        # configured from the JSON mapping file below.
        mapper = NoteMapper("resources/config/map-to-group.json")
        self.reader = MidiReader(mapper)
def main():
    """
    Configures and runs a classification pipeline with 3-fold cross evaluation.

    The same parameter dictionary is handed to both the MidiDataLoader and the ClassificationPipeline; the
    pipeline is evaluated with a LossEvaluator.
    :return: none
    """
    # Documents used to train semantic encoder model
    # (alternative corpus: "../../midi-embeddings/data/1_measure_full.txt")
    encoder_corpus = "../resources/encoder_training_docs/full_1_measure_20k.txt"

    # Encoder (doc2vec) settings
    doc2vec_settings = {
        'doc2vec_docs': encoder_corpus,
        'doc2vec_dm': 1,
        'doc2vec_dm_mean': 1,
        'doc2vec_epochs': 2,
        'doc2vec_hs': 0,
        'doc2vec_learning_rate_start': 0.025,
        'doc2vec_learning_rate_end': 0.2,
        'doc2vec_min_count': 10,
        'doc2vec_negative': 0,
        'doc2vec_vector_size': 8,  # 24
        'doc2vec_window': 10,  # 3
    }

    # Sequence learning (Keras LSTM) settings
    nn_settings = {
        'nn_features': ['bpm', 'measure', 'beat'],
        'nn_batch_size': 128,
        'nn_dense_activation_function': "linear",
        'nn_dropout': 0,
        'nn_epochs': 1,
        'nn_hidden_neurons': 8,  # 30
        'nn_layers': 4,  # 15
        'nn_lstm_activation_function': "selu",
        'nn_lstm_n_prev': 1024,
        'nn_loss': 'mean_absolute_error',
        'nn_optimizer': 'rmsprop',
    }

    # Single merged dictionary (encoder keys first, then network keys).
    pipeline_params = dict(doc2vec_settings)
    pipeline_params.update(nn_settings)

    # Training documents for sequence learning
    fold_dirs = [
        "/Users/taylorpeer/Projects/se-project/Midi2Vec/resources/classification/fold1",
        "/Users/taylorpeer/Projects/se-project/Midi2Vec/resources/classification/fold2",
        "/Users/taylorpeer/Projects/se-project/Midi2Vec/resources/classification/fold3",
    ]

    # Note mapper for MIDI file loading, and the data loader used to
    # encode MIDI-format training files
    mapper = NoteMapper("../settings/map-to-group.json")
    loader = MidiDataLoader(mapper, params=pipeline_params)

    classifier = ClassificationPipeline(params=pipeline_params)
    classifier.set_data_loader(loader)
    classifier.set_training_docs(fold_dirs)
    classifier.set_k_fold_cross_eval(k=3)
    classifier.set_evaluator(LossEvaluator())
    classifier.run()
Esempio n. 4
0
def _retarget_notes_to_bass(notes_text):
    """
    Rewrites the instrument of every underscore-separated note token to "bass".

    ``notes_text`` is a comma separated string. Tokens containing "_" are split on "_"; the second field is kept
    as the pitch, the third is parsed as a float duration, and the token is rebuilt as "bass_<pitch>_<duration>".
    Tokens without "_" are left out of the result.
    :param notes_text: comma separated note tokens.
    :return: list of rewritten tokens (empty if no token contained "_").
    :raises IndexError: if a token containing "_" has fewer than three fields.
    :raises ValueError: if the duration field is not a valid float.
    """
    rewritten = []
    for note in notes_text.split(","):
        if "_" not in note:
            continue
        fields = note.split("_")
        pitch = fields[1]
        duration = float(fields[2])
        rewritten.append("bass" + "_" + pitch + "_" + str(duration))
    return rewritten


def process_directory(midi_directory, note_mapping_config_path, output_dir):
    """
    Processes all MIDI files found in a given directory (recursively), re-assigns every note to the "bass"
    instrument and writes the result back out as MIDI.

    Each file whose name ends in ".mid" (case-insensitive) is read into a dataframe with MidiReader. For every row,
    the note tokens in the 'notes' column are rewritten to use the "bass" instrument; rows in which no token
    contains "_" are left unchanged. The dataframe is then written with MidiWriter to ``output_dir`` under the
    input file's own name, so equally named files from different sub-directories map to the same output path.
    A file that fails to convert is logged and skipped.
    :param midi_directory: the directory to process.
    :param note_mapping_config_path: path to MIDI to text configuration settings.
    :param output_dir: directory the rewritten MIDI files are written to.
    :return: none
    """

    note_mapper = NoteMapper(note_mapping_config_path)

    for subdir, _dirs, files in os.walk(midi_directory):

        for f in files:
            if not f.lower().endswith(".mid"):
                continue

            path = os.path.join(subdir, f)
            print(f)

            try:
                midi_reader = MidiReader(note_mapper)
                midi_writer = MidiWriter(note_mapper)
                df = midi_reader.convert_to_dataframe(path)

                for index, row in df.iterrows():
                    fixed_notes = _retarget_notes_to_bass(row['notes'])
                    # Only overwrite rows that actually produced rewritten notes.
                    if fixed_notes:
                        df.at[index, 'notes'] = ','.join(fixed_notes)

                # os.path.join handles empty or slash-terminated output_dir
                # values and the platform's path separator.
                midi_writer.convert_to_midi(df, os.path.join(output_dir, f))

            except Exception:
                # Best effort: a broken file must not abort the whole run.
                logger.error("Error creating sequence for file: %s", f,
                             exc_info=True)
Esempio n. 5
0
    def __init__(self, *args, **kwargs):
        """
        Initialises the test case with a parameter set and a MidiDataLoader under test.

        ``self.test_params`` holds the encoder and loader settings. ``self.test_instance`` is a MidiDataLoader
        built from a note mapper, those parameters and the encoder returned by ``self._get_test_encoder()``.
        All positional and keyword arguments are forwarded unchanged to the base class constructor.
        """
        super(MidiDataLoadingTests, self).__init__(*args, **kwargs)

        # Assigned before the encoder is requested, matching the original order
        # (the encoder helper may read these settings - not visible from here).
        self.test_params = {
            "doc2vec_dm": 1,
            "doc2vec_dm_mean": 1,
            "doc2vec_epochs": 1,
            "doc2vec_hs": 0,
            "doc2vec_learning_rate_start": 0.025,
            "doc2vec_learning_rate_end": 0.01,
            "doc2vec_min_count": 2,
            "doc2vec_negative": 0,
            "doc2vec_vector_size": 4,
            "doc2vec_window": 1,
            "nn_features": ['bpm', 'measure', 'beat'],
            "nn_lstm_n_prev": 16,
        }

        mapper = NoteMapper("resources/data_loading/map-to-group.json")
        test_encoder = self._get_test_encoder()

        self.test_instance = MidiDataLoader(
            mapper,
            params=self.test_params,
            encoder=test_encoder,
        )
Esempio n. 6
0
def main():
    """
    Configures and runs a generative pipeline on ten Bach chorale MIDI files with 3-fold cross evaluation.

    The same parameter dictionary is handed to both the MidiDataLoader and the GenerativePipeline. Encoders are
    cached under "../notebooks/encoders", the pipeline is evaluated with an F1Evaluator, and the dataframe
    returned by the run is printed.
    :return: none
    """
    # Documents used to train semantic encoder model
    # (alternative corpus: "../resources/encoder_training_docs/full_1_measure_20k.txt")
    encoder_corpus = "../../data/1_measure_full.txt"

    # Encoder (doc2vec) settings
    doc2vec_settings = {
        'doc2vec_docs': encoder_corpus,
        'doc2vec_dm': 1,
        'doc2vec_dm_mean': 1,
        'doc2vec_epochs': 1,
        'doc2vec_hs': 0,
        'doc2vec_learning_rate_start': 0.025,
        'doc2vec_learning_rate_end': 0.2,
        'doc2vec_min_count': 5,
        'doc2vec_negative': 0,
        'doc2vec_vector_size': 8,
        'doc2vec_window': 5,
    }

    # Sequence learning (Keras LSTM) settings
    nn_settings = {
        'nn_features': [],  # alternative: ['bpm', 'measure', 'beat']
        'nn_batch_size': 16,
        'nn_dense_activation_function': "linear",
        'nn_dropout': 0,
        'nn_epochs': 1,
        'nn_hidden_neurons': 8,
        'nn_layers': 4,
        'nn_lstm_activation_function': "selu",
        'nn_lstm_n_prev': 16,
        'nn_loss': 'mean_absolute_error',
        'nn_optimizer': 'rmsprop',
    }

    # Single merged dictionary (encoder keys first, then network keys).
    pipeline_params = dict(doc2vec_settings)
    pipeline_params.update(nn_settings)

    # Training documents for sequence learning
    chorales = [
        "../resources/midi/bach_chorales/01-AchGottundHerr.mid",
        "../resources/midi/bach_chorales/02-AchLiebenChristen.mid",
        "../resources/midi/bach_chorales/03-ChristederdubistTagundLicht.mid",
        "../resources/midi/bach_chorales/04-ChristeDuBeistand.mid",
        "../resources/midi/bach_chorales/05-DieNacht.mid",
        "../resources/midi/bach_chorales/06-DieSonne.mid",
        "../resources/midi/bach_chorales/07-HerrGott.mid",
        "../resources/midi/bach_chorales/08-FuerDeinenThron.mid",
        "../resources/midi/bach_chorales/09-Jesus.mid",
        "../resources/midi/bach_chorales/10-NunBitten.mid",
    ]

    # Note mapper for MIDI file loading, and the data loader used to
    # encode MIDI-format training files
    mapper = NoteMapper("../settings/map-to-group.json")
    loader = MidiDataLoader(mapper, params=pipeline_params)

    generative = GenerativePipeline(params=pipeline_params)
    generative.set_data_loader(loader)
    generative.set_encoder_cache_dir("../notebooks/encoders")
    generative.set_training_docs(chorales)
    generative.set_k_fold_cross_eval(k=3)
    generative.set_evaluator(F1Evaluator())
    # Saving the best model is currently disabled:
    # generative.save_best_model("models", "bach_chorales")

    print(generative.run().to_string())
Esempio n. 7
0
def main():
    """
    Runs a generative pipeline on ten breakbeat MIDI files, driven by a brute-force parameter sweep.

    Every sweep entry lists exactly one candidate value. The pipeline uses 5-fold cross evaluation, saves its best
    model under "../notebooks/models" with the name "test", and the dataframe returned by the run is printed.
    :return: none
    """
    # Documents used to train semantic encoder model
    encoder_corpus = "../resources/encoder_training_docs/full_1_measure_20k.txt"

    single_values = {

        # Encoder (doc2vec) settings:
        'doc2vec_docs': encoder_corpus,
        'doc2vec_dm': 1,
        'doc2vec_dm_mean': 1,
        'doc2vec_epochs': 1,
        'doc2vec_hs': 0,
        'doc2vec_learning_rate_start': 0.025,
        'doc2vec_learning_rate_end': 0.2,
        'doc2vec_min_count': 5,
        'doc2vec_negative': 0,
        'doc2vec_vector_size': 20,
        'doc2vec_window': 1,

        # Sequence learning (Keras LSTM) settings:
        'nn_features': ['bpm', 'measure', 'beat'],
        'nn_batch_size': 100,
        'nn_dense_activation_function': "linear",
        'nn_dropout': 0,
        'nn_epochs': 10,
        'nn_hidden_neurons': 10,
        'nn_layers': 10,
        'nn_lstm_activation_function': "selu",
        'nn_lstm_n_prev': 4,
    }

    # The sweep expects a list of candidate values per setting; wrap each
    # single value in a one-element list.
    param_sweep_values = {name: [value] for name, value in single_values.items()}

    # Training documents for sequence learning
    breakbeats = [
        "../resources/midi/breakbeats/084 Breakthru.mid",
        "../resources/midi/breakbeats/086 Clouds.mid",
        "../resources/midi/breakbeats/089 Get Out.mid",
        "../resources/midi/breakbeats/089 Wrong.mid",
        "../resources/midi/breakbeats/090 Deceive.mid",
        "../resources/midi/breakbeats/090 New York.mid",
        "../resources/midi/breakbeats/090 Radio.mid",
        "../resources/midi/breakbeats/093 Pretender.mid",
        "../resources/midi/breakbeats/093 Right Won.mid",
        "../resources/midi/breakbeats/094 Run.mid",
    ]

    # Note mapper for MIDI file loading, and the data loader used to
    # encode MIDI-format training files
    mapper = NoteMapper("../settings/map-to-group.json")
    loader = MidiDataLoader(mapper)

    generative = GenerativePipeline()
    generative.set_data_loader(loader)
    generative.set_training_docs(breakbeats)
    generative.set_k_fold_cross_eval(k=5)
    generative.save_best_model("../notebooks/models", "test")

    generative.set_optimizer(BruteForce(params=param_sweep_values))

    print(generative.run().to_string())
def main():
    """
    Trains an encoder and a generative sequence model, evaluates the model on a test corpus and prints the scores.

    Steps: train the Encoder on the encoder corpus, load training and evaluation MIDI data through a
    MidiDataLoader (the evaluation data is loaded with ``fit_scaler=False``), train a GenerativeSequenceLearner,
    predict on the test inputs, compute the average error, un-scale predictions and targets, convert both to
    note text and compute precision / recall / f1. The parameters and all scores are printed.
    :return: none
    """
    # Documents used to train semantic encoder model
    encoder_corpus = "../resources/encoder_training_docs/full_1_measure_20k.txt"

    model_params = {

        # Encoder (doc2vec) settings:
        'encoder_training_docs': encoder_corpus,
        'doc2vec_dm': 1,
        'doc2vec_dm_mean': 1,
        'doc2vec_epochs': 1,
        'doc2vec_hs': 0,
        'doc2vec_learning_rate_start': 0.025,
        'doc2vec_learning_rate_end': 0.2,
        'doc2vec_min_count': 5,
        'doc2vec_negative': 0,
        'doc2vec_vector_size': 5,
        'doc2vec_window': 1,

        # Sequence learning (Keras LSTM) settings:
        'nn_features': ['bpm', 'measure', 'beat'],
        'nn_batch_size': 100,
        'nn_dense_activation_function': "linear",
        'nn_dropout': 0.05,
        'nn_epochs': 5,
        'nn_hidden_neurons': 10,
        'nn_layers': 10,
        'nn_lstm_activation_function': "selu",
        'nn_lstm_n_prev': 4,
    }

    # Train encoder
    encoder = Encoder(model_params)
    encoder.set_documents(
        encoder.load_documents(model_params['encoder_training_docs']))
    encoder.train()

    # Note mapper for MIDI file loading
    mapper = NoteMapper("../settings/map-to-group.json")

    # Training / evaluation documents for sequence learning
    # TODO paths...
    training_docs = [
        "/Users/taylorpeer/Projects/se-project/midi-embeddings/data/corpora/test/training",
    ]
    evaluation_docs = [
        "/Users/taylorpeer/Projects/se-project/midi-embeddings/data/corpora/test/test",
    ]

    # Load training MIDI files using MidiDataLoader
    loader = MidiDataLoader(mapper, params=model_params, encoder=encoder)
    training_data = loader.load_data_as_array(training_docs)

    # fit_scaler=False re-uses the scaler from the training set
    x_test, y_test = loader.load_data_as_array(evaluation_docs,
                                               fit_scaler=False)

    # Train sequence learning model and apply it to the test set
    learner = GenerativeSequenceLearner(model_params)
    learner.train(training_data)
    predicted = learner.predict(x_test)

    # Evaluate accuracy of model on test set (on the still-scaled values)
    evaluator = Evaluator()
    average_error = evaluator.compute_average_error(predicted, y_test)

    # Un-scale predicted and actual values
    scaler = loader.get_scaler()
    predicted = scaler.inverse_transform(predicted)
    y_test = scaler.inverse_transform(y_test)

    # Convert predicted and actual vectors to note sequences
    predicted_notes = encoder.convert_feature_vectors_to_text(predicted)
    actual_notes = encoder.convert_feature_vectors_to_text(y_test)

    # Precision/recall of predicted vs. actual notes at every timestamp of evaluation
    precision, recall, f1 = evaluator.compute_seq_accuracy(predicted_notes,
                                                           actual_notes)

    # Drop the 'doc2vec_docs' entry before printing the parameters.
    # NOTE(review): this dictionary stores the corpus under
    # 'encoder_training_docs', so the filter currently removes nothing -
    # confirm which key was intended.
    printable_params = {
        name: setting
        for name, setting in model_params.items()
        if name != 'doc2vec_docs'
    }

    print(printable_params)
    print("- precision: " + str(precision))
    print("- recall: " + str(recall))
    print("- f1: " + str(f1))
    print("- average error: " + str(average_error))
    print("---")
Esempio n. 9
0
def main():
    """
    Trains an encoder and a generative sequence model on the breakbeat corpus, then generates one sequence per seed.

    For every seed MIDI file a sequence of length 64 is generated with SequenceGenerator, written to
    "test_seq_<index>.mid" in the working directory through MidiWriter, and printed.
    :return: none
    """
    # Documents used to train semantic encoder model
    # (alternative corpus: "../../midi-embeddings/data/full_1_measure.txt")
    encoder_corpus = "../resources/encoder_training_docs/full_1_measure_20k.txt"

    model_params = {

        # Encoder (doc2vec) settings:
        'doc2vec_docs': encoder_corpus,
        'doc2vec_dm': 1,
        'doc2vec_dm_mean': 1,
        'doc2vec_epochs': 2,
        'doc2vec_hs': 0,
        'doc2vec_learning_rate_start': 0.025,
        'doc2vec_learning_rate_end': 0.2,
        'doc2vec_min_count': 10,
        'doc2vec_negative': 0,
        'doc2vec_vector_size': 20,  # 24
        'doc2vec_window': 10,  # 3

        # Sequence learning (Keras LSTM) settings:
        'nn_features': ['bpm', 'measure', 'beat'],
        'nn_batch_size': 15,
        'nn_dense_activation_function': "linear",
        'nn_dropout': 0.1,
        'nn_epochs': 75,
        'nn_hidden_neurons': 30,  # 30
        'nn_layers': 20,  # 15
        'nn_lstm_activation_function': "selu",
        'nn_lstm_n_prev': 16,
        'nn_loss': 'mean_absolute_error',
        'nn_optimizer': 'rmsprop',
    }

    # Train encoder
    encoder = Encoder(model_params)
    encoder.set_documents(encoder.load_documents(model_params['doc2vec_docs']))
    encoder.train()

    # Note mapper for MIDI file loading
    mapper = NoteMapper("../settings/map-to-group.json")

    # Load training MIDI files (the whole breakbeats directory) using MidiDataLoader
    loader = MidiDataLoader(mapper, params=model_params, encoder=encoder)
    training_data = loader.load_data_as_array(["../resources/midi/breakbeats"])

    # Train sequence learning model
    learner = GenerativeSequenceLearner(model_params)
    learner.train(training_data)

    # TODO select seed sequence for training
    seed_sequences = [
        "../resources/midi/breakbeats/084 Breakthru.mid",
        "../resources/midi/breakbeats/086 Clouds.mid",
        "../resources/midi/breakbeats/089 Get Out.mid",
        "../resources/midi/breakbeats/089 Wrong.mid",
        "../resources/midi/breakbeats/090 Deceive.mid",
        "../resources/midi/breakbeats/090 New York.mid",
        "../resources/midi/breakbeats/090 Radio.mid",
        "../resources/midi/breakbeats/093 Pretender.mid",
        "../resources/midi/breakbeats/093 Right Won.mid",
        "../resources/midi/breakbeats/094 Run.mid",
    ]

    generator = SequenceGenerator(loader, learner)
    length = 64

    for seq_index, seed in enumerate(seed_sequences):
        generated = generator.generate(seed, length)

        # A fresh writer per sequence, as in the original flow.
        MidiWriter(mapper).convert_to_midi(
            generated, "test_seq_" + str(seq_index) + ".mid")

        print("---")
        print(generated.to_string())