m6_4_3 = ("MAHL64O3.MID", [(960, -960), (68160, -960)], 9600, None, []) m5_2 = ("Mahler5-2.mid", [(733440, -960), (742080, -960)], None, None, [(5, '*')]) m5_3_1 = ("Mahlsy531.mid", [(480, -480)], None, None, []) m5_3_2 = ("Mahlsy532.mid", [(480, -480)], None, None, []) dp.midi_to_data([m6_1, [m6_4_1, m6_4_2, m6_4_3], [m5_3_1, m5_3_2], "Mahler62.mid", "Mahler63.mid", "BeatIt.mid"], root_dir = params.root_dir, input_dir = params.input_dir, save_dir = params.data_dir, redo_existing = params.redo) else: dp.midi_to_data(root_dir = params.root_dir, input_dir = params.input_dir, save_dir = params.data_dir, redo_existing = params.redo) # Turn the data representation into a binary matrix format with sequences, keeping track of the sizes of sequences if params.seq: dp.data_to_sequences(sequence_base = params.base, ctx_length = params.ctx_len, inp_length = params.inp_len, \ root_dir = params.root_dir, input_dir = params.data_dir, save_dir = params.sequence_dir) # Turn data representation back to MIDI if params.midi: dp.data_to_midi(root_dir = params.root_dir, input_dir = params.data_dir, save_dir = params.midi_dir) # Find stats manually and inject if params.train: dp.setup_training(root_dir = params.root_dir, input_dir = params.sequence_dir, validation_set_ratio = params.validation) print("[PROC_DIR]: Total samples:", dp.total) print("[PROC_DIR]: Max timesteps context length:", dp.max_context_length) print("[PROC_DIR]: Max timesteps input length:", dp.max_input_length) print("[PROC_DIR]: Number of timestep features:", dp.features) print("[PROC_DIR]: Size of training set:", len(dp.training_set)) print("[PROC_DIR]: Size of validation set:", len(dp.validation_set)) # Generate batches instead of saving the entire training data_to_midi gen = dp.sequences_to_training_data_generator(params.batch_sz, validation = False, root_dir = params.root_dir, input_dir = params.sequence_dir) ds = dp.mp.default_common_duration_set if params.base == "bar":