def play_midi(self, filename, track=1, transpose=6):
    """Read one track from a MIDI file and play it as a tune."""
    midi = MidiFile(filename)
    tune = midi.read_track(track)
    self.play_tune(midi.tempo, tune, transpose=transpose, name=filename)
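# Usage sketch, not part of the original source: the enclosing player class is not
# shown here, so the `Player` name and the file path below are assumptions.
#
#     player = Player()
#     player.play_midi('samples/tune.mid', track=1, transpose=6)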
def test_03_read_track(self):
    m = MidiFile(self.sample_file)
    notes = m.read_track(1)
    for n, expected in zip(notes[:4], (('r', 0.4343891402714933),
                                       ('c2', 1.116279069767441),
                                       ('r', 48.00000000000171),
                                       ('f1', 1.499999999999998))):
        self.assertEqual(n[0], expected[0])
        self.assertTrue(isclose(n[1], expected[1]))
def test_02_init(self):
    m = MidiFile(self.sample_file)
def test_01_init_no_file(self):
    with self.assertRaises(OSError):
        m = MidiFile('nonexist')
def preprocess(filename, timidity, latency, truncate, pad=1, get_raw=False):
    """
    Preprocess an audio file and its MIDI counterpart. Computes transforms and labels.

    :param filename: audio filename
    :param timidity: set to True if the file was rendered with timidity
    :param latency: in seconds
    :param truncate: in seconds (0 for no truncation)
    :param pad: in seconds, added at the start and end before the spectral transforms
    :param get_raw: set to True to return the raw computed spectrograms (e.g. for visualization)
    :return: (tensor_mel, tensor_cqt_select, onset_labels, onset_caracs,
              notes_labels_select, notes_caracs_select, dname), or the raw
              spectrogram data when get_raw is True
    """
    filename_midi = filename.rsplit('.', 1)[0] + '.mid'  # strip the audio extension
    dname = filename.replace('/', '_').replace('\\', '_')

    # Load files
    ipad = int(pad * 44100)
    audio_pad = (ipad, ipad)  # add `pad` seconds of silence at the beginning and at the end
    if truncate > 0:
        audio = AudioFile(filename, truncate=int(truncate * 44100), pad=audio_pad)
    else:
        audio = AudioFile(filename, pad=audio_pad)
    mid = MidiFile(filename_midi)

    step = 0.02  # seconds
    latency = int(round(latency / step, 0))  # convert latency from seconds to analysis windows

    # Compute spectrograms
    spectrograms = ComputeSpectrograms(audio, step=step)

    # Compute filtered spectrograms
    melgrams = ComputeMelLayers(spectrograms, step, audio.Fs, latency)

    # Build the input tensor
    cnn_window = 15
    tensor_mel = BuildTensor(melgrams[:, 2], cnn_window)

    # Compute CQT
    FreqAxisLog, time, cqgram = ComputeCqt(audio, 200., 4000., step, latency, r=3)
    tensor_cqt = BuildTensor([cqgram], cnn_window)

    # Global data length
    max_len = min(tensor_mel.shape[0], tensor_cqt.shape[0])

    # Compute output labels
    notes = mid.getNotes(timidity)
    notes_onset = np.array(notes)[:, 0]  # keep only the note timings
    notes_value = np.array(notes, dtype=int)[:, 1]  # keep only the key values
    onset_labels = np.zeros(max_len)
    onset_caracs = np.zeros((max_len, 5))
    onset_caracs[:, 2] = np.arange(max_len)
    note_low = 21  # lowest MIDI note on a keyboard
    note_high = 108  # highest MIDI note on a keyboard
    notes_labels = np.zeros((max_len, note_high - note_low + 1))
    notes_caracs = np.zeros((max_len, note_high - note_low + 1))
    for i in range(len(notes_onset)):
        t_win = int(np.floor((notes_onset[i] + audio_pad[0] / audio.Fs) / step))
        if t_win >= len(onset_labels):
            break
        if t_win >= 0:
            onset_labels[t_win] = 1
            onset_caracs[t_win][0] += 1  # nb_notes
            onset_caracs[t_win][1] = max(onset_caracs[t_win][1], notes[i][2])  # max volume
            if t_win + 1 < len(onset_labels):
                # nb of blank windows since the last onset
                onset_caracs[t_win + 1:, 2] -= onset_caracs[t_win + 1][2]
            n = notes_value[i] - note_low
            notes_labels[t_win][n] = 1
            notes_caracs[t_win][n] = notes[i][2]  # volume
    # Column 3: blank windows until the next onset (scanning backwards)
    counter = 0
    for i in range(len(onset_labels) - 1, -1, -1):
        onset_caracs[i][3] = counter
        if onset_labels[i] == 1:
            counter = 0
        else:
            counter += 1
    # Column 4: distance in windows to the nearest onset (past or future)
    onset_caracs[:, 4] = np.minimum(onset_caracs[:, 2], onset_caracs[:, 3])

    # Extract useful CQT
    select = [i for i in range(max_len) if onset_labels[i] > 0]
    tensor_cqt_select = np.take(tensor_cqt, select, axis=0)
    notes_labels_select = np.take(notes_labels, select, axis=0)
    notes_caracs_select = np.take(notes_caracs, select, axis=0)

    if not get_raw:
        return (tensor_mel[:max_len, ...], tensor_cqt_select, onset_labels, onset_caracs,
                notes_labels_select, notes_caracs_select, dname)
    else:
        return (melgrams, tensor_mel, onset_labels, cqgram, tensor_cqt, time,
                FreqAxisLog, max_len, step)
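if __name__ == '__main__':
    # Usage sketch, not part of the original source: the audio path and parameter
    # values below are assumptions for illustration. The matching MIDI file
    # ('dataset/render_001.mid') is expected to sit next to the audio file.
    (tensor_mel, tensor_cqt_select, onset_labels, onset_caracs,
     notes_labels_select, notes_caracs_select, dname) = preprocess(
        'dataset/render_001.wav',
        timidity=True,   # the file was rendered with timidity
        latency=0.0,     # seconds
        truncate=0,      # 0 keeps the full recording
    )
    print(tensor_mel.shape, tensor_cqt_select.shape, onset_labels.shape)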