def get_onsets_and_pitch_labels(midifile):
    pattern = midi.MIDIFile(midifile)
    intervals = []
    labels = []
    for onset, _pitch, duration, velocity, _channel in pattern.sustained_notes:
        # do not subtract 21; mir_eval needs pitches strictly >= 0 anyways
        label = int(_pitch)
        intervals.append([onset, onset + duration])
        labels.append(label)
    return np.array(intervals), np.array(labels)
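
# Usage sketch (not from the original code): the (intervals, labels) pair plugs
# straight into mir_eval's note transcription metrics. 'ref.mid' and 'est.mid'
# are hypothetical paths. mir_eval.transcription matches pitches with a
# tolerance in cents, so the MIDI numbers are converted to Hz first.
import mir_eval

ref_intervals, ref_pitches = get_onsets_and_pitch_labels('ref.mid')
est_intervals, est_pitches = get_onsets_and_pitch_labels('est.mid')
precision, recall, f_measure, overlap = mir_eval.transcription.precision_recall_f1_overlap(
    ref_intervals, mir_eval.util.midi_to_hz(ref_pitches),
    est_intervals, mir_eval.util.midi_to_hz(est_pitches))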
def load_performance(path, piece, spectrogram_params, coords, coord2onset, sf_path,
                     tempo_factor=1., real_perf=False, transpose=0):
    if real_perf:
        wav_path = os.path.join(path, 'performance', piece + f'_{tempo_factor}.wav')
        midi_path = os.path.join(path, 'performance', piece + '.mid')
    else:
        if tempo_factor == -1:
            # flag to indicate no tempo factor
            midi_path = os.path.join(path, 'performance', piece + '.mid')
        else:
            midi_path = os.path.join(path, 'performance', piece + f'_tempo_{tempo_factor}.mid')

    midi = mm_midi.MIDIFile(midi_path)

    if transpose != 0:
        notes = midi.notes
        notes[:, 1] += transpose
        midi = mm_midi.MIDIFile.from_notes(notes)

    if real_perf and tempo_factor != -1:
        spec = wav_to_spec_otf(wav_path, spectrogram_params)
    else:
        spec = midi_to_spec_otf(midi, spectrogram_params, sound_font_path=sf_path)

    spec = np.pad(spec, ((0, 0), (spectrogram_params['pad'], 0)), mode='constant')

    onsets = (midi.notes[:, 0] * spectrogram_params['fps']).astype(int)
    onsets, coords_new = merge_onsets(onsets, copy.deepcopy(coords), coord2onset[0])

    interpol_fnc = interpolate.interp1d(onsets, coords_new.T, kind='previous',
                                        bounds_error=False,
                                        fill_value=(coords_new[0, :], coords_new[-1, :]))

    return spec, onsets, coords_new, interpol_fnc
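
# Why kind='previous' above (illustration, not from the original code): the
# interpolator is a zero-order hold that maps any query frame to the
# coordinates of the most recent onset at or before it, clamped to the
# first/last coordinates outside the onset range. Self-contained, toy data:
import numpy as np
from scipy import interpolate

toy_onsets = np.array([0, 10, 20])                     # onset frame indices
toy_coords = np.array([[0., 0.], [1., 5.], [2., 9.]])  # one coordinate pair per onset
f = interpolate.interp1d(toy_onsets, toy_coords.T, kind='previous',
                         bounds_error=False,
                         fill_value=(toy_coords[0, :], toy_coords[-1, :]))
print(f(4))    # [0. 0.] - still the coordinates of onset 0
print(f(15))   # [1. 5.] - holds the value from onset 10
print(f(99))   # [2. 9.] - clamped to the last coordinates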
def get_y_from_file(midifile, n_frames, dt):
    pattern = midi.MIDIFile(midifile)

    y = np.zeros((n_frames, 88), dtype=np.float32)
    for onset, _pitch, duration, velocity, _channel in pattern.notes:
        pitch = int(_pitch)
        frame_start = int(np.round(onset / dt))
        frame_end = int(np.round((onset + duration) / dt))

        # even if the event was too short, always produce a label!
        if frame_start == frame_end:
            frame_end += 1

        label = pitch - 21
        y[frame_start:frame_end, label] = 1
    return y
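
# The frame_start == frame_end guard above is what keeps sub-frame notes from
# vanishing. A runnable check with toy numbers (dt = 0.01 s, i.e. 100 fps;
# the values are illustrative):
import numpy as np

dt = 0.01
onset, duration = 0.502, 0.003                      # a 3 ms note
frame_start = int(np.round(onset / dt))             # 50
frame_end = int(np.round((onset + duration) / dt))  # also 50 -> empty slice
if frame_start == frame_end:
    frame_end += 1                                  # frame 50 still gets labelled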
def from_midi(annot_path, uid, hop_samples=256, unique_mf0=True):
    check_dir(CACHED_FILES_PATH)
    # Check if there is a cached numpy binary
    cached_path = os.path.join(CACHED_FILES_PATH, "{}_{}.npz".format(uid, hop_samples))
    if os.path.isfile(cached_path):
        # arrays come back in np.savez order: arr_0 .. arr_3
        times, freqs, notes, voicing = np.load(cached_path).values()
        return Annotation(times, freqs, notes, voicing)
    else:
        pattern = midi.MIDIFile(annot_path)

        tree = IntervalTree()
        for onset, _pitch, duration, velocity, _channel in pattern.notes:
            pitch = int(_pitch)
            frame_start = onset
            frame_end = onset + duration
            tree[frame_start:frame_end] = pitch

        # tree.end() is the latest offset over all notes; max(tree)[1] would
        # pick the interval with the latest *onset*, whose end can be earlier.
        max_time = tree.end()
        times = np.arange(0, max_time, hop_samples / 44100)

        notes_mf0 = []
        for t in times:
            notes_at_t = [note.data for note in tree[t]]
            if unique_mf0:
                # remove duplicate notes
                notes_at_t = list(set(notes_at_t))
            notes_mf0.append(notes_at_t)

        max_polyphony = np.max([len(frame) for frame in notes_mf0])
        freqs = np.zeros((len(times), max_polyphony))
        notes = np.zeros((len(times), max_polyphony))
        voicing = np.zeros((len(times),), dtype=np.int32)
        for i, notes_at_i in enumerate(notes_mf0):
            for j, note in enumerate(notes_at_i):
                notes[i, j] = note
                freqs[i, j] = mir_eval.util.midi_to_hz(note)
                voicing[i] += 1

        annot = Annotation(times, freqs, notes, voicing)
        np.savez(cached_path, annot.times, annot.freqs, annot.notes, annot.voicing)
        return annot
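
# The IntervalTree lookup above is what turns note events into per-frame pitch
# lists: tree[t] returns every interval containing time t, and tree.end() is
# the latest offset over all notes. A self-contained sketch of that step:
from intervaltree import IntervalTree

tree = IntervalTree()
tree[0.0:1.0] = 60    # C4 sounding from 0 s to 1 s
tree[0.5:1.5] = 64    # E4 overlapping it
print(sorted(iv.data for iv in tree[0.25]))   # [60]
print(sorted(iv.data for iv in tree[0.75]))   # [60, 64]
print(tree.end())                             # 1.5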
def get_y_from_file(midifile, n_frames, audio_options):
    pattern = midi.MIDIFile(midifile)
    dt = float(audio_options['hop_size']) / float(audio_options['sample_rate'])

    y_frames = np.zeros((n_frames, 88), dtype=np.float32)
    y_velocity = np.zeros((n_frames, 88), dtype=np.float32)
    for onset, _pitch, duration, velocity, _channel in pattern.sustained_notes:
        pitch = int(_pitch)
        label = pitch - 21
        note_start = int(np.round(onset / dt))
        note_end = int(np.round((onset + duration) / dt))

        y_frames[note_start:note_end + 1, label] = curve(note_start, note_end + 1)
        y_velocity[note_start:note_end + 1, label] = velocity / 127.
    return y_frames, y_velocity
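
# `curve` is defined elsewhere in the repo and returns one envelope value per
# frame over [note_start, note_end + 1). A hypothetical stand-in with the same
# interface (an assumption for illustration, not the original definition):
import numpy as np

def curve(start, stop):
    # hypothetical: linear decay from 1 to 0, one value per frame in [start, stop)
    return np.linspace(1.0, 0.0, num=stop - start, dtype=np.float32)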
def get_y_from_file(midifile, n_frames, audio_options):
    with warnings.catch_warnings():
        warnings.simplefilter('ignore')
        pattern = midi.MIDIFile(midifile)

    dt = float(audio_options['hop_size']) / float(audio_options['sample_rate'])

    y_onsets = np.zeros((n_frames, 88), dtype=np.uint8)
    y_frames = np.zeros((n_frames, 88), dtype=np.uint8)
    y_offsets = np.zeros((n_frames, 88), dtype=np.uint8)

    for onset, _pitch, duration, velocity, _channel in pattern.sustained_notes:
        pitch = int(_pitch)
        label = pitch - 21
        note_start = int(np.round(onset / dt))
        note_end = int(np.round((onset + duration) / dt))

        # some of the midi files have onsets/offsets larger than n_frames.
        # they were manually checked, and it's actually not an issue at all.
        # see data-preparation/maestro-inconsistencies/* for scripts that
        # perform visual inspection!
        if note_start < n_frames:
            if note_end >= n_frames:
                # print('weird_offset', midifile)
                note_end = n_frames - 1
            y_onsets[note_start, label] = 1
            y_frames[note_start:note_end + 1, label] = 1
            y_offsets[note_end, label] = 1
        else:
            # print('weird_onset', midifile)
            pass

    return y_onsets, y_frames, y_offsets
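
# For a single note with note_start=3 and note_end=5, the three targets above
# look like this (illustration: one pitch column over eight frames):
#
#   y_onsets : 0 0 0 1 0 0 0 0   <- 1 only at note_start
#   y_frames : 0 0 0 1 1 1 0 0   <- 1 from note_start to note_end inclusive
#   y_offsets: 0 0 0 0 0 1 0 0   <- 1 only at note_end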
def test_midi(self):
    run_single(self.bin, stereo_sample_file, tmp_result, args=['--midi'])
    result = midi.MIDIFile(tmp_result).notes
    self.assertTrue(np.allclose(result[:, :2], self.result, atol=1e-3))