def slice_C(self, start, duration, target_frame_count, magnitude_only=True,
            bins_per_tone=1, filter_scale=2, highest_note='C8',
            lowest_note='A0', nbins=None):
    """Calculates the CQT and slices a part of it.

    If nbins is specified, the highest note is ignored.
    """
    if nbins is None:
        nbins = int((librosa.note_to_midi(highest_note) -
                     librosa.note_to_midi(lowest_note)) * bins_per_tone)
    C = librosa.cqt(self.wf,
                    sr=self.sr,
                    fmin=librosa.note_to_hz(lowest_note),
                    n_bins=nbins,
                    bins_per_octave=int(12 * bins_per_tone),
                    filter_scale=filter_scale,  # was hard-coded to 2, ignoring the argument
                    hop_length=self.hl)
    if magnitude_only:
        C = np.abs(C)
    t = self._seconds_to_frames(start + duration)
    s = self._seconds_to_frames(start)
    return self._resize(C[:, s:t], target_frame_count)
def __test(tuning, accidental, octave, round_midi):
    note = 'C{:s}'.format(accidental)

    if octave is not None:
        note = '{:s}{:d}'.format(note, octave)
    else:
        octave = 0

    if tuning is not None:
        note = '{:s}{:+d}'.format(note, tuning)
    else:
        tuning = 0

    midi_true = 12 * (octave + 1) + tuning * 0.01

    if accidental == '#':
        midi_true += 1
    elif accidental in list('b!'):
        midi_true -= 1

    midi = librosa.note_to_midi(note, round_midi=round_midi)
    if round_midi:
        midi_true = np.round(midi_true)
    eq_(midi, midi_true)

    midi = librosa.note_to_midi([note], round_midi=round_midi)
    eq_(midi[0], midi_true)
def testAllNoteheadTypes(self):
    staff = musicscore_pb2.Staff(
        staffline_distance=10,
        center_line=[Point(x=0, y=50), Point(x=100, y=50)],
        glyph=[
            Glyph(type=Glyph.CLEF_TREBLE, x=1,
                  y_position=reader.TREBLE_CLEF_EXPECTED_Y),
            Glyph(type=Glyph.NOTEHEAD_FILLED, x=10, y_position=-6),
            Glyph(type=Glyph.NOTEHEAD_EMPTY, x=10, y_position=-6),
            Glyph(type=Glyph.NOTEHEAD_WHOLE, x=10, y_position=-6),
        ])
    notes = conversions.page_to_notesequence(
        reader.ScoreReader().read_page(
            musicscore_pb2.Page(
                system=[musicscore_pb2.StaffSystem(staff=[staff])])))
    self.assertEqual(
        notes,
        music_pb2.NoteSequence(notes=[
            Note(pitch=librosa.note_to_midi('C4'), start_time=0, end_time=1),
            Note(pitch=librosa.note_to_midi('C4'), start_time=1, end_time=3),
            Note(pitch=librosa.note_to_midi('C4'), start_time=3, end_time=7),
        ]))
def __test(tuning, accidental, octave, round_midi):
    note = 'C{:s}'.format(accidental)

    if octave is not None:
        note = '{:s}{:d}'.format(note, octave)
    else:
        octave = 0

    if tuning is not None:
        note = '{:s}{:+d}'.format(note, tuning)
    else:
        tuning = 0

    midi_true = 12 * (octave + 1) + tuning * 0.01

    if accidental == '#':
        midi_true += 1
    elif accidental in list('b!'):
        midi_true -= 1

    midi = librosa.note_to_midi(note, round_midi=round_midi)
    if round_midi:
        midi_true = np.round(midi_true)
    assert midi == midi_true

    midi = librosa.note_to_midi([note], round_midi=round_midi)
    assert midi[0] == midi_true
def testBeams(self):
    beam_1 = musicscore_pb2.LineSegment(start=Point(x=10, y=20),
                                        end=Point(x=40, y=20))
    beam_2 = musicscore_pb2.LineSegment(start=Point(x=70, y=40),
                                        end=Point(x=90, y=40))
    beam_3 = musicscore_pb2.LineSegment(start=Point(x=70, y=60),
                                        end=Point(x=90, y=60))
    staff = musicscore_pb2.Staff(
        staffline_distance=10,
        center_line=[Point(x=0, y=50), Point(x=100, y=50)],
        glyph=[
            Glyph(type=Glyph.CLEF_TREBLE, x=1,
                  y_position=reader.TREBLE_CLEF_EXPECTED_Y),
            # 2 eighth notes.
            Glyph(type=Glyph.NOTEHEAD_FILLED, x=10, y_position=-4, beam=[beam_1]),
            Glyph(type=Glyph.NOTEHEAD_FILLED, x=40, y_position=-1, beam=[beam_1]),
            # 1 quarter note.
            Glyph(type=Glyph.NOTEHEAD_FILLED, x=50, y_position=0),
            # 2 sixteenth notes.
            Glyph(type=Glyph.NOTEHEAD_FILLED, x=60, y_position=-2,
                  beam=[beam_2, beam_3]),
            Glyph(type=Glyph.NOTEHEAD_FILLED, x=90, y_position=2,
                  beam=[beam_2, beam_3]),
        ])
    notes = conversions.page_to_notesequence(
        reader.ScoreReader().read_page(
            musicscore_pb2.Page(
                system=[musicscore_pb2.StaffSystem(staff=[staff])])))
    self.assertEqual(
        notes,
        music_pb2.NoteSequence(notes=[
            Note(pitch=librosa.note_to_midi('E4'), start_time=0, end_time=0.5),
            Note(pitch=librosa.note_to_midi('A4'), start_time=0.5, end_time=1),
            Note(pitch=librosa.note_to_midi('B4'), start_time=1, end_time=2),
            Note(pitch=librosa.note_to_midi('G4'), start_time=2, end_time=2.25),
            Note(pitch=librosa.note_to_midi('D5'), start_time=2.25, end_time=2.5),
        ]))
def createMidiRhythmScore(midi_filename, onset_frames_index_of_16th_notes,
                          strong_onset_frames_index_of_16th_notes,
                          weak_onset_frames_index_of_16th_notes, bpm,
                          auftakt_16th_notes_number=0):
    # Build the MIDI data.
    # Length of a 16th note, in ticks.
    ticks_per_16th_note = 120
    ticks_per_beat = ticks_per_16th_note * 4  # a quarter note defaults to 480 ticks

    # Tick positions at which each note is placed.
    onset_ticks = np.array(onset_frames_index_of_16th_notes) * ticks_per_16th_note
    strong_onset_ticks = np.array(strong_onset_frames_index_of_16th_notes) * ticks_per_16th_note
    weak_onset_ticks = np.array(weak_onset_frames_index_of_16th_notes) * ticks_per_16th_note

    # Anacrusis (auftakt) handling; ideally this should be done inside mido
    # itself, but it is handled here for convenience.
    #onset_ticks = list(filter(lambda x: x >= ticks_per_16th_note * auftakt_16th_notes_number, onset_ticks))
    #strong_onset_ticks = list(filter(lambda x: x >= ticks_per_16th_note * auftakt_16th_notes_number, strong_onset_ticks))
    #weak_onset_ticks = list(filter(lambda x: x >= ticks_per_16th_note * auftakt_16th_notes_number, weak_onset_ticks))

    # Setup.
    smf = mido.MidiFile(ticks_per_beat=ticks_per_beat)
    track = mido.MidiTrack()
    track.append(mido.MetaMessage('set_tempo', tempo=mido.bpm2tempo(bpm)))
    track.append(mido.Message('program_change', program=1))  # instrument (program)

    # Write the notes.
    # Only the very first event carries a delta time.
    onset_ticks_diff = np.diff(onset_ticks)
    # Anacrusis handling.
    #track.append(mido.Message('note_off', time=(ticks_per_16th_note * 12)))
    track.append(
        mido.Message('note_off',
                     time=(ticks_per_16th_note * 16) -
                     (ticks_per_16th_note * auftakt_16th_notes_number)))
    i = 0
    for i in range(len(onset_ticks) - 1):
        delta = onset_ticks[i + 1] - onset_ticks[i]
        if onset_ticks[i] in strong_onset_ticks:
            track.append(
                mido.Message('note_on', velocity=100,
                             note=librosa.note_to_midi('F3')))
            track.append(mido.Message('note_off', time=delta))
            track.append(
                mido.Message('note_off', note=librosa.note_to_midi('F3')))
        elif onset_ticks[i] in weak_onset_ticks:
            track.append(
                mido.Message('note_on', velocity=50,
                             note=librosa.note_to_midi('A3')))
            track.append(mido.Message('note_off', time=delta))
            track.append(
                mido.Message('note_off', note=librosa.note_to_midi('A3')))
    track.append(mido.MetaMessage('end_of_track'))
    smf.tracks.append(track)
    # Write out the MIDI file.
    smf.save(midi_filename)
def gen_frame_info_from_notes(midi_notes, t_unit=0.02):
    tmp_midi = pretty_midi.PrettyMIDI()
    inst = pretty_midi.Instrument(program=0)
    inst.notes += midi_notes
    tmp_midi.instruments.append(inst)

    piano_roll = tmp_midi.get_piano_roll(fs=round(1 / t_unit)).transpose()
    low = librosa.note_to_midi("A0")
    hi = librosa.note_to_midi("C8") + 1
    piano_roll = piano_roll[:, low:hi]

    return gen_frame_info(piano_roll, t_unit=t_unit)
def transition_matrix(note_min, note_max, p_stay_note, p_stay_silence):
    """
    Returns the transition matrix with one silence state and two states
    (onset and sustain) for each note.

    Parameters
    ----------
    note_min : string, 'A#4' format
        Lowest note supported by this transition matrix
    note_max : string, 'A#4' format
        Highest note supported by this transition matrix
    p_stay_note : float, between 0 and 1
        Probability of a sustain state returning to itself.
    p_stay_silence : float, between 0 and 1
        Probability of the silence state returning to itself.

    Returns
    -------
    T : np.ndarray, shape=(2*n_notes + 1, 2*n_notes + 1)
        Transition matrix in which T[i, j] is the probability of going
        from state i to state j.
    """
    midi_min = librosa.note_to_midi(note_min)
    midi_max = librosa.note_to_midi(note_max)
    n_notes = midi_max - midi_min + 1
    p_ = (1 - p_stay_silence) / n_notes
    p__ = (1 - p_stay_note) / (n_notes + 1)

    # Transition matrix:
    # State 0 = silence
    # States 1, 3, 5... = onsets
    # States 2, 4, 6... = sustains
    T = np.zeros((2 * n_notes + 1, 2 * n_notes + 1))

    # State 0: silence
    T[0, 0] = p_stay_silence
    for i in range(n_notes):
        T[0, (i * 2) + 1] = p_

    # States 1, 3, 5... = onsets
    for i in range(n_notes):
        T[(i * 2) + 1, (i * 2) + 2] = 1

    # States 2, 4, 6... = sustains
    for i in range(n_notes):
        T[(i * 2) + 2, 0] = p__
        T[(i * 2) + 2, (i * 2) + 2] = p_stay_note
        for j in range(n_notes):
            T[(i * 2) + 2, (j * 2) + 1] = p__

    return T
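# A minimal sanity sketch (not from the original source) for transition_matrix()
# above: with one silence state plus one onset and one sustain state per note,
# every row of T should sum to 1.  The note range below is illustrative.
_T = transition_matrix('A2', 'E4', p_stay_note=0.9, p_stay_silence=0.5)
_n_notes = librosa.note_to_midi('E4') - librosa.note_to_midi('A2') + 1
assert _T.shape == (2 * _n_notes + 1, 2 * _n_notes + 1)
assert np.allclose(_T.sum(axis=1), 1.0)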
def transpose(label, n_semitones):
    """Transpose a chord label by some number of semitones

    Parameters
    ----------
    label : str
        A chord string

    n_semitones : float
        The number of semitones to move `label`

    Returns
    -------
    label_transpose : str
        The transposed chord label
    """
    # Otherwise, split off the note from the modifier
    match = re.match("(?P<note>[A-G][b#]*)(?P<mod>.*)", label)

    if not match:
        return label

    note = match.group("note")

    new_note = librosa.midi_to_note(librosa.note_to_midi(note) + n_semitones,
                                    octave=False)

    return new_note + match.group("mod")
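# Hedged usage notes (not from the original source) for transpose() above; the
# chord labels are illustrative, and librosa.midi_to_note spells accidentals as
# sharps by default:
#   transpose('C:maj', 2)   -> 'D:maj'
#   transpose('Eb:min7', 3) -> 'F#:min7'
#   transpose('N', 5)       -> 'N'        (no root matched, label returned unchanged)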
def ann_data(self, idx):
    jam = jams.load(self.jams_path(idx))
    frame_period = 4096 / 44100
    anns = jam.search(namespace='pitch_class')
    key_mats = []
    root_mats = []
    for a in anns:
        num_frames = (a.duration - frame_period / 2) / frame_period
        f_times = np.arange(np.floor(num_frames)) * frame_period + frame_period / 2
        frame_list = a.to_samples(f_times)
        frame_rel_root = []
        frame_key = []
        for frame in frame_list:
            if len(frame) == 0:
                frame_rel_root.append(12)
                frame_key.append(12)
            else:
                frame_key.append(librosa.note_to_midi(frame[0]['tonic']) % 12)
                frame_rel_root.append(frame[0]['pitch'])
        key_mats.append(np.eye(13)[frame_key])
        root_mats.append(np.eye(13)[frame_rel_root])
    return {
        'key': np.mean(key_mats, axis=0),
        'root': np.mean(root_mats, axis=0)
    }
def transpose(label, n_semitones):
    '''Transpose a chord label by some number of semitones

    Parameters
    ----------
    label : str
        A chord string

    n_semitones : float
        The number of semitones to move `label`

    Returns
    -------
    label_transpose : str
        The transposed chord label
    '''
    # Otherwise, split off the note from the modifier
    match = re.match(six.text_type('(?P<note>[A-G][b#]*)(?P<mod>.*)'),
                     six.text_type(label))

    if not match:
        return label

    note = match.group('note')

    new_note = librosa.midi_to_note(librosa.note_to_midi(note) + n_semitones,
                                    octave=False)

    return new_note + match.group('mod')
def to_midi(notes, t_unit=0.02):
    """Translate the intermediate data into final output MIDI file."""
    midi = pretty_midi.PrettyMIDI()
    piano = pretty_midi.Instrument(program=0)

    # Some tricky steps to determine the velocity of the notes
    l_bound, u_bound = find_min_max_stren(notes)
    s_low = 60
    s_up = 127
    v_map = lambda stren: int(
        s_low + (s_up - s_low) * ((stren - l_bound) / (u_bound - l_bound + 0.0001))  # noqa: E226
    )

    low_b = note_to_midi("A0")
    coll = set()
    for note in notes:
        pitch = note["pitch"] + low_b
        start = note["start"] * t_unit
        end = note["end"] * t_unit
        volume = v_map(note["stren"])
        coll.add(pitch)
        m_note = pretty_midi.Note(velocity=volume, pitch=pitch,
                                  start=start, end=end)
        piano.notes.append(m_note)
    midi.instruments.append(piano)
    return midi
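# Illustrative sketch (assumed intermediate note format, not from the original
# source) for to_midi() above: "pitch" is an offset above A0, so pitch 39 maps
# to MIDI 60 (middle C), and frame indices are scaled by t_unit seconds.
#   notes = [{"pitch": 39, "start": 0,  "end": 25, "stren": 0.8},
#            {"pitch": 43, "start": 25, "end": 50, "stren": 0.6}]
#   midi = to_midi(notes, t_unit=0.02)
#   midi.write('transcription.mid')   # hypothetical output path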
def make_midi(input_wav, notes, tempo, mean_beat, instrument, velocity):
    mid = mido.MidiFile()
    track = mido.MidiTrack()
    mid.tracks.append(track)

    tempo = mido.bpm2tempo(tempo)
    track.append(mido.MetaMessage('set_tempo', tempo=round(tempo * slow), time=0))
    track.append(mido.Message('program_change', program=instrument, time=0))

    for note in notes:
        gap = int(round((note[1] * 480) / mean_beat))
        if note[0] == 'r':
            track.append(mido.Message('note_on', note=60, velocity=0, time=0))
            track.append(mido.Message('note_off', note=60, velocity=0, time=gap))
        else:
            note_num = librosa.note_to_midi(note[0])
            track.append(mido.Message('note_on', note=note_num,
                                      velocity=velocity, time=0))
            track.append(mido.Message('note_off', note=note_num,
                                      velocity=velocity, time=gap))

    output_midi = getFilename(input_wav, instrument)
    # mid.save(output_midi)
    return mid, output_midi
def to_midi(notes, t_unit=0.02):
    midi = pretty_midi.PrettyMIDI()
    piano = pretty_midi.Instrument(program=0)

    l, u = find_min_max_stren(notes)
    s_low = 60
    s_up = 127
    v_map = lambda stren: int(s_low + (s_up - s_low) * ((stren - l) / (u - l)))

    low_b = note_to_midi("A0")
    coll = set()
    for nn in notes:
        pitch = nn["pitch"] + low_b
        start = nn["start"] * t_unit
        end = nn["end"] * t_unit
        coll.add(v_map(nn["stren"]))
        m_note = pretty_midi.Note(velocity=v_map(nn["stren"]), pitch=pitch,
                                  start=start, end=end)
        piano.notes.append(m_note)
    midi.instruments.append(piano)
    return midi
def gen_onsets_info(data, t_unit=0.02):
    #logging.debug("Data shape: %s", data.shape)
    pitches = []
    intervals = []

    lowest_pitch = librosa.note_to_midi("A0")
    for i in range(data.shape[1]):
        notes = find_occur(data[:, i], t_unit=t_unit)
        it = []
        for nn in notes:
            it.append([nn["onset"] * t_unit, (nn["onset"] + 2) * t_unit])

        if len(intervals) == 0 and len(it) > 0:
            intervals = np.array(it)
        elif len(it) > 0:
            intervals = np.concatenate((intervals, np.array(it)), axis=0)

        # hz = CentralFrequency[i]
        hz = librosa.midi_to_hz(lowest_pitch + i)
        for _ in range(len(it)):
            pitches.append(hz)

    if type(intervals) == list:
        intervals = np.array([]).reshape((0, 2))
    pitches = np.array(pitches)
    return intervals, pitches
def testEndToEnd(self):
    with tempfile.TemporaryDirectory() as tmpdir:
        with engine.get_included_labels_file() as centroids:
            export_dir = os.path.join(tmpdir, 'export')
            export_kmeans_centroids.run(centroids.name, export_dir)

            # Now load the saved model.
            omr = engine.OMREngine(
                glyph_classifier_fn=saved_classifier.
                SavedConvolutional1DClassifier.glyph_classifier_fn(export_dir))
            filename = os.path.join(tf.resource_loader.get_data_files_path(),
                                    '../testdata/IMSLP00747-000.png')
            notes = omr.run(filename, output_notesequence=True)
            # TODO(ringw): Fix the extra note that is detected before the actual
            # first eighth note.
            self.assertEqual(librosa.note_to_midi('C4'), notes.notes[1].pitch)
            self.assertEqual(librosa.note_to_midi('D4'), notes.notes[2].pitch)
            self.assertEqual(librosa.note_to_midi('E4'), notes.notes[3].pitch)
def get_index(self, label):
    if label is None:
        return None
    try:
        klass = librosa.note_to_midi(label.replace('m', '')) - 12
        if label.endswith('m'):
            klass += 12
        return klass
    except librosa.ParameterError:
        return None
def load_samples():
    samples = {}
    for filename in glob('samples/*.wav'):
        y, _ = librosa.load(filename, sr=44100, mono=True)
        # Assume the pitch name is the filename.
        pitch = os.path.basename(filename).replace('.wav', '')
        # note_to_midi returns a MIDI note number (the original misleadingly
        # called this variable "hz").
        midi_number = librosa.note_to_midi(pitch)
        samples[midi_number] = y
        print(filename, np.mean(y))
    return samples
def same_key(true_value, estimated_value, semitone_distance=0,
             same_mode=True, true_major=None):
    # convert to ints
    true_minor = true_value.endswith('m')
    true_int = librosa.note_to_midi(true_value.replace('m', ''))
    estimated_minor = estimated_value.endswith('m')
    estimated_int = librosa.note_to_midi(estimated_value.replace('m', ''))

    if true_major is not None:
        if true_major and true_minor:
            return False
        if not true_major and not true_minor:
            return False

    if same_mode and true_minor != estimated_minor:
        return False
    if not same_mode and true_minor == estimated_minor:
        return False
    if estimated_int - true_int != semitone_distance:
        return False
    return True
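# Hedged examples (not from the original source) for same_key() above:
#   same_key('C', 'C')                                         -> True   (identical keys)
#   same_key('C', 'Am')                                        -> False  (modes differ)
#   same_key('C', 'Am', semitone_distance=9, same_mode=False)  -> True   (relative minor; A is 9 semitones above C)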
def testNoteSequence(self):
    filename = os.path.join(resource_loader.get_data_files_path(),
                            'testdata/IMSLP00747-000.png')
    notes = self.engine.run(filename, output_notesequence=True)
    # TODO(ringw): Fix the extra note that is detected before the actual
    # first eighth note.
    self.assertEqual(librosa.note_to_midi('C4'), notes.notes[1].pitch)
    self.assertEqual(librosa.note_to_midi('D4'), notes.notes[2].pitch)
    self.assertEqual(librosa.note_to_midi('E4'), notes.notes[3].pitch)
    self.assertEqual(librosa.note_to_midi('F4'), notes.notes[4].pitch)
    self.assertEqual(librosa.note_to_midi('D4'), notes.notes[5].pitch)
    self.assertEqual(librosa.note_to_midi('E4'), notes.notes[6].pitch)
    self.assertEqual(librosa.note_to_midi('C4'), notes.notes[7].pitch)
def note_to_midi_zeros(annotation):
    '''Special function so that zeros represent silence.

    Input: annotation list taken straight from mtrack
    Output: 1d np.array containing MIDI note numbers instead of note names
            (0 marks silence)
    '''
    new_values = np.array([])
    for a in annotation:
        new_a = 0
        if a != '0':
            new_a = librosa.note_to_midi(a)
        new_values = np.append(new_values, new_a)
    return new_values
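# Small illustrative example (assumed annotation format, not from the original
# source) for note_to_midi_zeros() above:
#   note_to_midi_zeros(['0', 'A4', 'C#5', '0'])  ->  array([ 0., 69., 73.,  0.])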
def fix_notes(melody):
    # generate scales
    notes_numbers = [notes_to_numbers[x[:-1]] for x in melody['notes'] if x != 'P']
    current_notes = set(notes_numbers)

    possible_scales = list()
    for note in current_notes:
        for scale in generate_scales(note):
            possible_scales.append((scale, len(current_notes.intersection(scale))))
    possible_scales = sorted(possible_scales, key=lambda x: x[1], reverse=True)
    max_intersection = possible_scales[0][1]
    possible_scales = list(filter(lambda x: x[1] == max_intersection, possible_scales))

    substitutions = list()
    for scale, _ in possible_scales:
        wrong_notes = current_notes.difference(scale)
        loss = 0
        substitution = dict()
        for wrong_note in wrong_notes:
            scale = np.array(list(scale))
            differences = np.abs(scale - wrong_note) % 12
            loss += np.min(differences) * np.count_nonzero(
                wrong_note == np.array(notes_numbers))
            substitution[wrong_note] = scale[np.argmin(differences)]
        substitutions.append((substitution, loss))
    substitutions = sorted(substitutions, key=lambda x: x[1])
    best_substitution = substitutions[0][0]

    fixed_notes = list()
    for note in melody['notes']:
        if note != 'P':
            if notes_to_numbers[note[:-1]] in best_substitution.keys():
                key = notes_to_numbers[note[:-1]]
                fixed_notes.append(numbers_to_notes[best_substitution[key]] + note[-1])
            else:
                fixed_notes.append(note)
        else:
            fixed_notes.append(note)

    midi = [librosa.note_to_midi(x) if x != 'P' else x for x in fixed_notes]
    melody['notes'] = fixed_notes
    melody['midi'] = midi
    return melody
def load_samples(directory):
    print('\nLoading samples...')
    samples = {}
    directory = os.path.join(directory, '*.wav')
    for filename in glob(directory):
        y, _ = librosa.load(filename, sr=44100, mono=True)
        # Assume the pitch name is the filename
        pitch = os.path.basename(filename).replace('.wav', '')
        midi_number = librosa.note_to_midi(pitch)
        samples[midi_number] = y
        print('\t', filename)
    print('Done loading samples.\n')
    return samples
def gen_onsets_info_from_label_v1(label, inst_num=1, t_unit=0.02):
    intervals = []
    pitches = []

    onsets = {}
    lowest_pitch = librosa.note_to_midi("A0")
    for t, ll in enumerate(label):
        for pitch, insts in ll.items():
            if inst_num not in insts:
                continue
            if (pitch not in onsets) or (insts[inst_num][0] > onsets[pitch]):
                intervals.append([t * t_unit, (t + 2) * t_unit])
                pitches.append(librosa.midi_to_hz(lowest_pitch + pitch))
                onsets[pitch] = insts[inst_num][0]

    return np.array(intervals), np.array(pitches)
def cqt_bin_to_int_midi(bin: int, fmin_note: str, bins_per_octave: int) -> int:
    """
    Convert a CQT bin into a MIDI number rounded to the nearest integer.

    Args:
        bin: frequency bin number
        fmin_note: minimum frequency used to generate the CQT, given as a note name
        bins_per_octave: bins per octave used to generate the CQT

    Returns:
        MIDI number corresponding to the bin, rounded to the nearest integer
    """
    bins_per_semitone = bins_per_octave / 12
    midi_number = bin / bins_per_semitone
    midi_number += librosa.note_to_midi(fmin_note)
    return int(midi_number) if midi_number % 1 < 0.5 else int(midi_number) + 1
def cqt_bin_to_hz(bin: int, fmin_note: str, bins_per_octave: int) -> float:
    """
    Convert a CQT bin into a frequency in Hz.

    Args:
        bin: frequency bin number
        fmin_note: minimum frequency used to generate the CQT, given as a note name
        bins_per_octave: bins per octave used to generate the CQT

    Returns:
        frequency corresponding to the bin, in Hz
    """
    bins_per_semitone = bins_per_octave / 12
    midi_number = bin / bins_per_semitone
    midi_number += librosa.note_to_midi(fmin_note)
    return librosa.midi_to_hz(midi_number)
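# Hedged round-trip check (not from the original source) for the two CQT-bin
# helpers above, assuming a CQT built with fmin_note='A0' and 36 bins per
# octave (3 bins per semitone):
assert cqt_bin_to_int_midi(0, 'A0', 36) == librosa.note_to_midi('A0')   # bin 0 -> MIDI 21
assert cqt_bin_to_int_midi(36, 'A0', 36) == librosa.note_to_midi('A1')  # one octave up
assert abs(cqt_bin_to_hz(0, 'A0', 36) - librosa.note_to_hz('A0')) < 1e-6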
def testTrebleClef(self):
    self.assertEqual(clef.TrebleClef().y_position_to_midi(-8),
                     librosa.note_to_midi('A3'))
    self.assertEqual(clef.TrebleClef().y_position_to_midi(-6),
                     librosa.note_to_midi('C4'))
    self.assertEqual(clef.TrebleClef().y_position_to_midi(0),
                     librosa.note_to_midi('B4'))
    self.assertEqual(clef.TrebleClef().y_position_to_midi(1),
                     librosa.note_to_midi('C5'))
    self.assertEqual(clef.TrebleClef().y_position_to_midi(3),
                     librosa.note_to_midi('E5'))
    self.assertEqual(clef.TrebleClef().y_position_to_midi(4),
                     librosa.note_to_midi('F5'))
    self.assertEqual(clef.TrebleClef().y_position_to_midi(14),
                     librosa.note_to_midi('B6'))
def testBassClef(self):
    self.assertEqual(clef.BassClef().y_position_to_midi(-10),
                     librosa.note_to_midi('A1'))
    self.assertEqual(clef.BassClef().y_position_to_midi(-7),
                     librosa.note_to_midi('D2'))
    self.assertEqual(clef.BassClef().y_position_to_midi(-5),
                     librosa.note_to_midi('F2'))
    self.assertEqual(clef.BassClef().y_position_to_midi(-1),
                     librosa.note_to_midi('C3'))
    self.assertEqual(clef.BassClef().y_position_to_midi(0),
                     librosa.note_to_midi('D3'))
    self.assertEqual(clef.BassClef().y_position_to_midi(6),
                     librosa.note_to_midi('C4'))
    self.assertEqual(clef.BassClef().y_position_to_midi(8),
                     librosa.note_to_midi('E4'))
def _key_sig_pitch_classes(note_name, ascending_fifths):
    first_pitch_class = (
        librosa.note_to_midi(note_name + '0') %
        constants.NUM_SEMITONES_PER_OCTAVE)
    # Go through the circle of fifths in ascending or descending order.
    step = 1 if ascending_fifths else -1
    order = constants.CIRCLE_OF_FIFTHS[::step]
    # Get the start index for the key signature.
    first_pitch_class_ind = order.index(first_pitch_class)
    return list(
        itertools.islice(
            # Create a cycle of the order. We may loop around, e.g. from F back
            # to C.
            itertools.cycle(order),
            # Take the 7 pitch classes from the cycle.
            first_pitch_class_ind,
            first_pitch_class_ind + constants.NUM_NOTES_IN_DIATONIC_SCALE))
def notetomidivalue(note):
    str = ""
    lx = []
    if len(note) < 4:
        note = [note]
    else:
        for char in note:
            if char in ["1", "2", "3", "4", "5", "6", "7", "8", "9", "10", "0"]:
                char = char.replace("1", "2").replace("0", "2").replace(
                    "6", "5").replace("7", "5").replace("8", "5")
                str += char
                lx.append(str)
                str = ""
            else:
                str += char
    return librosa.note_to_midi(lx)
def resynthesize_sources_with_onset_templates(W, H, pitches, signal):
    lh_max_pitch = librosa.note_to_midi('A3')
    components = numpy.array(sorted(pitches) + sorted(pitches))
    H1 = numpy.copy(H)
    H2 = numpy.copy(H)
    H1[components > lh_max_pitch, :] = 0
    H2[components <= lh_max_pitch, :] = 0
    Y1 = numpy.dot(W, H1) * signal.X_phase
    Y2 = numpy.dot(W, H2) * signal.X_phase

    print('Left hand:')
    reconstructed_signal1 = librosa.istft(Y1, length=len(signal.x))
    ipd.display(ipd.Audio(reconstructed_signal1, rate=signal.sr))

    print('Right hand:')
    reconstructed_signal2 = librosa.istft(Y2, length=len(signal.x))
    ipd.display(ipd.Audio(reconstructed_signal2, rate=signal.sr))
def to_midi(pred, out_path, velocity=100, threshold=0.4, t_unit=0.02):
    midi = pretty_midi.PrettyMIDI()
    piano = pretty_midi.Instrument(program=0)

    notes = []
    pred = np.where(pred > threshold, 1, 0)
    pred = merge_channels(pred)
    pitch_offset = librosa.note_to_midi("A0")
    #print("Transformed shape: ", pred.shape)
    plt.imshow(pred.transpose(), origin="lower", aspect=20)
    plt.savefig("{}.png".format(out_path), dpi=250)

    for i in range(pred.shape[1]):
        pp = pred[:, i]
        candy = np.where(pp > 0.5)[0]
        if len(candy) == 0:
            # No pitch present
            continue

        shift = np.insert(candy, 0, 0)[:-1]
        diff = candy - shift
        on_idx = np.where(diff > 1)[0]
        onsets = candy[on_idx]
        offsets = shift[on_idx[1:]]
        offsets = np.append(offsets, candy[-1])

        for ii in range(len(onsets)):
            on_t = onsets[ii] * t_unit
            off_t = offsets[ii] * t_unit
            note = pretty_midi.Note(velocity=velocity, pitch=i + pitch_offset,
                                    start=on_t, end=off_t)
            notes.append(note)

    piano.notes = notes
    midi.instruments.append(piano)
    if not out_path.endswith(".mid"):
        out_path += ".mid"
    midi.write(out_path)
    return midi
def get_midi_data_slivet(y, fs):
    """Use SLIVET note transcription method to get melodic sequence."""
    data = vamp.collect(y, fs, 'silvet:silvet')

    labels = []
    start_t = []
    end_t = []
    for d in data['list']:
        n = librosa.note_to_midi(d['label'])
        # pc = coreutils.midi_note_to_pc(n)
        # label = 'pc' + str(pc)
        st = d['timestamp'].to_float()
        # et = st + d['duration'].to_float()
        # labels.append(label)
        labels.append(n)
        start_t.append(st)
        # end_t.append(et)

    end_t = start_t[1:]

    # for some reason the last duration gets screwed up
    # st = start_t[-1]
    # et = st + (float(y.shape[0])/float(fs))
    delta_t = start_t[-1] - start_t[-2]
    et = end_t[-1] + delta_t
    end_t.append(et)

    # to pitch class representation
    labels = ['pc' + str(coreutils.midi_note_to_pc(n)) for n in labels]

    melody_sequence = coredata.Sequence(labels=labels,
                                        start_times=start_t,
                                        end_times=end_t)

    return melody_sequence
def logfrequency(sr, n_fft, bins_per_octave=12, tuning=0.0, fmin=None,
                 fmax=None, spread=0.125):
    '''Approximate a constant-Q filterbank for a fixed-window STFT.

    Each filter is a log-normal window centered at the corresponding pitch frequency.

    :usage:
        >>> # Simple log frequency filters
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096)

        >>> # Use a narrower frequency range
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, fmin=110, fmax=880)

        >>> # Use narrower filters for sparser response: 5% of a semitone
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, spread=0.05)

        >>> # Or wider: 50% of a semitone
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, spread=0.5)

    :parameters:
      - sr : int > 0
          audio sampling rate

      - n_fft : int > 0
          FFT window size

      - bins_per_octave : int > 0
          Number of bins per octave. Defaults to 12 (semitones).

      - tuning : None or float in [-0.5, +0.5]
          Tuning correction parameter, in fractions of a bin.

      - fmin : float > 0
          Minimum frequency bin. Defaults to ``C1 ~= 16.35``

      - fmax : float > 0
          Maximum frequency bin. Defaults to ``C9 = 4816.01``

      - spread : float > 0
          Spread of each filter, as a fraction of a bin.

    :returns:
      - C : np.ndarray, shape=(ceil(log(fmax/fmin)) * bins_per_octave, 1 + n_fft/2)
          CQT filter bank.
    '''

    if fmin is None:
        fmin = librosa.midi_to_hz(librosa.note_to_midi('C1'))

    if fmax is None:
        fmax = librosa.midi_to_hz(librosa.note_to_midi('C9'))

    # Apply tuning correction
    correction = 2.0**(float(tuning) / bins_per_octave)

    # How many bins can we get?
    n_filters = int(np.ceil(bins_per_octave * np.log2(float(fmax) / fmin)))

    # What's the shape parameter for our log-normal filters?
    sigma = float(spread) / bins_per_octave

    # Construct the output matrix (integer division keeps the shape valid in Python 3)
    basis = np.zeros((n_filters, n_fft // 2 + 1))

    # Get log frequencies of bins
    log_freqs = np.log2(librosa.fft_frequencies(sr, n_fft)[1:])

    for i in range(n_filters):
        # What's the center (median) frequency of this filter?
        center_freq = correction * fmin * (2.0**(float(i) / bins_per_octave))

        # Place a log-normal window around center_freq
        # We skip the sqrt(2*pi) normalization because it will wash out below anyway
        basis[i, 1:] = np.exp(-0.5 * ((log_freqs - np.log2(center_freq)) / sigma)**2
                              - np.log2(sigma) - log_freqs)

        # Normalize each filter
        c_norm = np.sqrt(np.sum(basis[i]**2))
        if c_norm > 0:
            basis[i] = basis[i] / c_norm

    return basis
def constant_q(sr, fmin=None, fmax=None, bins_per_octave=12, tuning=0.0,
               window=None, resolution=2, pad=False):
    '''Construct a constant-Q basis.

    :usage:
        >>> # Get the CQT basis for C1 to C9, standard tuning
        >>> basis = librosa.filters.constant_q(22050)
        >>> CQT = librosa.cqt(y, sr, basis=basis)

        >>> # Change the windowing function to Hanning instead of Hamming
        >>> basis = librosa.filters.constant_q(22050, window=np.hanning)

        >>> # Use a longer window for each filter
        >>> basis = librosa.filters.constant_q(22050, resolution=2)

    :parameters:
      - sr : int > 0
          Audio sampling rate

      - fmin : float > 0
          Minimum frequency bin. Defaults to ``C1 ~= 16.35``

      - fmax : float > 0
          Maximum frequency bin. Defaults to ``C9 = 4816.01``

      - bins_per_octave : int > 0
          Number of bins per octave

      - tuning : float in [-0.5, +0.5)
          Tuning deviation from A440 in fractions of a bin

      - window : function or None
          Windowing function to apply to filters.
          If None, no window is applied.
          Default: np.hamming

      - resolution : float > 0
          Resolution of filter windows. Larger values use longer windows.

      - pad : boolean
          Zero-pad all filters to have a constant width (equal to the longest filter).

    .. note::
        @phdthesis{mcvicar2013,
          title  = {A machine learning approach to automatic chord extraction},
          author = {McVicar, M.},
          year   = {2013},
          school = {University of Bristol}}

    :returns:
      - filters : list of np.ndarray
          filters[i] is the time-domain representation of the i'th CQT basis.
    '''

    if fmin is None:
        fmin = librosa.midi_to_hz(librosa.note_to_midi('C1'))

    if fmax is None:
        fmax = librosa.midi_to_hz(librosa.note_to_midi('C9'))

    if window is None:
        window = np.hamming

    correction = 2.0**(float(tuning) / bins_per_octave)

    fmin = correction * fmin
    fmax = correction * fmax

    # Q should be capitalized here, so we suppress the name warning
    # pylint: disable=invalid-name
    Q = float(resolution) / (2.0**(1. / bins_per_octave) - 1)

    # How many bins can we get?
    n_filters = int(np.ceil(bins_per_octave * np.log2(float(fmax) / fmin)))

    filters = []
    for i in np.arange(n_filters, dtype=float):
        # Length of this filter
        ilen = np.ceil(Q * sr / (fmin * 2.0**(i / bins_per_octave)))

        # Build the filter
        win = np.exp(Q * 1j * np.linspace(0, 2 * np.pi, ilen, endpoint=False))

        # Apply the windowing function
        if window is not None:
            win = win * window(ilen)

        # Normalize
        win = librosa.util.normalize(win, norm=2)

        filters.append(win)

    if pad:
        max_len = max(map(len, filters))
        for i in range(len(filters)):
            filters[i] = librosa.util.pad_center(filters[i], max_len)

    return filters
Mandatory argument : file to factorize
Optional arguments : pitch_min (smallest pitch considered),
                     pitch_max (biggest pitch considered),
                     filtering (true or false)
'''

import sys

if len(sys.argv) <= 1:
    print(usage)
    sys.exit(-1)

from librosa import load, stft, logamplitude, note_to_midi, midi_to_hz
import numpy as np

filename = sys.argv[1]

pitch_min = note_to_midi('C1')
if len(sys.argv) > 2:
    pitch_min = note_to_midi(sys.argv[2])
pitch_max = note_to_midi('C7')
if len(sys.argv) > 3:
    pitch_max = note_to_midi(sys.argv[3])
pitches = range(pitch_min, pitch_max + 1)
#pitches = note_to_midi(['C4', 'D4', 'E4', 'F4', 'G4', 'A4', 'B4', 'C5'])

filtering = True
if len(sys.argv) > 4:
    if sys.argv[4] == "false":
        filtering = False
    elif sys.argv[4] == "true":
def pstrip(x):
    root = re.match(six.text_type('([A-G][b#]*).*'),
                    six.text_type(x)).groups()[0]
    return librosa.note_to_midi(root)
def constant_q(sr, fmin=None, n_bins=84, bins_per_octave=12, tuning=0.0,
               window=None, resolution=2, pad=False, **kwargs):
    r'''Construct a constant-Q basis.

    :usage:
        >>> # Change the windowing function to Hamming instead of Hann
        >>> basis = librosa.filters.constant_q(22050, window=np.hamming)

        >>> # Use a longer window for each filter
        >>> basis = librosa.filters.constant_q(22050, resolution=3)

        >>> # Pad the basis to fixed length
        >>> basis = librosa.filters.constant_q(22050, pad=True)

    :parameters:
      - sr : int > 0
          Audio sampling rate

      - fmin : float > 0
          Minimum frequency bin. Defaults to ``C2 ~= 32.70``

      - n_bins : int > 0
          Number of frequencies. Defaults to 7 octaves (84 bins).

      - bins_per_octave : int > 0
          Number of bins per octave

      - tuning : float in [-0.5, +0.5)
          Tuning deviation from A440 in fractions of a bin

      - window : function or ``None``
          Windowing function to apply to filters.
          If ``None``, no window is applied.
          Default: scipy.signal.hann

      - resolution : float > 0
          Resolution of filter windows. Larger values use longer windows.

      - pad : boolean
          Pad all filters to have a constant width (equal to the longest filter).
          By default, padding is done with zeros, but this can be overridden
          by setting the ``mode=`` field in *kwargs*.

      - *kwargs*
          Additional keyword arguments to ``np.pad()`` when ``pad==True``.

    .. note::
        - McVicar, Matthew. "A machine learning approach to automatic chord
          extraction." Dissertation, University of Bristol. 2013.

    :returns:
      - filters : list of np.ndarray, ``len(filters) == n_bins``
          ``filters[i]`` is the ``i``\ th CQT basis filter (in the time-domain)
    '''

    if fmin is None:
        fmin = librosa.midi_to_hz(librosa.note_to_midi('C2'))

    if window is None:
        window = scipy.signal.hann

    correction = 2.0**(float(tuning) / bins_per_octave)

    fmin = correction * fmin

    # Q should be capitalized here, so we suppress the name warning
    # pylint: disable=invalid-name
    Q = float(resolution) / (2.0**(1. / bins_per_octave) - 1)

    filters = []
    for i in np.arange(n_bins, dtype=float):
        # Length of this filter
        ilen = np.ceil(Q * sr / (fmin * 2.0**(i / bins_per_octave)))

        # Build the filter
        win = np.exp(Q * 1j * np.linspace(0, 2 * np.pi, ilen, endpoint=False))

        # Apply the windowing function
        if window is not None:
            win = win * window(ilen)

        # Normalize
        win = librosa.util.normalize(win, norm=2)

        filters.append(win)

    if pad:
        max_len = max(map(len, filters))

        # Use reflection padding, unless otherwise specified
        for i in range(len(filters)):
            filters[i] = librosa.util.pad_center(filters[i], max_len, **kwargs)

    return filters
def __test_fail():
    librosa.note_to_midi('does not pass')
if len(argv) > 2:
    midi_filename = argv[2]
n_components = None
if len(argv) > 3:
    n_components = int(argv[3])

from librosa import load, cqt, logamplitude, note_to_midi, note_to_hz
import numpy as np

# load an audio file (with samplerate)
x, sr = load(filename)

# compute constant-Q transform (~ pitch-based STFT)
#hop_size = 512
pitch_max = note_to_midi('D5')
pitch_min = 'B3'
pitch_min_number = note_to_midi(pitch_min)
C = cqt(x, sr=sr, fmin=note_to_hz(pitch_min),
        n_bins=pitch_max - pitch_min_number)

# try some midi visualization
from Midi import midi_matrix
midi_mat = midi_matrix(midi_filename, min_pitch=note_to_midi(pitch_min))

# NMF
#V = np.log10(1 + 100000 * C**2)
V = np.abs(C).transpose()
W_zero = np.zeros((pitch_max - pitch_min_number, pitch_max - pitch_min_number))
# See the License for the specific language governing permissions and
# limitations under the License.

"""Defines shared constants used in transcription models."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

# internal imports

import librosa
import tensorflow as tf

MIN_MIDI_PITCH = librosa.note_to_midi('A0')
MAX_MIDI_PITCH = librosa.note_to_midi('C8')
MIDI_PITCHES = MAX_MIDI_PITCH - MIN_MIDI_PITCH + 1

DEFAULT_CQT_BINS_PER_OCTAVE = 36
DEFAULT_JITTER_AMOUNT_MS = 0
DEFAULT_JITTER_WAV_AND_LABEL_SEPARATELY = False
DEFAULT_MIN_FRAME_OCCUPANCY_FOR_LABEL = 0.0
DEFAULT_NORMALIZE_AUDIO = False
DEFAULT_ONSET_DELAY = 0
DEFAULT_ONSET_LENGTH = 100
DEFAULT_ONSET_MODE = 'window'
DEFAULT_SAMPLE_RATE = 16000
DEFAULT_SPEC_FMIN = 30.0
DEFAULT_SPEC_HOP_LENGTH = 512
DEFAULT_SPEC_LOG_AMPLITUDE = True
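# Sanity note (not from the original source): with librosa's conventions,
# A0 is MIDI 21 and C8 is MIDI 108, so MIDI_PITCHES evaluates to 88, the
# standard piano range covered by these transcription models.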
def logfrequency(sr, n_fft, n_bins=84, bins_per_octave=12, tuning=0.0,
                 fmin=None, spread=0.125):
    '''Approximate a constant-Q filterbank for a fixed-window STFT.

    Each filter is a log-normal window centered at the corresponding frequency.

    :usage:
        >>> # Simple log frequency filters
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096)

        >>> # Use a narrower frequency range
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, n_bins=48, fmin=110)

        >>> # Use narrower filters for sparser response: 5% of a semitone
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, spread=0.05)

        >>> # Or wider: 50% of a semitone
        >>> logfs_fb = librosa.filters.logfrequency(22050, 4096, spread=0.5)

    :parameters:
      - sr : int > 0
          audio sampling rate

      - n_fft : int > 0
          FFT window size

      - n_bins : int > 0
          Number of bins. Defaults to 84 (7 octaves).

      - bins_per_octave : int > 0
          Number of bins per octave. Defaults to 12 (semitones).

      - tuning : None or float in [-0.5, +0.5]
          Tuning correction parameter, in fractions of a bin.

      - fmin : float > 0
          Minimum frequency bin. Defaults to ``C2 ~= 32.70``

      - spread : float > 0
          Spread of each filter, as a fraction of a bin.

    :returns:
      - C : np.ndarray, shape=(n_bins, 1 + n_fft/2)
          log-frequency filter bank.
    '''

    if fmin is None:
        fmin = librosa.midi_to_hz(librosa.note_to_midi('C2'))

    # Apply tuning correction
    correction = 2.0**(float(tuning) / bins_per_octave)

    # What's the shape parameter for our log-normal filters?
    sigma = float(spread) / bins_per_octave

    # Construct the output matrix (integer division keeps the shape valid in Python 3)
    basis = np.zeros((n_bins, 1 + n_fft // 2))

    # Get log frequencies of bins
    log_freqs = np.log2(librosa.fft_frequencies(sr, n_fft)[1:])

    for i in range(n_bins):
        # What's the center (median) frequency of this filter?
        c_freq = correction * fmin * (2.0**(float(i) / bins_per_octave))

        # Place a log-normal window around c_freq
        basis[i, 1:] = np.exp(-0.5 * ((log_freqs - np.log2(c_freq)) / sigma)**2
                              - np.log2(sigma) - log_freqs)

        # Normalize each filter
        c_norm = np.sqrt(np.sum(basis[i]**2))
        if c_norm > 0:
            basis[i] = basis[i] / c_norm

    return basis
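# Hedged usage sketch (not from the original source; assumes the older
# librosa.filters-style API shown above): apply the log-frequency basis to an
# STFT magnitude spectrogram to obtain a pitch-scaled representation.
#   y, sr = librosa.load('audio.wav')           # hypothetical input file
#   D = np.abs(librosa.stft(y, n_fft=4096))     # shape (1 + n_fft/2, n_frames)
#   basis = logfrequency(sr, n_fft=4096)        # shape (n_bins, 1 + n_fft/2)
#   C = basis.dot(D)                            # log-frequency spectrogram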
def constant_q(sr, fmin=None, fmax=None, bins_per_octave=12, tuning=0.0,
               window=np.hamming, resolution=1):
    '''Construct a constant-Q basis.

    :usage:
        >>> # Get the CQT basis for C1 to C9, standard tuning
        >>> basis = librosa.filters.constant_q(22050)
        >>> CQT = librosa.cqt(y, sr, basis=basis)

        >>> # Change the windowing function to Hanning instead of Hamming
        >>> basis = librosa.filters.constant_q(22050, window=np.hanning)

        >>> # Use a longer window for each filter
        >>> basis = librosa.filters.constant_q(22050, resolution=2)

    :parameters:
      - sr : int > 0
          Audio sampling rate

      - fmin : float > 0
          Minimum frequency bin. Defaults to ``C1 ~= 16.35``

      - fmax : float > 0
          Maximum frequency bin. Defaults to ``C9 = 4816.01``

      - bins_per_octave : int > 0
          Number of bins per octave

      - tuning : float in [-0.5, +0.5)
          Tuning deviation from A440 in fractions of a bin

      - window : function or None
          Windowing function to apply to filters.
          If None, no window is applied.
          Default is to use a hamming window.

      - resolution : float > 0
          Resolution of filter windows. Larger values use longer windows.

    .. note::
        @phdthesis{mcvicar2013,
          title  = {A machine learning approach to automatic chord extraction},
          author = {McVicar, M.},
          year   = {2013},
          school = {University of Bristol}}

    :returns:
      - filters : list of np.ndarray
          filters[i] is the time-domain representation of the i'th CQT basis.
    '''

    if fmin is None:
        fmin = librosa.midi_to_hz(librosa.note_to_midi('C1'))

    if fmax is None:
        fmax = librosa.midi_to_hz(librosa.note_to_midi('C9'))

    correction = 2.0**(float(tuning) / bins_per_octave)

    fmin = correction * fmin
    fmax = correction * fmax

    Q = float(resolution) / (2.0**(1. / bins_per_octave) - 1)

    # How many bins can we get?
    n_filters = int(np.ceil(bins_per_octave * np.log2(float(fmax) / fmin)))

    filters = []
    for i in np.arange(n_filters, dtype=float):
        # Length of this filter
        ilen = np.ceil(Q * sr / (fmin * 2.0**(i / bins_per_octave)))

        # Build the filter and normalize
        if window is not None:
            win = window(ilen)
        else:
            win = 1.0

        win = win * np.exp(Q * 1j * np.linspace(0, 2 * np.pi, ilen, endpoint=False))
        win /= np.sqrt(np.sum(np.abs(win)**2))

        filters.append(win)

    return filters