def generate_note(self, f0_info, n_duration, round_to_sixteenth=True):
    f0 = f0_info[0]
    a = remap(f0_info[1], self.cqt.min(), self.cqt.max(), 0, 1)
    duration = librosa.frames_to_time(n_duration, sr=self.sr, hop_length=self.hop_length)
    # round to the nearest 0.02 s for music21 compatibility
    note_duration = 0.02 * np.around(duration / 0.02)
    midi_duration = second_to_quarter(duration, self.tempo)
    midi_velocity = int(round(remap(f0_info[1], self.cqt.min(), self.cqt.max(), 80, 120)))
    if round_to_sixteenth:
        midi_duration = round(midi_duration * 16) / 16
    try:
        if f0 is None:
            midi_note = None
            note_info = Rest(type=self.mm.secondsToDuration(note_duration).type)
            f0 = 0
        else:
            midi_note = round(librosa.hz_to_midi(f0))
            note = Note(librosa.midi_to_note(midi_note),
                        type=self.mm.secondsToDuration(note_duration).type)
            note.volume.velocity = midi_velocity
            note_info = [note]
    except DurationException:
        if f0 is None:
            midi_note = None
            note_info = Rest(type='32nd')
            f0 = 0
        else:
            midi_note = round(librosa.hz_to_midi(f0))
            note = Note(librosa.midi_to_note(midi_note), type='eighth')
            note.volume.velocity = midi_velocity
            note_info = [note]
    midi_info = [midi_note, midi_duration, midi_velocity]
    n = np.arange(librosa.frames_to_samples(n_duration, hop_length=self.hop_length))
    sine_wave = a * np.sin(2 * np.pi * f0 * n / float(self.sr))
    return [sine_wave, midi_info, note_info]
def evaluate(model, wavfile='sample/waltz_for_toutzy.wav', log=1, YLIM=[0, 48]):
    if hasattr(model, 'model'):
        model = model.model
    p = util.piece(wavfile)
    print(p.x0.max())
    if p.x0.dtype == 'int16':
        p.x0 = p.x0.astype('float32')
        p.x0 = p.x0 / 2**15
    p.xs = p.x0
    p.downsample(16000)
    p.trimto(50, 66)
    chunks = to_chunk(p, 20)[:]
    chunks = np.array(chunks)
    eps = 1E-8
    plt.figure(figsize=[12, 6])
    mroll = transcribe(chunks, model, chroma=0, log=log)
    ytk = librosa.midi_to_note(range(0, 128))
    plt.yticks(np.arange(0, 128) + .5, librosa.midi_to_note(range(0, 128)))
    plt.grid()
    plt.ylim(YLIM)
    ipd.display(ipd.Audio(p.xs, rate=16000))
    plt.figure(figsize=[12, 6])
    cqt(p)
    plt.yticks(np.exp(np.linspace(*np.log(plt.gca().get_ylim()), num=48)),
               librosa.midi_to_note(np.arange(48) + 24))
    plt.show()
    return mroll
def get_chord(self, poly='random', top_k=None):
    for i in range(10):
        try:
            if poly == 'random':
                poly = np.random.randint(1, 7)
            if poly in [1, 2]:
                notes = list(
                    np.random.randint(
                        40,
                        self.chords.midi_notes.values.max()[0] + 1,
                        size=poly))
            else:
                chord_df = random.choice(
                    [self.chords, self.top_chords.iloc[:top_k]])
                df = chord_df[chord_df.poly == poly].sample(1)
                notes = df.midi_notes.values[0]
            if self.verbose:
                print(poly)
                print('notes', notes, librosa.midi_to_note(notes))
                try:
                    print('TAB:', df.joint.values)
                except:
                    pass
            break
        except Exception as ex:
            print(ex, 'Retry', i + 1)
    return notes
def render(self, width, force_chord):
    result = np.full((OVERFLOW_WIDTH_LIMIT, 3), '', dtype='<U1')
    air = self.air
    scale = width / (self.pos_end - self.pos_start)
    already_mute = False
    for pos in range(self.pos_start, self.pos_end):
        render_x = int(np.round((pos - self.pos_start) * scale))
        for type in range(3):  # 0: melody, 1: lyric, 2: chord
            render_str = ''
            if type == 0:  # melody
                if air.melody_onset[pos]:
                    render_str = librosa.midi_to_note(air.melody[pos])[:2]
                    already_mute = False
                if air.melody[pos] == -1 and not already_mute:
                    render_str = '0'
                    already_mute = True
            elif type == 1:  # lyric
                if air.lyric[pos] != '' and air.lyric[pos] != '-':
                    render_str = air.lyric[pos]
            else:  # chord
                if (pos == 0 or air.chord[pos] != air.chord[pos - 1]
                        or (force_chord and pos == self.pos_start)):
                    render_str = air.chord[pos].replace(':', '')
            chars = list(render_str)
            render_len = min(len(chars), OVERFLOW_WIDTH_LIMIT - render_x)
            result[render_x:render_x + render_len, type] = chars[:render_len]
    return result
def transpose(label, n_semitones):
    """Transpose a chord label by some number of semitones

    Parameters
    ----------
    label : str
        A chord string

    n_semitones : float
        The number of semitones to move `label`

    Returns
    -------
    label_transpose : str
        The transposed chord label
    """
    # Split off the root note from the modifier
    match = re.match("(?P<note>[A-G][b#]*)(?P<mod>.*)", label)

    if not match:
        return label

    note = match.group("note")

    new_note = librosa.midi_to_note(librosa.note_to_midi(note) + n_semitones,
                                    octave=False)

    return new_note + match.group("mod")
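# Hedged usage sketch (not part of the original snippet): transposing a couple of
# jams-style chord labels with the `transpose` helper above. Requires `import re`
# and `import librosa` in scope; the accidental spelling of the output depends on
# the librosa version ('C#' vs. the Unicode 'C♯').
print(transpose('C:maj', 2))    # expected 'D:maj'
print(transpose('A:min7', -2))  # expected 'G:min7'
print(transpose('N', 5))        # no [A-G] root matched, so the label is returned unchanged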
def init_list():
    # The note range to recognize is G3-E7 (MIDI numbers 53 to 100), but one extra
    # note is taken on each side to determine the frequency cutoff bandwidth.
    for midi in range(54, 102):
        pitch = librosa.midi_to_note(midi)
        pitch_hz = librosa.note_to_hz(pitch)
        pitch_list.append(pitch)
        hz_list.append(pitch_hz)
def midi_module(audio_path, y, CQT, sr, plot=True):
    midi_path = re.sub(r'\.wav', '.mid', audio_path)
    n_frames = CQT.shape[1]

    # Output definition (y)
    midi_data = pretty_midi.PrettyMIDI(midi_path)
    pianoRoll = midi_data.instruments[0].get_piano_roll(
        fs=CQT.shape[1] * 44100. / len(y))
    Ground_truth_mat = (
        pianoRoll[RangeMIDInotes[0]:RangeMIDInotes[1] + 1, :CQT.shape[1]] > 0)

    if plot:
        plt.figure()
        plt.subplot(211)
        lb.display.specshow(Ground_truth_mat,
                            sr=sr,
                            bins_per_octave=12,
                            fmin=lb.note_to_hz('A0'),
                            x_axis='time',
                            y_axis='cqt_note')
        # Label distribution in the sequence
        plt.subplot(212)
        n_pitch_frame = np.sum(Ground_truth_mat, axis=1)
        plt.bar(range(RangeMIDInotes[0], RangeMIDInotes[1] + 1),
                n_pitch_frame / np.sum(n_pitch_frame).astype(float))
        plt.xticks(
            range(RangeMIDInotes[0], RangeMIDInotes[1] + 1, 12),
            lb.midi_to_note(range(RangeMIDInotes[0], RangeMIDInotes[1] + 1, 12)))
        plt.xlabel('Midi note')
        plt.ylabel('Note probability')

    return Ground_truth_mat  # shape (88, n_frames), e.g. (88, 10979)
def test_cq():
    """Just for testing, adapted from the librosa documentation."""
    # Plot one octave of filters in time and frequency
    basis, lengths = librosa.filters.constant_q(22050)

    plt.figure(figsize=(10, 6))
    plt.subplot(2, 1, 1)
    notes = librosa.midi_to_note(np.arange(24, 24 + len(basis)))
    for i, (f, n) in enumerate(zip(basis, notes[:12])):
        f_scale = librosa.util.normalize(f) / 2
        plt.plot(i + f_scale.real)
        plt.plot(i + f_scale.imag, linestyle=':')
    plt.axis('tight')
    plt.yticks(np.arange(len(notes[:12])), notes[:12])
    plt.ylabel('CQ filters')
    plt.title('CQ filters (one octave, time domain)')
    plt.xlabel('Time (samples at 22050 Hz)')
    plt.legend(['Real', 'Imaginary'], frameon=True, framealpha=0.8)

    plt.subplot(2, 1, 2)
    F = np.abs(np.fft.fftn(basis, axes=[-1]))
    # Keep only the positive frequencies
    F = F[:, :(1 + F.shape[1] // 2)]
    librosa.display.specshow(F, x_axis='linear')
    plt.yticks(np.arange(len(notes))[::12], notes[::12])
    plt.ylabel('CQ filters')
    plt.title('CQ filter magnitudes (frequency domain)')
    plt.tight_layout()
    plt.show()
def transpose(label, n_semitones):
    '''Transpose a chord label by some number of semitones

    Parameters
    ----------
    label : str
        A chord string

    n_semitones : float
        The number of semitones to move `label`

    Returns
    -------
    label_transpose : str
        The transposed chord label
    '''
    # Split off the root note from the modifier
    match = re.match(six.text_type('(?P<note>[A-G][b#]*)(?P<mod>.*)'),
                     six.text_type(label))

    if not match:
        return label

    note = match.group('note')

    new_note = librosa.midi_to_note(librosa.note_to_midi(note) + n_semitones,
                                    octave=False)

    return new_note + match.group('mod')
def get_label(self, index):
    if index < 0 or index > self.nb_classes:
        return None
    minor = index >= 12
    midi = index + 12
    if minor:
        midi = index - 12
    label = librosa.midi_to_note(midi=midi, octave=False)
    if minor:
        label += 'm'
    return label
def to_major_minor_key(index):
    if not np.isscalar(index):
        return [to_major_minor_key(x) for x in index]
    minor = index >= 12
    midi = index + 12
    if minor:
        midi = index - 12
    tonic = librosa.midi_to_note(midi=midi, octave=False)
    mode = 'minor' if minor else 'major'
    return tonic, mode
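# Hedged usage sketch (not in the original source): mapping a 24-class key index
# to a (tonic, mode) pair with `to_major_minor_key`. Following the convention
# encoded above, indices 0-11 are major keys and 12-23 the corresponding minors.
print(to_major_minor_key(0))        # expected ('C', 'major')
print(to_major_minor_key(21))       # expected ('A', 'minor')
print(to_major_minor_key([0, 21]))  # list input is handled recursively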
def get_components_score_informed(W, H, n_components, signal, pitches):
    for n in range(n_components):
        # Re-create the STFT of a single NMF component.
        Y = scipy.outer(W[:, n], H[n]) * signal.X_phase
        # Transform the STFT into the time domain.
        y = librosa.istft(Y)
        label = pitches[n]
        if not isinstance(label, str):
            label = librosa.midi_to_note(label)
        yield (label, ipd.Audio(y, rate=signal.sr))
def get_components_score_informed_with_onset_templates(W, H, n_components,
                                                        signal, pitches):
    for n in range(n_components * 2):
        # Re-create the STFT of a single NMF component.
        Y = scipy.outer(W[:, n], H[n]) * signal.X_phase
        # Transform the STFT into the time domain.
        y = librosa.istft(Y)
        if n < n_components:
            label = "note: "
        else:
            n = n - n_components
            label = "onset: "
        label += librosa.midi_to_note(pitches[n])
        yield ('Component {} ({}):'.format(n, label), ipd.Audio(y, rate=signal.sr))
def get_notes(self, sequence):
    notes = []
    for note_obj in sequence:
        note = librosa.midi_to_note(note_obj.pitch)
        octave = int(note[-1])
        if octave < 3:
            note = note[0:-1] + '3'
        elif octave > 4:
            note = note[0:-1] + '4'
        duration = math.ceil((note_obj.end_time - note_obj.start_time) * 4) / 4
        notes.append({'note': note, 'duration': duration})
    return notes
def tracktosentences(file, index):
    midi = mido.MidiFile(file, clip=True)
    matrix = GenPlot.tracktoarray(GenPlot.trackcombine(midi)[0][index])
    notez = []
    for _ in range(int(len(matrix) / 127)):
        playing = librosa.midi_to_note(
            GenPlot.removenegative(matrix[_ * 127:(_ + 1) * 127]))
        if notez != []:
            if notez[len(notez) - 1][0] != playing:
                notez.append([playing, 0])
            else:
                notez[len(notez) - 1][1] += 1
        else:
            notez.append([playing, 0])
    return listtosentences(notez, midi.ticks_per_beat)
def midi_to_note_zeros(annotation):
    '''Special function so that zeros represent silence

    Input: annotation list taken straight from mtrack
    Output: 1-D np.array of note-name strings, with '0' marking silence
    '''
    new_values = np.array([])
    for a in annotation:
        new_a = '0'
        if a != 0:
            new_a = librosa.midi_to_note(a)
        new_values = np.append(new_values, new_a)
    return new_values
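# Hedged usage sketch (not in the original source): converting an annotation list
# where 0 marks silence into note-name strings. The accidental spelling depends on
# the librosa version ('A#4' vs. the Unicode 'A♯4').
print(midi_to_note_zeros([0, 69, 70, 0]))  # expected ['0', 'A4', 'A#4', '0']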
def vocabulary(self):
    '''Build the vocabulary of all key_mode strings

    Returns
    -------
    labels : list
        List of string labels.
    '''
    qualities = MODES + list(QUALITY.keys())
    tonics = midi_to_note(list(range(12)), octave=False)

    labels = ['N']

    for key_mode in product(tonics, qualities):
        labels.append('{}:{}'.format(*key_mode))

    return labels
def plot_weight(model, tokens):
    result, attn = model.inference(tokens)
    result[:, 1] = 0.0
    print('PD:', np.argmax(result, axis=1))
    print('GT:', tokens)
    print('GT:', ' '.join(
        midi_to_note(x - 2) if x >= 2 else ['<n>', '<s>'][x] for x in tokens))
    print('PB:', np.max(result, axis=1))
    tokens_one_hot = np.eye(result.shape[1])[tokens]
    fig, ax = plt.subplots(nrows=2, ncols=1, sharex='all')
    ax[0].imshow(result.T, interpolation='nearest', aspect='auto')
    ax[0].invert_yaxis()
    ax[1].imshow(tokens_one_hot.T, interpolation='nearest', aspect='auto')
    ax[1].invert_yaxis()
    plt.show()
def create_half_tone_filterbank(N, fs, midi_start_note=43, num_oct=4):
    """Create a half-tone filterbank."""
    import librosa

    # midi notes
    p = np.arange(midi_start_note, midi_start_note + 12 * num_oct)

    # midi notes of the discrete DFT bins
    p_fk = np.insert(f_to_midi_scale(np.arange(1, N / 2) * fs / N), 0, 0)

    # differences
    d = np.abs(p[:, np.newaxis] - p_fk)

    # half-tone filterbank
    Hp = 0.5 * np.tanh(np.pi * (1 - 2 * d)) + 0.5

    return Hp, get_chroma_labels(
        start_note=librosa.midi_to_note(midi_start_note, octave=False))
def get_winner(self, notes):
    top_chords = self.top_chords.copy()
    query = notes_to_chroma(librosa.midi_to_note(notes))
    top_chords['chroma_dist'] = np.array([
        textdistance.levenshtein(query, chord)
        for chord in top_chords.chroma.values
    ])
    top_chords['dist'] = np.array([
        textdistance.levenshtein(notes, chord)
        for chord in top_chords.midi_notes
    ])
    top_chords['chroma_dist'] = (top_chords.chroma_dist.max() -
                                 top_chords.chroma_dist)
    candidates = top_chords[top_chords.dist == top_chords.dist.min()].sort_values(
        ['dist', 'chroma_dist'], ascending=False)
    winner = candidates.iloc[0]
    return winner.midi_notes
def analyse_audio(audio_file, midi_file):
    x, _ = librosa.load(audio_file, sr=sr)
    print("Music file length=%s, sampling_rate=%s" % (x.shape[0], sr))

    plt.figure(figsize=(14, 5))
    plt.title('Music Sample Waveplot')
    librosa.display.waveplot(x, sr=sr)

    x_stft_spectrum = lb.stft(x, n_fft=1024, hop_length=512, center=True,
                              dtype=np.complex64)
    # convert the magnitude spectrogram to dB once
    x_stft = librosa.amplitude_to_db(abs(x_stft_spectrum), ref=np.max)
    plt.figure(figsize=(14, 5))
    librosa.display.specshow(x_stft, sr=sr, fmin=lb.note_to_hz('A0'),
                             x_axis='time', y_axis='linear', cmap='coolwarm')
    plt.title('Power spectrogram')
    plt.colorbar(format='%+2.0f dB')
    plt.tight_layout()

    plt.figure(figsize=(14, 5))
    x_cqt = np.abs(librosa.cqt(x, sr=sr, bins_per_octave=bins_per_octave,
                               n_bins=n_bins, fmin=lb.note_to_hz('A0')))
    librosa.display.specshow(librosa.amplitude_to_db(x_cqt, ref=np.max), sr=sr,
                             x_axis='time', y_axis='cqt_note', cmap='coolwarm')
    print("CQT Matrix shape", x_cqt.shape)
    plt.colorbar(format='%+2.0f dB')
    plt.title('Constant-Q power spectrum')
    plt.tight_layout()

    n_frames = x_cqt.shape[1]
    midi_data = pretty_midi.PrettyMIDI(midi_file)
    plt.figure(figsize=(12, 4))
    plot_piano_roll(midi_data, 24, 84)
    print('There are {} time signature changes'.format(len(midi_data.time_signature_changes)))
    print('There are {} instruments'.format(len(midi_data.instruments)))
    print('Instrument 1 has {} notes'.format(len(midi_data.instruments[0].notes)))

    pianoRoll = midi_data.instruments[0].get_piano_roll(fs=n_frames * 44100. / len(x))
    midi_mat = (pianoRoll[MIDInotes[0]:MIDInotes[1] + 1, :n_frames] > 0)
    print("MIDI Matrix shape", midi_mat.shape)
    plt.figure()
    librosa.display.specshow(midi_mat, sr=sr, bins_per_octave=12,
                             fmin=lb.note_to_hz('A0'), x_axis='time',
                             y_axis='cqt_note')
    n_pitch_frame = np.sum(midi_mat, axis=1)
    print(n_pitch_frame)
    plt.bar(range(MIDInotes[0], MIDInotes[1] + 1),
            n_pitch_frame / np.sum(n_pitch_frame).astype(float))
    plt.xticks(range(MIDInotes[0], MIDInotes[1] + 1, 12),
               lb.midi_to_note(range(MIDInotes[0], MIDInotes[1] + 1, 12)))
    plt.xlabel('Midi note')
    plt.ylabel('Note probability')
def enharmonic(self, key_str):
    '''Force the tonic spelling to fit our tonic list by spelling
    out-of-vocab keys enharmonically.

    Parameters
    ----------
    key_str : str
        The key_mode string in jams style.

    Returns
    -------
    key_str : str
        The key_mode string spelled enharmonically to fit our vocab.
    '''
    key_list = key_str.split(':')
    # spell the tonic enharmonically if necessary
    if key_list[0] != 'N':
        key_list[0] = midi_to_note(note_to_midi(key_list[0]), octave=False)
    if len(key_list) == 1:
        key_list.append('major')
    return ':'.join(key_list)
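# Hedged usage sketch (not in the original source): enharmonic respelling of an
# out-of-vocab tonic and filling in a missing mode, assuming `midi_to_note` and
# `note_to_midi` are the librosa functions imported at module level.
# self.enharmonic('Cb:major')  # expected 'B:major'
# self.enharmonic('Db')        # expected 'C#:major' (spelling depends on librosa version)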
def _glyph_to_note(glyph):
    """Converts a `Glyph` message to a `<note>` tag.

    Args:
        glyph: A `tensorflow.moonlight.Glyph` message. The glyph type should be
            one of `NOTEHEAD_*`.

    Returns:
        An etree `<note>` tag, or `None` if the glyph is not a notehead.

    Raises:
        ValueError: If the note duration is not a multiple of `1 / DIVISIONS`.
    """
    if not glyph.HasField('note'):
        return None

    note = etree.Element('note')
    etree.SubElement(note, 'voice').text = '1'
    if glyph.type == musicscore_pb2.Glyph.NOTEHEAD_EMPTY:
        note_type = HALF
    elif glyph.type == musicscore_pb2.Glyph.NOTEHEAD_WHOLE:
        note_type = WHOLE
    else:
        index = min(len(FILLED), len(glyph.beam))
        note_type = FILLED[index]
    etree.SubElement(note, 'type').text = note_type

    duration = DIVISIONS * (glyph.note.end_time - glyph.note.start_time)
    if not duration.is_integer():
        raise ValueError('Duration is not an integer: ' + str(duration))
    etree.SubElement(note, 'duration').text = str(int(duration))

    pitch_match = re.match('([A-G])([#b]?)([0-9]+)',
                           librosa.midi_to_note(glyph.note.pitch))
    pitch = etree.SubElement(note, 'pitch')
    etree.SubElement(pitch, 'step').text = pitch_match.group(1)
    etree.SubElement(pitch, 'alter').text = str(
        ACCIDENTAL_TO_ALTER[pitch_match.group(2)])
    etree.SubElement(pitch, 'octave').text = pitch_match.group(3)
    return note
def full_chord(noteMIDI):
    # work on a sorted copy of the input MIDI note list
    new_noteMIDI = sorted(noteMIDI)
    # the lowest note is the pivot; move every other note down by octaves so that
    # all notes lie within one octave of the lowest note
    for i in range(len(new_noteMIDI) - 1):
        while new_noteMIDI[i + 1] >= new_noteMIDI[0] + 12:
            new_noteMIDI[i + 1] -= 12  # 12 semitones in one octave
    # now that all notes are within one octave of each other,
    # remove duplicate notes, then sort again
    new_noteMIDI = sorted(set(new_noteMIDI))
    # find the distance between consecutive notes
    dis = []
    for i in range(len(new_noteMIDI) - 1):
        dis.append(new_noteMIDI[i + 1] - new_noteMIDI[i])
    # the interval pattern gives the quality and the inversion of the chord
    result = QUALITY.get(tuple(dis))
    if result is None:
        return "No chord is found!"
    # find the root note of the chord
    root = librosa.midi_to_note(new_noteMIDI[result[1]], octave=False)
    return root + result[0]
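# Hedged usage sketch (not in the original source): `QUALITY` is defined elsewhere
# in the original project, so a minimal hypothetical mapping from interval patterns
# to (quality suffix, root index) is assumed here just to show the call.
QUALITY = {(4, 3): ('', 0), (3, 4): ('m', 0), (3, 5): ('', 2), (4, 5): ('m', 2)}
print(full_chord([60, 64, 67]))  # C major triad, expected 'C'
print(full_chord([64, 67, 72]))  # first-inversion C major, expected 'C'
print(full_chord([57, 60, 64]))  # A minor triad, expected 'Am'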
def token_to_note(x):
    return midi_to_note(x - 2) if x >= 2 else ['(n)', '(s)'][x]
def __test(midi_num, note, octave, cents):
    note_out = librosa.midi_to_note(midi_num, octave=octave, cents=cents)
    assert note_out == note
def __init__(
        self,
        db_cache=config.GIT2MID_DB_ROOT,
        db='single_notes.pkl',
        sr=44100,
        preload_audio=False,
):
    '''
    There are three main components in this init:

    1. data frame        | points to the audio files and holds metadata
    2. chord data        | the base used for generating the chord samples
    3. the audio files   | when preload_audio=True a dict of audio is generated
    '''
    import pandas as pd

    self.db_cache = db_cache
    self.db = db
    self.df = pd.read_pickle(
        os.path.join(self.db_cache, 'fishman/single_notes/', self.db))

    # clean with blacklist
    self.blacklist = pd.read_table(os.path.join(
        self.db_cache, 'fishman/single_notes/blacklist.txt'),
                                   header=None,
                                   index_col=0).index
    self.df = self.df.drop(self.blacklist)

    # change the file paths in the df
    def generate_new_path(old_path, path):
        new_root = '/'.join(path.split('/')[:-1])
        filename = '/'.join(old_path.split('/')[-2:])
        new_path = '/'.join((new_root, filename))
        return new_path

    self.df.index = [
        generate_new_path(
            x, os.path.join(self.db_cache, 'fishman/single_notes/', self.db))
        for x in self.df.index
    ]

    # chord shapes to use for the chords
    self.chords = pd.read_json(
        os.path.join(self.db_cache, 'chords/chord_classes.json'))
    self.chords['poly'] = [len(x) for x in self.chords.midi_notes]
    self.top_chords = pd.read_json(
        os.path.join(self.db_cache, 'chords/top_chords.json'))
    self.top_chords['poly'] = [len(x) for x in self.top_chords.midi_notes]
    self.top_chords['chroma'] = [
        notes_to_chroma(note)
        for note in librosa.midi_to_note(self.top_chords.midi_notes)
    ]

    self.sr = sr
    self.preload_audio = preload_audio
    if self.preload_audio:
        self.audio_dict = {}
        for file in self.df.index:
            self.audio_dict[file] = librosa.load(file, sr=44100)[0]

    self.verbose = False
def midi_to_note(midi):
    return 0 if midi == 0 else librosa.midi_to_note(midi)
def imshow_notes(x):
    import librosa
    imshow(x)
    plt.xticks(np.arange(12),
               [librosa.midi_to_note(i, octave=False) for i in range(12)])
def __test(midi_num, note, octave, cents):
    note_out = librosa.midi_to_note(midi_num, octave=octave, cents=cents)
    eq_(note_out, note)
def states_to_pianoroll(states, note_min, note_max, hop_time):
    """Converts a state sequence to an intermediate, internal piano-roll notation

    Parameters
    ----------
    states : int
        Sequence of states estimated by Viterbi
    note_min : string, 'A#4' format
        Lowest note supported by this estimator
    note_max : string, 'A#4' format
        Highest note supported by this estimator
    hop_time : float
        Time interval between two states.

    Returns
    -------
    output : list of lists
        output[i] is the i-th note in the sequence. Each note is a list
        described by [onset_time, offset_time, midi_pitch, note_name].
    """
    midi_min = librosa.note_to_midi(note_min)
    midi_max = librosa.note_to_midi(note_max)

    states_ = np.hstack((states, np.zeros(1)))

    # possible types of states
    silence = 0
    onset = 1
    sustain = 2

    my_state = silence
    output = []

    last_onset = 0
    last_offset = 0
    last_midi = 0
    for i in range(len(states_)):
        if my_state == silence:
            if int(states_[i] % 2) != 0:
                # Found an onset!
                last_onset = i * hop_time
                last_midi = ((states_[i] - 1) / 2) + midi_min
                last_note = librosa.midi_to_note(last_midi)
                my_state = onset

        elif my_state == onset:
            if int(states_[i] % 2) == 0:
                my_state = sustain

        elif my_state == sustain:
            if int(states_[i] % 2) != 0:
                # Found an onset: finish the previous note
                last_offset = i * hop_time
                my_note = [last_onset, last_offset, last_midi, last_note]
                output.append(my_note)

                # Start a new note
                last_onset = i * hop_time
                last_midi = ((states_[i] - 1) / 2) + midi_min
                last_note = librosa.midi_to_note(last_midi)
                my_state = onset
            elif states_[i] == 0:
                # Found silence: finish the previous note
                last_offset = i * hop_time
                my_note = [last_onset, last_offset, last_midi, last_note]
                output.append(my_note)
                my_state = silence

    return output
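# Hedged usage sketch (not in the original source): decoding a tiny synthetic state
# sequence with the function above. The encoding assumed here follows its logic:
# state 0 = silence, odd state s = onset of MIDI note (s - 1) / 2 + midi_min, and
# the following even state sustains that note.
states = np.array([0, 1, 2, 2, 0, 0, 3, 4, 0], dtype=float)
notes = states_to_pianoroll(states, note_min='A2', note_max='A5', hop_time=0.1)
print(notes)
# expected: [[0.1, 0.4, 45.0, 'A2'], [0.6, 0.8, 46.0, 'A#2']]
# (the accidental spelling depends on the librosa version)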
def test_midi_to_note_cents_nooctave():
    librosa.midi_to_note(24.25, octave=False, cents=True)