def generate_note(self, f0_info, n_duration, round_to_sixteenth=True):
    f0 = f0_info[0]
    a = remap(f0_info[1], self.cqt.min(), self.cqt.max(), 0, 1)
    duration = librosa.frames_to_time(n_duration, sr=self.sr, hop_length=self.hop_length)
    # Round to the nearest 0.02 s so music21 can map the value to a duration type
    note_duration = 0.02 * np.around(duration / 0.02)
    midi_duration = second_to_quarter(duration, self.tempo)
    midi_velocity = int(round(remap(f0_info[1], self.cqt.min(), self.cqt.max(), 80, 120)))
    if round_to_sixteenth:
        midi_duration = round(midi_duration * 16) / 16
    try:
        if f0 is None:
            midi_note = None
            note_info = Rest(type=self.mm.secondsToDuration(note_duration).type)
            f0 = 0
        else:
            midi_note = round(librosa.hz_to_midi(f0))
            note = Note(librosa.midi_to_note(midi_note),
                        type=self.mm.secondsToDuration(note_duration).type)
            note.volume.velocity = midi_velocity
            note_info = [note]
    except DurationException:
        # Fall back to fixed duration types when music21 cannot map the rounded value
        if f0 is None:
            midi_note = None
            note_info = Rest(type='32nd')
            f0 = 0
        else:
            midi_note = round(librosa.hz_to_midi(f0))
            note = Note(librosa.midi_to_note(midi_note), type='eighth')
            note.volume.velocity = midi_velocity
            note_info = [note]
    midi_info = [midi_note, midi_duration, midi_velocity]
    n = np.arange(librosa.frames_to_samples(n_duration, hop_length=self.hop_length))
    sine_wave = a * np.sin(2 * np.pi * f0 * n / float(self.sr))
    return [sine_wave, midi_info, note_info]
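
# generate_note relies on two helpers that are not shown here. The sketches
# below are assumptions inferred from the call sites, not the source's own
# code: remap linearly rescales a value from one range to another, and
# second_to_quarter converts a duration in seconds to quarter-note units at a
# given tempo in BPM.
def remap(x, in_min, in_max, out_min, out_max):
    # Linear rescale of x from [in_min, in_max] to [out_min, out_max]
    return (x - in_min) * (out_max - out_min) / (in_max - in_min) + out_min

def second_to_quarter(duration, tempo):
    # At `tempo` BPM, one quarter note lasts 60/tempo seconds
    return duration * tempo / 60.0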
def arousalScore(t):
    # Compare quantized pitch mass in the half-window before t against the
    # half-window after t; a positive score means the melody is rising.
    before = pitches[(times >= t - TUNE_SCOPE / 2) & (times <= t)]
    after = pitches[(times >= t) & (times <= t + TUNE_SCOPE / 2)]
    # +0.1 Hz avoids the -inf that hz_to_midi returns for unvoiced (0 Hz)
    # frames; * 6 / 12 quantizes the MIDI scale to half resolution
    before = (librosa.hz_to_midi(before + 0.1) * 6 / 12).astype(int)
    after = (librosa.hz_to_midi(after + 0.1) * 6 / 12).astype(int)
    score = np.sum(after) - np.sum(before)
    return score / len(before)
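
# A minimal sketch of driving arousalScore. The module globals it reads
# (pitches, times, TUNE_SCOPE) are assumptions: a frame-level f0 track, its
# timestamps in seconds, and the analysis window length in seconds.
import numpy as np
import librosa

TUNE_SCOPE = 4.0                        # assumed window length in seconds
times = np.linspace(0.0, 10.0, 500)     # frame timestamps
pitches = 220.0 * 2 ** (times / 10.0)   # a slowly rising glide

print(arousalScore(5.0))                # positive for a rising melody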
def make_jam(freq_dict, sr, track_duration):
    """Create a JAMS object from a dictionary that specifies each frequency's
    presence: keys are frequencies in Hz, values are lists of
    (start_time, duration) tuples for that frequency.
    """
    jam = jams.JAMS()
    # Store the track duration
    jam.file_metadata.duration = track_duration

    pitch_co = jams.Annotation(namespace='pitch_contour')
    note_h = jams.Annotation(namespace='note_hz')
    note_m = jams.Annotation(namespace='note_midi')
    pitch_cl = jams.Annotation(namespace='pitch_class')
    pitch_h = jams.Annotation(namespace='pitch_hz')
    pitch_m = jams.Annotation(namespace='pitch_midi')
    annotations = (pitch_co, note_h, note_m, pitch_cl, pitch_h, pitch_m)
    for ann in annotations:
        ann.annotation_metadata = jams.AnnotationMetadata(data_source='synth')

    # Assign frequencies to each start_time
    for f, time_dur in freq_dict.items():
        for t, dur in time_dur:  # (start_time, duration) tuples
            pitch_co.append(time=t, duration=dur,
                            value={"index": 0, "frequency": f, "voiced": True})
            note_h.append(time=t, duration=dur, value=f)
            note_m.append(time=t, duration=dur, value=librosa.hz_to_midi(f))
            pclass = librosa.hz_to_note(f)
            pitch_cl.append(time=t, duration=dur,
                            value={"tonic": pclass[:-1], "pitch": int(pclass[-1])})
            pitch_h.append(time=t, duration=dur, value=f)
            pitch_m.append(time=t, duration=dur, value=librosa.hz_to_midi(f))

    # Store the new annotations in the jam
    for ann in annotations:
        jam.annotations.append(ann)
    return jam
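
# A minimal sketch of driving make_jam: two tones, each sounding twice.
# The output filename is hypothetical.
freq_dict = {
    440.0: [(0.0, 1.0), (2.0, 0.5)],   # A4 at t=0s and t=2s
    523.25: [(1.0, 1.0), (3.0, 0.5)],  # C5 at t=1s and t=3s
}
jam = make_jam(freq_dict, sr=22050, track_duration=4.0)
jam.save("synth_tones.jams")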
def sonify_annotation(intervals, pitches, sonification_fs=8000):
    """Sonify a note annotation.

    Parameters
    ----------
    intervals : np.ndarray, shape=(n, 2)
        Array of note start and end times in seconds
    pitches : np.ndarray, shape=(n,)
        Array of note pitches in Hz
    sonification_fs : float
        Sample rate of the sonified audio.

    Returns
    -------
    y_sonify : np.ndarray, shape=(m,)
        Mono audio signal of the sonified notes
    """
    pm = pretty_midi.PrettyMIDI()
    inst = pretty_midi.Instrument(program=0, is_drum=False, name='piano')
    pm.instruments.append(inst)
    velocity = 100
    for interval, pitch in zip(intervals, pitches):
        # MIDI note numbers are integers, so round the converted pitch
        midi_pitch = int(round(librosa.hz_to_midi(pitch)))
        inst.notes.append(
            pretty_midi.Note(velocity, midi_pitch, interval[0], interval[1]))
    return pm.synthesize(fs=sonification_fs)
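
# A minimal sketch of sonifying two notes and writing the result to disk.
# soundfile is an assumed dependency; any WAV writer would do.
import numpy as np
import soundfile as sf

intervals = np.array([[0.0, 0.5], [0.5, 1.0]])
pitches = np.array([440.0, 660.0])
y = sonify_annotation(intervals, pitches, sonification_fs=8000)
sf.write("sonified.wav", y, 8000)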
def pitchChroma(
    pitches,
    n_class=PITCH_CHROMA_CLASS,
    count=PITCH_CHROMA_COUNT,
    hop=PITCH_CHROMA_HOP,
):
    """input: [frames] output: [n_class, frames]"""
    pitches = deepcopy(pitches)
    frames = pitches.shape[-1]
    nonVoice = pitches <= 0  # True for unvoiced frames
    pitches[nonVoice] = 10  # avoid log(0) warning
    pitches = librosa.hz_to_midi(pitches) * n_class / 12
    pitches = np.remainder(pitches.astype(int), n_class)
    # n_class marks unvoiced frames; that bin is dropped after bincount below
    pitches[nonVoice] = n_class
    # pitches: [1, frames]
    pitches = np.expand_dims(pitches, axis=0)
    assert (pitches <= n_class).all()
    # XPitches: [count, frames] -- each column stacks the current frame with
    # its count-1 predecessors
    XPitches = librosa.feature.stack_memory(pitches, n_steps=count, delay=hop, mode="edge")
    assert (XPitches <= n_class).all()
    # res: [n_class, frames]
    res = np.zeros((n_class, frames))
    weights = np.array([count - i for i in range(count)])  # recent frames weigh more
    for t in range(frames):
        bins = np.bincount(XPitches[:, t], weights=weights, minlength=n_class)
        res[:, t] = bins[:n_class]  # drop the unvoiced bin
    return res
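
# A minimal sketch of pitchChroma on a synthetic f0 track. The module-level
# defaults (PITCH_CHROMA_*) are not shown in the source, so explicit values
# are passed here; run this in the same module that defines pitchChroma.
import numpy as np

f0 = np.concatenate([np.full(50, 261.63),   # C4
                     np.zeros(10),          # unvoiced gap
                     np.full(50, 392.00)])  # G4
chroma = pitchChroma(f0, n_class=12, count=4, hop=1)
print(chroma.shape)  # (12, 110)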
def synth_config(in_path, out_path, model_path, gens, mutation, elitism, var_mut):
    with open(model_path, "rb") as f:
        model = pickle.load(f)
    source = sm.sound.load(in_path)
    lso = sm.extraction.get_sounds(source)
    sm.synth.POPULATION = len(lso)
    sm.synth.GENERATIONS = gens
    sm.synth.AVG_MUTATIONS = mutation
    sm.synth.ELITISM = elitism
    sm.synth.VAR_MUTATIONS = var_mut
    synths = sm.synth.fast_evolve(sm.synth.SoundSynth, lso, model)
    config = [synth.gen(so) for synth, so in zip(synths, lso)]
    out = ""
    for i, so in enumerate(config):
        _, vtrack = so.track_pitch()
        pitch = lr.hz_to_midi(so.get_f0())
        vel = np.clip(np.mean(vtrack) * 127, 0, 127)
        path = os.path.dirname(out_path)
        fname = os.path.splitext(os.path.basename(out_path))[0] + f"_s{i}.wav"
        so.write(os.path.join(path, fname))
        out += f"{fname}, {pitch}, {vel}\n"
    with open(out_path, "w") as file:
        file.write(out)
def test_hz_to_midi_is_accurate(self):
    """Tests converting between MIDI values and their frequencies in hertz."""
    hz = np.linspace(20.0, 20000.0, 128)
    librosa_midi = librosa.hz_to_midi(hz)
    with self.cached_session() as sess:
        tf_midi = sess.run(core.hz_to_midi(hz))
    self.assertAllClose(librosa_midi, tf_midi)
def test_hz_to_midi_is_accurate(self):
    """Tests converting between MIDI values and their frequencies in hertz."""
    hz = np.linspace(20.0, 20000.0, 128)
    librosa_midi = librosa.hz_to_midi(hz)
    th_midi = core.hz_to_midi(hz)
    assert np.allclose(librosa_midi, th_midi)
def midi_notes_encoding(audio):
    """
    Compute frame-based midi encoding of audio
    :param audio: 1-D array of audio time series
    """
    pitches, magnitudes = librosa.piptrack(y=audio)
    pitches = np.transpose(pitches)
    magnitudes = np.transpose(magnitudes)
    lc = np.zeros((pitches.shape[0], 88), dtype=np.float32)
    for i in range(pitches.shape[0]):
        # Count non-zero entries of pitches
        nz_count = len(np.nonzero(pitches[i])[0])
        # Keep a maximum of 6 detected pitches
        num_ind_to_keep = min(nz_count, 6)
        ind_of_largest_pitches = np.argpartition(
            magnitudes[i], -num_ind_to_keep)[-num_ind_to_keep:] \
            if num_ind_to_keep != 0 else []
        # Convert the largest pitches to midi notes
        overtone_limit = librosa.midi_to_hz(96)
        # filter() returns an iterator in Python 3, so materialize the indices
        # before using them as a fancy index
        ind_of_largest_pitches = [
            x for x in ind_of_largest_pitches if pitches[i, x] <= overtone_limit
        ]
        midi_notes = librosa.hz_to_midi(pitches[i, ind_of_largest_pitches])
        midi_notes = midi_notes.round()
        # Normalize magnitudes of pitches
        midi_mags = magnitudes[i, ind_of_largest_pitches] / \
            np.linalg.norm(magnitudes[i, ind_of_largest_pitches], 1)
        np.put(lc[i], midi_notes.astype(np.int64) - [9], midi_mags)
    return lc
def pitch_features(segment, hop_length=1024, nor=True, to_midi=False):
    # Normalization and MIDI conversion are mutually exclusive;
    # normalization takes precedence
    if nor and to_midi:
        to_midi = False
    p_features = np.array([])
    pitches, magnitudes = librosa.piptrack(y=segment, sr=global_sr, fmin=20,
                                           fmax=8000, n_fft=hop_length * 2,
                                           hop_length=hop_length)
    # Per frame, keep the pitch of the bin with the largest magnitude
    p = [pitches[magnitudes[:, i].argmax(), i] for i in range(pitches.shape[1])]
    pitch0 = np.array(p)
    pitch = np.transpose(pitch0)
    p_features = np.hstack((p_features, max(20, np.amin(pitch, 0))))
    p_features = np.hstack((p_features, np.amax(pitch, 0)))
    p_features = np.hstack((p_features, np.median(pitch, 0)))
    p_features = np.hstack((p_features, np.mean(pitch, 0)))
    p_features = np.hstack((p_features, np.std(pitch, 0)))
    # p_features = np.hstack((p_features, np.var(pitch, 0)))
    if nor:
        p_features = normalize(p_features.reshape(1, -1))
    if to_midi:
        p_features = np.int_(librosa.hz_to_midi(p_features))
    return p_features
def generate_sine_midi_note(f0_info, sr, n_duration):
    f0 = f0_info[0]
    A = remap(f0_info[1], CdB.min(), CdB.max(), 0, 1)
    duration = librosa.frames_to_time(n_duration, sr=fs, hop_length=hop_length)
    # Generate music21 note
    # Round to the nearest 0.02 s for music21 compatibility
    note_duration = 0.02 * np.around(duration / 2 / 0.02)
    midi_velocity = int(round(remap(f0_info[1], CdB.min(), CdB.max(), 0, 127)))
    if f0 is None:
        try:
            note_info = Rest(type=mm.secondsToDuration(note_duration).type)
        except DurationException:
            note_info = None
        f0 = 0
    else:
        midi_note = round(librosa.hz_to_midi(f0))
        try:
            note = Note(midi_note, type=mm.secondsToDuration(note_duration).type)
            note.volume.velocity = midi_velocity
            note_info = [note]
        except DurationException:
            note_info = None
    if note_info is None:
        return None
    # Generate sine wave
    n = np.arange(librosa.frames_to_samples(n_duration, hop_length=hop_length))
    sine_wave = A * np.sin(2 * np.pi * f0 * n / float(sr))
    return [sine_wave, note_info]
def get_notes(WAVE_OUTPUT_FILENAME, file):
    os.system("aubiopitch -i " + WAVE_OUTPUT_FILENAME + " -r 44100 -p "
              + args.method + " -H 128 > " + file + ".txt")
    f = open(file + '.txt', 'r')
    note = np.array(f.read().split()[1::2]).astype(float)
    output_MIDI = []
    for j in range(len(note)):
        if note[j] != 0:
            d = librosa.hz_to_midi(note[j])
            d = round(d, 0)
            output_MIDI.append(d)
    # Encode the melody as a contour string:
    # u/U = small/large step up, S = same, d/D = small/large step down
    s = ""
    for i in range(len(output_MIDI) - 1):
        if output_MIDI[i] < output_MIDI[i + 1]:
            s += "u" if output_MIDI[i + 1] - output_MIDI[i] <= 2 else "U"
        elif output_MIDI[i] == output_MIDI[i + 1]:
            s += "S"
        else:
            s += "d" if output_MIDI[i] - output_MIDI[i + 1] <= 2 else "D"
    return s
def test_hz_to_midi_is_accurate(self):
    """Tests converting between MIDI values and their frequencies in hertz."""
    hz = np.linspace(0.0, 20000.0, 128)
    librosa_midi = librosa.hz_to_midi(hz)
    # librosa maps 0 Hz to -inf; clamp to 0 to match the TF implementation
    librosa_midi = tf.where(tf.less_equal(hz, 0.0), 0.0, librosa_midi)
    tf_midi = core.hz_to_midi(hz)
    self.assertAllClose(librosa_midi, tf_midi)
def hz_to_MIDI(self, varMidi, varMusica):
    y, sr, wav = self.load(varMusica.getAntigoWav(), varMusica.getOitava())
    tempo, beats = librosa.beat.beat_track(onset_envelope=wav[:200], sr=sr)
    varMusica.GeneralMIDI = librosa.hz_to_midi(tempo)
    print("varMusica general MIDI {}".format(varMusica.GeneralMIDI))
    varMidi.criaNovoMIDI(varMusica.GeneralMIDI)
    self.criaWav(sys.argv[3], 'novissimo.wav', varMusica)
def get_notes(filename, duration):
    y, sr = librosa.load(filename)
    fmin = 73
    fmax = 1108
    n_bins = 256
    harmonic = librosa.effects.harmonic(y)
    onsets = librosa.onset.onset_detect(y=harmonic)

    # get silence states
    rms = librosa.feature.rms(y=harmonic)[0]
    r_normalized = (rms - 0.01) / (np.std(rms) + 1e-9)
    p = np.exp(r_normalized) / (1 + np.exp(r_normalized))
    transition = librosa.sequence.transition_loop(2, [0.5, 0.6])
    full_p = np.vstack([1 - p, p])
    states = librosa.sequence.viterbi_discriminative(full_p, transition)

    # drop silent onsets
    onsets_filtered = onsets[states[onsets] > 0]
    # silence start borders
    silence = np.nonzero(states[:-1] - states[1:] > 0)[0]
    # note borders
    borders = np.hstack([silence.reshape(1, -1), onsets_filtered.reshape(1, -1)])[0]
    borders = np.sort(borders)

    # get frequencies and aggregate them
    pitches, magnitudes = librosa.piptrack(y=harmonic, sr=sr, fmin=fmin, fmax=fmax)
    freq = pitches.max(axis=-1)
    bins = np.argmax(magnitudes, axis=0)
    bins_sync = librosa.util.sync(bins, borders, aggregate=np.median)
    states_sync = librosa.util.sync(states, borders, aggregate=np.median)
    # median aggregation can yield float bins, so cast before indexing
    pitch_sync = freq[bins_sync.astype(int)]
    pitch_sync[pitch_sync == 0] = 1e-6  # avoid -inf in hz_to_midi

    # get notation and midi keys
    notes = librosa.hz_to_note(pitch_sync)
    midi = list(librosa.hz_to_midi(pitch_sync))

    # check pauses
    pauses = np.nonzero(states_sync == 0)[0]
    for x in list(pauses):
        notes[int(x)] = 'P'
        midi[int(x)] = 'P'
    # check wrong notes
    for i, note in enumerate(notes):
        if note[:-1] not in notes_to_numbers.keys():
            notes[i] = 'P'

    # add outer borders and define note lengths
    borders = np.append(borders, pitches.shape[-1])
    borders = np.concatenate([np.array([0]), borders])
    lengths = borders[1:] - borders[:-1]

    bpm = librosa.beat.tempo(y=y, sr=sr)
    melody = dict(notes=notes, lengths=list(lengths), midi=midi, bpm=bpm,
                  duration=duration, raw_filename=filename.split('/')[-1])
    return melody
def __init__(self, freq, start_time, duration, volume=1):
    # determine note and MIDI value using librosa
    self._actual_freq = freq
    self.freq = librosa.note_to_hz(librosa.hz_to_note(freq))  # snap to nearest note
    self.note = librosa.hz_to_note(self.freq)
    self.midi = int(librosa.hz_to_midi(self.freq))
    self.start_time = start_time
    self.duration = duration
    self.volume = volume
def load_score(score_file):
    dom = parse(score_file)
    xml_notes = dom.getElementsByTagName("note")
    notes = []
    durations = []
    for note in xml_notes:
        if is_fermata(note):
            print('fermata')
        durations.append(get_duration(note))
        if is_rest(note):
            notes.append('0')
        elif is_accidental(note):
            alter_aux = get_alter(note)
            if alter_aux == '1':
                notes.append(get_step(note) + '#' + get_octave(note))
            elif alter_aux == '2':
                notes.append(get_step(note) + '##' + get_octave(note))
            elif alter_aux == '-1':
                notes.append(get_step(note) + 'b' + get_octave(note))
            elif alter_aux == '-2':
                notes.append(get_step(note) + 'bb' + get_octave(note))
        else:
            notes.append(get_step(note) + get_octave(note))
    durations = np.array(durations, dtype='int16')

    cr = csv.reader(open("../traditional_dataset/note_convertion.csv", "r"))
    notation = []
    frequency = []
    for row in cr:
        notation.append(row[0])
        frequency.append(row[1])
    frequency = np.array(frequency, 'float64')

    # Expand each note to one sample per duration unit
    melo = np.empty([0, ])
    for i, note in enumerate(notes):
        if note == '0':
            melo = np.r_[melo, np.zeros(durations[i])]
        else:
            melo = np.r_[melo, np.full(durations[i], frequency[notation.index(note)])]

    score = lr.hz_to_midi(melo)
    np.place(score, score == -np.inf, 0)  # silence maps to 0, not -inf
    return score, notes
def evolve(lso, n_generations, elitism, mut_prob, fitness_weights, fitness_params):
    npop = len(lso)
    population = [(so, fitness(so, fitness_weights, fitness_params)) for so in lso]
    for _ in range(n_generations):
        n_child = round(npop * (1 - elitism))
        pot_parents = copy.deepcopy(population)
        childs = []
        while len(childs) < n_child:
            # Replenish the parent pool when it runs low
            if len(pot_parents) < 2:
                pot_parents = copy.deepcopy(population)
            # Fitness-proportionate selection without replacement
            parent0 = random.choices(pot_parents,
                                     weights=[score for _, score in pot_parents],
                                     k=1)[0]
            pot_parents.remove(parent0)
            parent1 = random.choices(pot_parents,
                                     weights=[score for _, score in pot_parents],
                                     k=1)[0]
            pot_parents.remove(parent1)
            child = crossover(parent0[0], parent1[0])
            while random.uniform(0, 1) < mut_prob:
                mut = random.choice(mutations)
                print(f"Mutating: {mut.__name__}")
                new_child = mut(child)
                if not np.isnan(new_child.samples).any():
                    child = new_child
                else:
                    print("Mutation Failed")
            try:
                # librosa.effects.trim returns (trimmed_signal, interval);
                # keep only the samples
                child = SoundObject(
                    lr.effects.trim(child.get_normalize_to(1.0))[0])
            except Exception:
                continue
            fit = fitness(child, fitness_weights, fitness_params)
            if np.isnan(fit) or np.isinf(fit):
                continue
            childs.append((child, fit))
        # Keep the elite and add the new children
        population = sorted(population, key=lambda ind: ind[1],
                            reverse=True)[:int(elitism * npop)] + childs
        print(max([s for _, s in population]))
    out = []
    for so, _ in population:
        ptrack, mtrack = so.track_pitch()
        pitch = lr.hz_to_midi(np.mean(ptrack))
        velocity = np.clip(np.mean(mtrack) * 127, 0, 127)
        out.append((so, pitch, velocity))
    return out
def compute_adjusted_features(
    audio_features,
    auto_adjust,
    loudness_db_shift,
    f0_octave_shift,
    f0_confidence_threshold,
):
    """Compute adjusted audio features for resynthesis."""
    audio_features_mod = {k: v.copy() for k, v in audio_features.items()}
    if auto_adjust:
        # Adjust the peak loudness (instrument_model is a module-level global).
        l = audio_features["loudness_db"]
        model_ld_avg_max = {
            "Violin": -34.0,
            "Flute": -45.0,
            "Flute2": -44.0,
        }[instrument_model]
        ld_max = np.max(audio_features["loudness_db"])
        ld_diff_max = model_ld_avg_max - ld_max
        audio_features_mod = shift_ld(audio_features_mod, ld_diff_max)

        # Further adjust the average loudness above a threshold.
        l = audio_features_mod["loudness_db"]
        model_ld_mean = {
            "Violin": -44.0,
            "Flute": -51.0,
            "Flute2": -53.0,
        }[instrument_model]
        ld_thresh = -50.0
        ld_mean = np.mean(l[l > ld_thresh])
        ld_diff_mean = model_ld_mean - ld_mean
        audio_features_mod = shift_ld(audio_features_mod, ld_diff_mean)

        # Shift the pitch register.
        model_p_mean = {
            "Violin": 73.0,
            "Flute": 81.0,
            "Flute2": 74.0,
        }[instrument_model]
        p = librosa.hz_to_midi(audio_features["f0_hz"])
        p[p == -np.inf] = 0.0  # unvoiced frames map to -inf; zero them out
        p_mean = p[l > ld_thresh].mean()
        p_diff = model_p_mean - p_mean
        p_diff_octave = p_diff / 12.0
        round_fn = np.floor if p_diff_octave > 1.5 else np.ceil
        p_diff_octave = round_fn(p_diff_octave)
        audio_features_mod = shift_f0(audio_features_mod, p_diff_octave)

    audio_features_mod = shift_ld(audio_features_mod, loudness_db_shift)
    audio_features_mod = shift_f0(audio_features_mod, f0_octave_shift)
    audio_features_mod = mask_by_confidence(audio_features_mod, f0_confidence_threshold)
    return audio_features_mod
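
# compute_adjusted_features calls three helpers that are not shown. The
# sketches below are plausible implementations (modeled on the DDSP timbre
# transfer notebook, which this code resembles), not the source's own code:
# shift_ld offsets the loudness curve, shift_f0 scales f0 by octaves, and
# mask_by_confidence silences frames whose pitch confidence is too low.
def shift_ld(audio_features, ld_shift=0.0):
    # Offset the loudness curve by ld_shift dB
    audio_features["loudness_db"] += ld_shift
    return audio_features

def shift_f0(audio_features, octave_shift=0.0):
    # Scale f0 by a (possibly fractional) number of octaves
    audio_features["f0_hz"] *= 2.0 ** octave_shift
    return audio_features

def mask_by_confidence(audio_features, threshold=0.0):
    # Zero out f0 where the pitch tracker's confidence falls below threshold
    mask = audio_features["f0_confidence"] < threshold
    audio_features["f0_hz"][mask] = 0.0
    return audio_features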
def get_Z(fmin, hop_length, n_bins_per_octave, n_octaves, stem):
    melody_f0s = di.symbolic.get_melody(stem)
    melody_annotation_hop = 256
    # Integer hop ratio for downsampling the melody annotation
    melody_downsampling = hop_length // melody_annotation_hop
    melody_f0s = melody_f0s[::melody_downsampling]
    midis = librosa.hz_to_midi(melody_f0s)
    midis[np.isinf(midis)] = 0.0
    track_activations = np.vstack(stem.track.activations_data)[:, 1:]
    stem_id = int(stem.name[1:]) - 1
    activations = track_activations[:, stem_id]
    activation_hop = 2048
    activation_upsampling = activation_hop // hop_length
    activations = activations.repeat(activation_upsampling)
    n_bins = n_bins_per_octave * n_octaves
    n_frames = len(activations)
    Z = np.zeros((n_bins, n_frames), np.float32)
    midi_fmin = librosa.hz_to_midi(fmin)
    for frame_id in range(len(midis)):
        bin_id = int(midis[frame_id] - midi_fmin)
        if bin_id >= 0:
            Z[bin_id, frame_id] = activations[frame_id]
    return Z
def plot_audio(audio_features, audio_features_mod, **kwargs):
    """Plot audio features with matplotlib."""
    legend = ["Audio features", "Resynth audio"]
    fig, ax = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(6, 4))
    ax[0].plot(audio_features["loudness_db"])
    ax[0].plot(audio_features_mod["loudness_db"])
    ax[0].set_ylabel("loudness_db")
    ax[0].legend(legend)
    ax[1].plot(librosa.hz_to_midi(audio_features["f0_hz"]))
    ax[1].plot(librosa.hz_to_midi(audio_features_mod["f0_hz"]))
    ax[1].set_ylabel("f0 [midi]")
    ax[1].legend(legend)
    ax[2].plot(audio_features_mod["f0_confidence"])
    ax[2].plot(np.ones_like(audio_features_mod["f0_confidence"])
               * f0_confidence_threshold)
    ax[2].set_ylabel("f0 confidence")
    ax[2].set_xlabel("Time step [frame]")
    ax[2].legend(legend)
    return fig, ax
def plotMel(timesList, freqsList, title='', grid=(44100, 2048, 512)):
    for i, (times, freqs) in enumerate(zip(timesList, freqsList)):
        midis = np.zeros([len(freqs)])
        midis[freqs > 0] = librosa.hz_to_midi(freqs[freqs > 0])
        seminotes = midis.astype(int) + 0.5
        plt.plot(times[midis > 0], midis[midis > 0], label=f'midi {i}')
        # plt.plot(times, seminotes, label=f'seminote {i}')
    # Draw the STFT frame/bin grid for the last melody plotted
    sr, n_fft, hop_length = grid
    frames = librosa.time_to_frames(times[-1], sr=sr,
                                    hop_length=hop_length, n_fft=n_fft)
    for t in librosa.frames_to_time(range(frames), sr=sr,
                                    hop_length=hop_length, n_fft=n_fft):
        plt.axvline(x=t, color='g')
    for f in librosa.fft_frequencies(sr=sr, n_fft=n_fft):
        if np.min(freqs[freqs > 0]) * 0.9 <= f <= np.max(freqs) * 1.1:
            plt.axhline(y=librosa.hz_to_midi(f), color='g')
    plt.xlabel('time')
    plt.ylabel('midi note')
    plt.legend()
    plt.title(f'melody:{title}')
    plt.show()
def test_cq_to_chroma(n_octaves, semitones, n_chroma, fmin, base_c, window):
    bins_per_octave = 12 * semitones
    n_bins = n_octaves * bins_per_octave
    if np.mod(bins_per_octave, n_chroma) != 0:
        ctx = pytest.raises(librosa.ParameterError)
    else:
        ctx = dnr()
    with ctx:
        # Fake up a cqt matrix with the corresponding midi notes
        if fmin is None:
            midi_base = 24  # C2
        else:
            midi_base = librosa.hz_to_midi(fmin)
        midi_notes = np.linspace(
            midi_base,
            midi_base + n_bins * 12.0 / bins_per_octave,
            endpoint=False,
            num=n_bins,
        )
        # We don't care past 2 decimals here.
        # The log2 inside hz_to_midi can cause problems, though.
        midi_notes = np.around(midi_notes, decimals=2)
        C = np.diag(midi_notes)
        cq2chr = librosa.filters.cq_to_chroma(
            n_input=C.shape[0],
            bins_per_octave=bins_per_octave,
            n_chroma=n_chroma,
            fmin=fmin,
            base_c=base_c,
            window=window,
        )
        chroma = cq2chr.dot(C)
        for i in range(n_chroma):
            v = chroma[i][chroma[i] != 0]
            v = np.around(v, decimals=2)
            if base_c:
                resid = np.mod(v, 12)
            else:
                resid = np.mod(v - 9, 12)
            resid = np.round(resid * n_chroma / 12.0)
            assert np.allclose(np.mod(i - resid, 12), 0.0), i - resid
def get_df_audio(sep, div, bpm, sr):
    audio_list = []
    for i in range(len(sep)):
        duration = librosa.get_duration(y=sep[i], sr=sr)
        # pass sr explicitly so sample indices convert at the actual rate
        start = librosa.samples_to_time(div[i, 0], sr=sr)
        end = librosa.samples_to_time(div[i, 1], sr=sr)
        pitch = estimate_pitch_fft(sep[i], sr)
        # pitch = estimate_pitch(sep[i], sr)
        midi = librosa.hz_to_midi(pitch)
        audio_list.append([start, end, duration, pitch, midi, bpm])
    df = pd.DataFrame(
        audio_list,
        columns=['start', 'end', 'duration', 'pitch', 'midi', 'tempo'])
    return df
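
# estimate_pitch_fft is not shown in the source. The sketch below is a
# hypothetical stand-in: a naive estimator that returns the frequency of the
# largest FFT magnitude peak. A real implementation would at least
# interpolate the peak and restrict the search band.
import numpy as np

def estimate_pitch_fft(y, sr):
    spectrum = np.abs(np.fft.rfft(y))
    freqs = np.fft.rfftfreq(len(y), d=1.0 / sr)
    spectrum[0] = 0.0  # ignore the DC component
    return freqs[np.argmax(spectrum)]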
def save(self, out):
    out_txt = ""
    for i, (so, fr, mag, _) in enumerate(self.sounds):
        pitch = lr.hz_to_midi(fr)
        vel = np.clip(mag * 127, 0, 127)
        path = os.path.dirname(out)
        fname = os.path.splitext(os.path.basename(out))[0] + f"_s{i}.wav"
        so.write(os.path.join(path, fname))
        out_txt += f"{fname}, {pitch}, {vel}\n"
    with open(out, "w") as file:
        file.write(out_txt)
def __extract_notes(summary_spectrum, fs):
    '''
    Extracts the significant F0s from the summary spectrum of a frame.

    @param summary_spectrum A numpy array representing the processed
                            frequency spectrum of a signal frame.
    @param fs The sampling frequency in Hz.
    @return A list of detected F0s as rounded MIDI note numbers.
    '''
    N = summary_spectrum.size
    residual_spectrum = np.copy(summary_spectrum)
    detected = []
    detected_saliences = []
    if __salience(summary_spectrum, fs, 1 / FMAX, 1 / FMIN) < SILENCE_THRESHOLD:
        return []
    previous_significance = 0
    while True:
        # estimation
        tau, s = __max_salience(residual_spectrum, fs)
        detected_saliences.append(s)
        fc = 1 / tau
        # cancellation: subtract the detected F0's partials from the residual
        subtracted_signal = np.zeros(residual_spectrum.size)
        for i in range(1, 21):
            partial_frequency = i * fc
            time_vector = np.linspace(0, N / fs, N)
            wave = np.sin(2 * np.pi * partial_frequency * time_vector)
            subtracted_signal = np.add(
                subtracted_signal,
                __hamming_magnitude(fc, fs, summary_spectrum.size)
                * __weight(tau, tau, i, fs) * wave)
        subtracted_signal = abs(
            np.fft.fft(subtracted_signal, n=subtracted_signal.size))
        residual_spectrum = residual_spectrum - subtracted_signal
        residual_spectrum[residual_spectrum < 0] = 0
        # check if the F0 is significant
        significance = sum(detected_saliences) / (len(detected_saliences) ** 0.66)
        if significance > previous_significance * SIGINC:
            previous_significance = significance
            detected.append(int(round(librosa.hz_to_midi(fc))))
        else:
            return detected
def sequencer_config(in_path, out_path, n):
    source = sm.sound.load(in_path)
    model = sm.mustruct.build_model(n, source)
    # Encode each note as (midi_pitch, velocity, duration_seconds)
    model = [[(
        int(lr.hz_to_midi(note.pitch)) if note.pitch > 0.0 else 0,
        int(np.clip(note.mag, 0, 1) * 127),
        round(note.dur, 3)
    ) for note in entry] for entry in model]
    out = ""
    for entry in model:
        for note in entry:
            out += f"{note[0]}, {note[1]}, {note[2]}; "
        out += "\n"
    with open(out_path, "w") as file:
        file.write(out)
def hz_to_midi_zeros(annotation):
    '''
    Special function so that zeros represent silence.

    Input: annotation list taken straight from mtrack
    Output: 1-d np.array of MIDI note numbers, with 0 for silent frames
    '''
    new_values = np.array([])
    for a in annotation:
        new_a = 0
        if a != 0:
            new_a = librosa.hz_to_midi(a)
        new_values = np.append(new_values, new_a)
    return new_values
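
# A minimal sketch of hz_to_midi_zeros on a toy annotation: 0 Hz frames stay
# 0 instead of becoming the -inf that plain librosa.hz_to_midi would return.
import numpy as np
import librosa

annotation = [0.0, 440.0, 0.0, 261.63]
print(hz_to_midi_zeros(annotation))  # approximately [0., 69., 0., 60.]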
def play(self, npitch, nmag, ndur):
    note = Note(npitch, nmag, ndur)
    if note.pitch <= 0:
        # Silence: return zeros of the requested duration
        samps = np.zeros(int(note.dur * sm.sound.SAMPLE_RATE))
        return SoundObject(samps)
    n_samples = int(note.dur * sm.sound.SAMPLE_RATE)
    factors = []
    for so, pitch, mag, dur in self.sounds:
        # Inverse-distance weight in (pitch, magnitude, duration) space
        p_dif = (note.pitch - pitch) ** 2
        m_dif = (note.mag - mag) ** 2
        d_dif = (note.dur - dur) ** 2
        wei = 1 / np.sqrt(p_dif + m_dif + d_dif)
        samps = so.samples
        # Trim or zero-pad every sample to the requested length
        if samps.size > n_samples:
            samps = samps[:n_samples]
        elif samps.size < n_samples:
            samps = np.concatenate([samps, np.zeros(n_samples - samps.size)])
        factors.append([samps, wei])
    total = sum([w for _, w in factors])
    factors = sorted(factors, key=lambda fac: fac[1])[-MAX_SAMPLES:]
    out = np.zeros(n_samples)
    for samps, wei in factors:
        out += samps * wei / total
    # Correct the blended signal toward the requested pitch
    pitch = SoundObject(out).get_f0()
    if pitch > 0:
        out = lr.effects.pitch_shift(
            out, sr=sm.sound.SAMPLE_RATE,
            n_steps=lr.hz_to_midi(note.pitch) - lr.hz_to_midi(pitch))
    oso = sm.effects.band_pass(SoundObject(out), note.pitch, 20000)
    return oso
def showMidi(melfile, srcDir=None):
    if srcDir is not None:
        files = os.listdir(srcDir)
        melFiles = [os.path.join(srcDir, x) for x in files]
    else:
        melFiles = [melfile]
    midis = np.array([])
    for melfile in melFiles:
        times, freqs = loadMel(melfile)
        x = librosa.hz_to_midi(freqs[freqs > 0])
        midis = np.concatenate([midis, x])
    plt.hist(midis, bins=25)
    plt.xlabel('midi')
    plt.show()
    return midis
def __extract_pitches_from_onsets(signal, fs):
    filter_fcies = __filter_center_fcies()
    filter_bands = __filter_bandwidths(filter_fcies)
    size_of_frame = int(FRAME_SIZE * fs)
    s = np.zeros(signal.size)
    for i in range(len(filter_fcies)):
        num_coefs_1, denom_coefs_1, num_coefs_2, denom_coefs_2 = __coefficients(
            filter_fcies[i], fs, filter_bands[i])
        # filter twice with the first filter and twice with the second
        c = sig.lfilter(num_coefs_1, denom_coefs_1, signal)
        c = sig.lfilter(num_coefs_1, denom_coefs_1, c)
        c = sig.lfilter(num_coefs_2, denom_coefs_2, c)
        c = sig.lfilter(num_coefs_2, denom_coefs_2, c)
        # apply neural transduction operations
        c = __dynamic_compression(c, fs)
        c = __fwr_and_filter(c, fs, filter_fcies[i])
        s += c
    # detect onsets
    onsets = librosa.onset.onset_detect(y=signal, sr=fs, units="samples")
    # detect notes in the frame following every onset
    channels = []
    for i in range(16):
        channels.append([Melody([])])
    for onset in onsets:
        f = __frame_spectrum(s, fs, onset)
        tracked = __extract_notes(f, fs)
        counter = 1
        # track all notes until they are not detected anymore to get durations
        while not len(tracked) == 0:
            f = __frame_spectrum(s, fs, onset + counter * size_of_frame)
            F0s = __extract_notes(f, fs)
            # iterate over a copy, since tracked is mutated in the loop body
            for f0 in list(tracked):
                if f0 not in F0s:
                    duration = counter * FRAME_SIZE
                    # __extract_notes already returns rounded MIDI note
                    # numbers, so no hz_to_midi conversion is needed here
                    midi_pitch = int(f0)
                    note = Note(duration, midi_pitch, 64)
                    __add_to_channel(channels[0], note, onset / fs)
                    tracked.remove(f0)
            # advance to the next frame, otherwise the loop never moves on
            counter += 1
    for i in range(len(channels)):
        channels[i] = Channel(Chord(channels[i]), 0, 0)
    return Chord(channels)
def __test(n_bins, bins_per_octave, n_chroma, fmin, base_c, window):
    # Fake up a cqt matrix with the corresponding midi notes
    if fmin is None:
        midi_base = 24  # C2
    else:
        midi_base = librosa.hz_to_midi(fmin)
    midi_notes = np.linspace(midi_base,
                             midi_base + n_bins * 12.0 / bins_per_octave,
                             endpoint=False,
                             num=n_bins)
    # We don't care past 2 decimals here.
    # The log2 inside hz_to_midi can cause problems, though.
    midi_notes = np.around(midi_notes, decimals=2)
    C = np.diag(midi_notes)
    cq2chr = librosa.filters.cq_to_chroma(n_input=C.shape[0],
                                          bins_per_octave=bins_per_octave,
                                          n_chroma=n_chroma,
                                          fmin=fmin,
                                          base_c=base_c,
                                          window=window)
    chroma = cq2chr.dot(C)
    for i in range(n_chroma):
        v = chroma[i][chroma[i] != 0]
        v = np.around(v, decimals=2)
        if base_c:
            resid = np.mod(v, 12)
        else:
            resid = np.mod(v - 9, 12)
        resid = np.round(resid * n_chroma / 12.0)
        assert np.allclose(np.mod(i - resid, 12), 0.0), i - resid
# Build the ground-truth contour: hold each note's frequency between onsets.
# j is assumed to start at 0; the source fragment does not initialize it.
j = 0
gt = np.empty([len(timestamps), ], 'float64')
for i in range(1, len(onset)):
    while (j < len(timestamps) and timestamps[j] >= onset[i - 1]
           and timestamps[j] <= onset[i]):
        gt[j] = melo[i - 1]
        j = j + 1

plt.figure(figsize=(18, 6))
plt.plot(timestamps, melody_hz)
plt.plot(timestamps, gt)
plt.xlabel('Time (s)')
plt.ylabel('Notes')
plt.yscale('log')
plt.yticks(frequency, notation)
plt.ylim(246, 2350)
plt.show()

#%%
midigt = lr.hz_to_midi(gt)
melonotes = lr.hz_to_midi(melody_hz)
int_melonotes = np.round(melonotes)

plt.figure()
plt.plot(timestamps, int_melonotes, '.-', color='blue', lw=0.7)
plt.plot(timestamps, midigt, 'green', lw=1.4)
plt.plot(timestamps, melonotes, 'red', lw=0.3)
plt.fill_between(timestamps, midigt, int_melonotes,
                 facecolor='cyan', label='difference', alpha=0.2)
plt.grid(visible=True, which='major', color='black', axis='y', linestyle='-')
plt.grid(visible=True, which='minor', color='black', axis='y',
         linestyle='-', alpha=0.3)
plt.minorticks_on()
def pitch_extraction(audio, fs, win, hop):
    audio = audio.astype('float32', copy=False)
    win_s = win
    hop_s = hop
    tolerance = 0.5
    pitch_o = pitch("yin", win_s, hop_s, fs)
    pitch_o.set_unit("Hz")
    pitch_o.set_tolerance(tolerance)

    pitches = []
    confidences = []
    # total number of frames read (integer division for Python 3)
    total_frames = len(audio) // win_s
    for i in range(total_frames - 1):
        samples = audio[i * win_s:(i + 1) * win_s]
        p = pitch_o(samples)[0]
        confidence = pitch_o.get_confidence()
        # if confidence < 0.8: p = 0.
        pitches += [p]
        confidences += [confidence]
    timestamps = np.arange(len(pitches)) * float(win_s) / fs

    pitches = np.array(pitches)
    melody_hz = copy.deepcopy(pitches)
    melody_hz[pitches <= 0] = 0
    melody_hz[pitches > 1200] = 0
    melonotes = lr.hz_to_midi(melody_hz)
    int_melonotes = np.round(melonotes)
    int_melonotes[int_melonotes < 58] = 0
    int_melonotes[int_melonotes > 96] = 0

    # ONSET DETECTION FROM PITCH CONTOUR
    onset_detection = np.zeros([len(int_melonotes), ], dtype='int8')
    M = 0
    m = 0
    k = 0
    for i in range(len(int_melonotes) - 1):
        M = M + 1
        k = k + 1
        f0_mean = np.sum(int_melonotes[m:m + M]) / float(M)
        if np.abs(f0_mean - int_melonotes[k]) > 0.2:
            onset_detection[k - 1] = -1
            onset_detection[k] = 1
            m = k + 1
            M = 1
        else:
            onset_detection[k] = 0
    limits = np.where(onset_detection == 1)

    # PITCH CORRECTION WITH ONSET DETECTION
    filtrated_pitch = int_melonotes.copy()
    for i in range(len(limits[0]) - 1):
        aux = limits[0][i]
        aux2 = limits[0][i + 1]
        filtrated_pitch[aux:aux2] = np.median(filtrated_pitch[aux:aux2])
    filtrated_pitch = np.round(filtrated_pitch)
    filtrated_pitch[filtrated_pitch < 58] = 0
    filtrated_pitch[filtrated_pitch > 96] = 0
    return filtrated_pitch, timestamps
def test_hz_to_midi():
    assert np.allclose(librosa.hz_to_midi([55, 110, 220, 440]), [33, 45, 57, 69])
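
# A complementary round-trip check (an addition, not from the source):
# midi_to_hz should invert hz_to_midi for positive frequencies.
def test_hz_to_midi_roundtrip():
    hz = np.array([55.0, 110.0, 220.0, 440.0])
    assert np.allclose(librosa.midi_to_hz(librosa.hz_to_midi(hz)), hz)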