def __test(y, sr, S, n_fft, hop_length, fmin, fmax, threshold):
    pitches, mags = librosa.piptrack(y=y, sr=sr, S=S, n_fft=n_fft,
                                     hop_length=hop_length, fmin=fmin,
                                     fmax=fmax, threshold=threshold)
import numpy as np
import librosa
from scipy.signal import medfilt as mf


def get_librosa_pitch(signal, fs, window):
    pitches, magnitudes = librosa.piptrack(y=signal.astype('float'), sr=fs,
                                           n_fft=int(window),
                                           hop_length=int(window / 10))
    # pick the strongest bin in each frame
    pitch_pos = np.argmax(magnitudes, axis=0)
    pitches_final = []
    for i in range(len(pitch_pos)):
        pitches_final.append(pitches[pitch_pos[i], i])
    pitches_final = np.array(pitches_final)
    pitches_final[pitches_final > 2000] = 0  # cut high pitches
    return mf(pitches_final, 3)  # use medfilt for smoothing
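# Usage sketch for get_librosa_pitch above (not from the original source):
# track a synthetic 440 Hz tone. The 2048-sample window is an assumed,
# illustrative choice.
import numpy as np

fs = 22050
t = np.arange(fs) / fs                 # one second of samples
tone = np.sin(2 * np.pi * 440.0 * t)   # A4 test tone
track = get_librosa_pitch(tone, fs, window=2048)
print(track[track > 0][:5])            # voiced frames should sit near 440 Hz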
def get_pitch(filename=None):
    # evaluate the default lazily; a call in the signature would run only
    # once, at definition time
    if filename is None:
        filename = read_and_write_audio()
    x, sr_ = librosa.load(filename)
    pitches, mags = librosa.piptrack(y=x, sr=sr_)
    # inspect only frame 128: take the strongest bin there
    index = mags[:, 128].argmax()
    pitch = pitches[index, 128]
    note_name = librosa.hz_to_note(pitch)
    if len(note_name) > 2:
        return note_name[0:2]  # note letter plus accidental
    return note_name[0]
def get_pitch(filename):
    x, sr_ = librosa.load(filename)
    pitches, mags = librosa.piptrack(y=x, sr=sr_)
    # inspect only frame 128: take the strongest bin there
    index = mags[:, 128].argmax()
    pitch = pitches[index, 128]
    note_name = librosa.hz_to_note(pitch)
    if len(note_name) > 2:
        return f'{note_name[0]}#'
    return note_name[0]
def __test(S, freq):
    pitches, mags = librosa.piptrack(S=S, fmin=100)
    idx = (mags > 0)
    assert np.any(idx)  # len(idx) would count frequency bins, not detections
    recovered_pitches = pitches[idx]
    # We should be within 1e-2 octaves (about 12 cents) of the target
    assert np.all(np.abs(np.log2(recovered_pitches) - np.log2(freq)) <= 1e-2)
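# One way to drive the __test harness above (assumed, mirroring the usual
# synthetic-tone setup): a pure sine whose STFT peak piptrack must recover.
import numpy as np
import librosa

freq = 440.0
sr = 22050  # matches piptrack's default sr, since __test does not pass one
y = np.sin(2 * np.pi * freq * np.arange(3 * sr) / sr)  # 3 s test tone
S = np.abs(librosa.stft(y))
__test(S, freq)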
def baseline_tracking(audio_file, result_file=None):
    # database_sr, debug, and extract_pitch_max are module-level helpers
    start = time.perf_counter()  # time.clock() was removed in Python 3.8
    y, sr = librosa.load(audio_file, sr=database_sr)
    print('Audio file loaded: ' + audio_file)
    print('{:f}s for loading the audio file.'.format(time.perf_counter() - start))
    start = time.perf_counter()
    pitches, magnitudes = librosa.piptrack(y=y, sr=database_sr)
    mag_thresh = 2 * np.mean(magnitudes) / 3
    d_range, time_range = pitches.shape
    pitches_max, magnitudes_max = \
        extract_pitch_max(pitches, magnitudes, time_range)
    ret_pitch = []
    if result_file:
        file = open(result_file, 'w+')
    # print out tracked notes over time
    for t in range(time_range):
        # strongest pitch per frame; zero values mean no pitch was found
        pitch_t = pitches_max[t]
        # its magnitude, for filtering out notes that are too weak
        mag_t = magnitudes_max[t]
        # only emit a time t if there are notes present:
        # if pitch_t != 0 and mag_t > mag_thresh:
        if True:
            ret_pitch.append(pitch_t)
            if result_file:
                file.write(str(t) + ',' + str(pitch_t) + '\n')
            elif debug:
                print(t)
                print(pitch_t)
                print()
    if result_file:
        print("Saved in " + result_file)
        file.close()
    print("Stat:")
    print('{:f}s for piptrack'.format(time.perf_counter() - start))
    print("len(pitches): {:d}".format(len(pitches)))
    print("pitch shape:")
    print(pitches.shape)
    print("magnitudes shape:")
    print(magnitudes.shape)
    print("sampling rates:")
    print(sr)
    return pitches, ret_pitch
def onlyPitch(X, sample_rate):
    stft = np.abs(librosa.stft(X))
    # piptrack's arguments are keyword-only in current librosa
    pitches, magnitudes = librosa.piptrack(y=X, sr=sample_rate, S=stft,
                                           fmin=70, fmax=400)
    pitch = []
    for i in range(magnitudes.shape[1]):
        index = magnitudes[:, i].argmax()
        pitch.append(pitches[index, i])
    return np.asarray(pitch)
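# Aligning onlyPitch's per-frame track with time (assumed usage, not from
# the original source). librosa's default hop length of 512 applies because
# onlyPitch does not override it.
import numpy as np
import librosa

fs = 22050
t = np.arange(2 * fs) / fs
y = 0.5 * np.sin(2 * np.pi * 220.0 * t)   # 220 Hz sits inside [fmin, fmax]
track = onlyPitch(y, fs)
times = librosa.times_like(track, sr=fs)  # frame index -> seconds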
def get_pitches_by_beats(y, starts, ends):
    # global_sr and extract_max are defined at module level
    pitches = []
    for b in range(len(starts)):
        # np.int was removed from NumPy; use the builtin int
        seg1 = y[int(starts[b] * global_sr):int(ends[b] * global_sr)]
        pitches1, magnitudes1 = librosa.piptrack(y=seg1, sr=global_sr,
                                                 fmin=20, fmax=8000,
                                                 hop_length=2048)
        max_p = np.median(extract_max(pitches1))
        pitches.append(max_p)
    return np.array(pitches)
def load_music(filepath):
    print("=========load music %s=========" % (filepath))
    # pitch at each beat
    beat_pitches = []
    # 12-bin chroma intensities at each beat
    beat_chroma = []
    # index of the strongest chroma bin at each beat
    beat_chroma_max_index = []
    y, sr = librosa.load(filepath, sr=None)
    S = np.abs(librosa.stft(y))
    # onset strength (note attack envelope)
    onset_env = librosa.onset.onset_strength(y=y, sr=sr)
    # beat positions (frame indices)
    tempo, beats = librosa.beat.beat_track(onset_envelope=onset_env, sr=sr)
    # beat positions (in seconds)
    beat_times = librosa.frames_to_time(beats, sr=sr)
    # frequencies and magnitudes, indexed as [frequency_bin, frame]
    pitches, magnitudes = librosa.piptrack(S=S, sr=sr)
    # 12-bin chroma intensities
    chroma_stft = librosa.feature.chroma_stft(S=S, sr=sr)
    for beat in beats:
        index = magnitudes[:, beat].argmax()
        pitch = pitches[index, beat]
        beat_pitches.append(float(pitch))
        chromas = []
        max_chroma = 0
        max_chroma_index = 0
        chr_st_index = 0
        for chr_st in chroma_stft:
            chromas.append(chr_st[beat])
            if chr_st[beat] > max_chroma:
                max_chroma = chr_st[beat]
                max_chroma_index = chr_st_index
            chr_st_index += 1
        beat_chroma.append(chromas)
        beat_chroma_max_index.append(max_chroma_index)
    return {
        "beat_times": beat_times.tolist(),
        "beat_pitches": beat_pitches,
        "beat_chroma_max_index": beat_chroma_max_index
    }
def pitch_wave(path):
    """
    Alternative using crepe instead of librosa:
    sr, audio = wavfile.read(path)
    time, frequency, confidence, activation = crepe.predict(audio, sr, viterbi=True)
    print(time, frequency, confidence, activation)
    return (time, frequency, confidence, activation)
    """
    print("-*-*-*-*-pitch : wave.ver-*-*-*-*-")
    y, sr = librosa.load(path)
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    plt.title("pitches")
    plt.plot(pitches)
    plt.show()
def bank(indata):
    # segment the cycle into equally sized chunks and try to find the pitch
    # of each one, saving the audio to the bank marked with its pitch
    l, _ = indata.shape
    # q_size = int(l / 4)
    q_size = int(l / 8)
    q = np.zeros((q_size, 2), dtype='float32')
    count = 0
    for x in range(1, 8):
        q[:] = indata[count:count + q_size]
        pitches, magnetude = librosa.piptrack(y=q[:, 1], sr=SAMPLE_RATE,
                                              fmin=250.0, fmax=1050.0)
        _, ts = magnetude.shape
        print(ts)
        print(pitches.shape)
        index = max(magnetude[:, i].argmax() for i in range(ts))
        prev = magnetude[:, 0].argmax()
        best_ts = 0
        for i in range(ts):
            if magnetude[:, i].argmax() > prev:
                best_ts = i
                prev = magnetude[:, i].argmax()
        pitch = pitches[index, best_ts]
        if pitch != 0.0:
            # hz_to_note returns a string for scalar input in current librosa
            note_info = librosa.hz_to_note(pitch, cents=True)
            print(note_info)
            cents = int(note_info.replace('+', '-').split('-', 1)[-1])
            print("Cents: ", cents)
            # if it's close enough (within 25 cents) it's added to the note bank
            if cents < 25:
                note = librosa.hz_to_note(pitch, octave=False)
                print("Bank: ", note)
                keyId = note_names.index(note[0])
                # if keyId in noteBank:
                #     noteBank[keyId].append(q)
                # else:
                #     noteBank[keyId] = list(q)
                noteBank[keyId] = q.copy()  # copy: q is reused next iteration
        count += q_size
def cal_pitch(audio_path='/data/pytong/wav/itg_0603/EE1811306983070-interview-iOS-2020-05-29-09-06-23.wav',
              sample_rate=16000, hop=160):
    signal, rate = librosa.load(audio_path, sr=sample_rate, mono=True)
    pitches, magnitudes = librosa.piptrack(y=signal, sr=rate, hop_length=hop)
    pitch_result = []
    # transpose so that each tuple holds one frame
    pitches_reverted = list(zip(*pitches))
    magnitudes_reverted = list(zip(*magnitudes))
    for cur_pitch_list, cur_magnitude_list in zip(pitches_reverted,
                                                  magnitudes_reverted):
        index = cur_magnitude_list.index(max(cur_magnitude_list))
        pitch = cur_pitch_list[index]
        pitch_result.append(pitch)
    print(len(pitch_result), pitch_result[0:2])
    return pitch_result
def extract_features(self, filepath, filename, offset):
    full_path = filepath + '/' + filename
    if not os.path.isfile(full_path):
        raise ExtractionError
    samples, sample_rate = librosa.load(full_path, offset=offset, sr=None,
                                        duration=1)
    tempogram = librosa.feature.tempogram(y=samples, sr=sample_rate)
    mfcc = librosa.feature.mfcc(y=samples, sr=sample_rate)
    pitches, magnitudes = librosa.piptrack(y=samples, sr=sample_rate)
    return np.concatenate([
        tempogram.flatten()[:MAX_TEMPOGRAM_WEIGHT],
        mfcc.flatten()[:MAX_MFCC_WEIGHT],
        magnitudes.flatten()[:MAX_PITCH_WEIGHT]
    ])
def findpitch(file):
    y, sr = librosa.load(file)
    stft = np.abs(librosa.stft(y))
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr, S=stft, fmin=70,
                                           fmax=400)
    pitch = []
    for i in range(magnitudes.shape[1]):
        index = magnitudes[:, i].argmax()  # was [:, 1]: same frame every pass
        pitch.append(pitches[index, i])
    # pitch_tuning_offset = librosa.pitch_tuning(pitches)
    pitch = [p for p in pitch if p > 0]
    # pitchmean = np.mean(pitch)
    # pitchstd = np.std(pitch)
    # pitchmax = np.max(pitch)
    pitchmin = np.min(pitch)
    return pitchmin
def find_songKey(indata):
    # find the key of the most recent layer
    global song_key
    pitches, magnetude = librosa.piptrack(y=indata[:, 1], sr=SAMPLE_RATE,
                                          fmin=250.0, fmax=1050.0)
    time_slice = 1
    index = magnetude[:, time_slice].argmax()
    pitch = pitches[index, time_slice]
    while pitch == 0.0:
        # wrap on the frame count, not the bin count (len(pitches) is rows)
        time_slice = (time_slice + 10) % pitches.shape[1]
        index = magnetude[:, time_slice].argmax()
        pitch = pitches[index, time_slice]
    key = librosa.hz_to_note(pitch, octave=False)
    print("findKey: ", key)
    keyId = note_names.index(key[0])
    song_key = keyId
def extract_features(filename):
    # load the file
    y, sr = librosa.load(filename)
    # compute the mel spectrogram with fmax=8000, then transpose so that
    # frames come first
    S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128, fmax=8000)
    S = np.transpose(S)
    # piptrack algorithm
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    pitches = np.transpose(pitches)
    magnitudes = np.transpose(magnitudes)
    # collapse to 1-D sequences of the most salient values
    pit_seq = pip_to_pitch(pitches)
    mag_seq = pip_to_pitch(magnitudes)
    # proportion of silence over the whole utterance (delay_p)
    delay_p, mag_index = find_delay(mag_seq, np.std(mag_seq))
    # mean and standard deviation of pitch over the valid frames
    mean, error = effective_analysis(pit_seq, mag_index)
    return delay_p, mean, error
def f0_estimate(f, S, fs, fmin=50, fmax=500, threshold=0.2):
    '''
    :param f: frequency value of each STFT bin
    :param S: STFT spectrum
    :param fs: sample rate
    :param fmin:
    :param fmax:
    :param threshold:
    :return: f0, shape=(t,)
    '''
    y = np.abs(S)
    pitches, mag = lib.piptrack(S=y, sr=fs, fmin=fmin, fmax=fmax,
                                threshold=threshold)
    f0 = f[np.argmax(mag, axis=0)]
    return f0
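# How f and S for f0_estimate might be produced (assumed usage; `lib` is
# taken to be `import librosa as lib`, the alias used above).
import numpy as np
import librosa as lib

fs = 16000
y = np.sin(2 * np.pi * 150.0 * np.arange(fs) / fs)  # 150 Hz, inside [fmin, fmax]
S = lib.stft(y)                                     # n_fft defaults to 2048
f = lib.fft_frequencies(sr=fs, n_fft=2048)          # bin center frequencies
f0 = f0_estimate(f, S, fs)                          # ~150 Hz on voiced frames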
def aaa():
    p = pyaudio.PyAudio()
    stream = p.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True,
                    frames_per_buffer=CHUNK)
    print("* recording")
    frames = []
    for i in range(0, int(RATE / CHUNK * RECORD_SECONDS)):
        data = stream.read(CHUNK)
        frames.append(data)
    print("* done recording")
    stream.stop_stream()
    stream.close()
    p.terminate()
    wf = wave.open(WAVE_OUTPUT_FILENAME, 'wb')
    wf.setnchannels(CHANNELS)
    wf.setsampwidth(p.get_sample_size(FORMAT))
    wf.setframerate(RATE)
    wf.writeframes(b''.join(frames))
    wf.close()
    # load the file that was just written
    y, sr = librosa.load(WAVE_OUTPUT_FILENAME, sr=32050)
    s = np.abs(librosa.stft(y))
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    s_mean = np.mean(s)
    s_pitch = np.mean(magnitudes)
    result = (s_mean + s_pitch) / 2
    return result
def get_audio_analysis(song_url):
    if song_url is None:
        return None, None, None, None
    urllib.request.urlretrieve(song_url, "current.mp3")
    y, sr = librosa.load("./current.mp3")
    tempo, beats = librosa.beat.beat_track(y=y, sr=sr)  # tempo = beats/minute
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)  # pitch = frequency
    pitch_ave = np.average(pitches)
    harm = np.sum(librosa.effects.harmonic(y))
    perc = np.sum(librosa.effects.percussive(y))
    return tempo, pitch_ave, harm, perc
def long_term_info(y, sr):
    interval = 0.01
    window = 0.256
    start_stft = time.time()
    stft = librosa.stft(y, hop_length=int(interval * sr),
                        n_fft=int(window * sr))
    end_stft = time.time()
    stft = np.transpose(stft)
    power = np.abs(stft) ** 2
    start_pitch = time.time()
    pitch, mag = librosa.piptrack(y=y, sr=sr, hop_length=int(interval * sr),
                                  n_fft=int(window * sr))
    end_pitch = time.time()
    # timings are collected but not returned
    duration_stft = end_stft - start_stft
    duration_pitch = end_pitch - start_pitch
    pitch = np.transpose(pitch)
    return stft, power, pitch
def get_freq_mag(y, sr, window_size=None, window_shift=None):
    '''
    default values:
    - windows of 25ms
    - window shifts of 10ms
    '''
    if window_size is None:
        n_fft = int(0.025 * sr)
    else:
        n_fft = int(window_size * 0.001 * sr)
    if window_shift is None:
        hop_length = int(0.010 * sr)
    else:
        hop_length = int(window_shift * 0.001 * sr)
    # collect the frequencies present and their magnitudes
    frequencies, magnitudes = librosa.piptrack(y=y, sr=sr,
                                               hop_length=hop_length,
                                               n_fft=n_fft)
    frequencies = np.transpose(frequencies)
    magnitudes = np.transpose(magnitudes)
    return frequencies, magnitudes
def getVar(path):
    # earlier plotting experiment, kept for reference:
    # pitches, magnitudes = lib.piptrack(y=wf, sr=sr, fmin=50, fmax=500)
    # pitches = pitches[np.nonzero(pitches)]
    # # pitches = np.asarray(pitches)
    # # duration = np.array(list(range(0, duration)))
    # plt.subplot(211)
    # plt.plot(wf)
    # plt.title('wave')
    # plt.subplot(212)
    # plt.imshow(pitches, aspect="auto", interpolation="nearest",
    #            origin="bottom")
    # plt.plot(pitches)
    # plt.title("pitches")
    # plt.show()
    y, sr = librosa.load(path)
    # D = librosa.amplitude_to_db(np.abs(librosa.stft(y)), ref=np.max, top_db=10)
    duration = float(lib.get_duration(y=y, sr=sr))
    drf = lib.time_to_frames(duration, sr=sr)
    pits, mags = lib.piptrack(y=y, sr=sr, fmin=50, fmax=500)
    print("pits type === ", type(pits), end='\n')
    print("mags len === ", len(mags[0]), end='\n')
    print(np.shape(mags))
    plt.subplot(211)
    # y_axis must be a single axis name; 'log, frames' is not valid
    librosa.display.specshow(pits, x_axis='time', y_axis='log', fmax=500)
    plt.title('pits')
    plt.subplot(212)
    librosa.display.specshow(mags, x_axis='time', y_axis='log')
    plt.title('mags')
    # plt.plot(duration)
    # plt.title("check duration")
    plt.show()
def findKey_arbitrary(indata):
    # find the key of the most recent layer, then look up the notes for the
    # appropriate chord in the note bank
    pitches, magnetude = librosa.piptrack(y=indata[:, 1], sr=SAMPLE_RATE,
                                          fmin=250.0, fmax=1050.0)
    time_slice = 1
    index = magnetude[:, time_slice].argmax()
    pitch = pitches[index, time_slice]
    while pitch == 0.0:
        # wrap on the frame count, not the bin count (len(pitches) is rows)
        time_slice = (time_slice + 10) % pitches.shape[1]
        index = magnetude[:, time_slice].argmax()
        pitch = pitches[index, time_slice]
    key = librosa.hz_to_note(pitch, octave=False)
    print("findKey: ", key)
    keyId = note_names.index(key[0])
    seq = [keyId % 12, (keyId + 4) % 12, (keyId + 7) % 12]  # always a major chord
    construct_chord(seq)
def calc_pitches(audio_file):
    with tempfile.TemporaryDirectory() as tmpdirname:
        # conversion to wav 44100 s16 mono
        in_path = tmpdirname + '/in.mp3'
        out_path = tmpdirname + '/out.wav'
        in_file = open(in_path, 'wb')
        in_file.write(audio_file.read())
        audio_file.seek(0)
        subprocess.call([
            'ffmpeg', '-i', in_path, '-acodec', 'pcm_s16le', '-ac', '1',
            '-ar', '44100', out_path
        ])
        # pitch detection
        y, sr = librosa.load(out_path, sr=44100)
        duration = librosa.get_duration(y=y, sr=sr)
        pitches, magnitudes = librosa.piptrack(y=y, sr=sr, fmin=min_freq,
                                               fmax=max_freq,
                                               hop_length=int(duration * sr / 10))
        # for i in range(len(pitches)):
        #     print(pitches[i])
        #     print(magnitudes[i])
        #     print('\n')
        # get unique freq bins
        max_mag = np.amax(magnitudes, axis=1)
        freqs = list(
            set([
                min_freq + freq_bin_width * round(
                    (f[1] - min_freq) / freq_bin_width)
                for f in list(enumerate(np.amax(pitches, axis=1)))
                if f[1] >= min_freq and max_mag[f[0]] > 7
            ]))
        return freqs
def run(self):
    logging.info("Starting Pitch detector")
    # this loop condition has to be checked frequently, so the code inside
    # may not block
    while not self.terminated:
        new_frame = self.audio_frames.get()  # get new frame (blocking)
        if self.counter == 0:
            self.frames = new_frame
            self.counter += 1
        elif self.counter >= BUFFER_SIZE:
            self.frames = np.append(self.frames, new_frame)
            pitches, magnitudes = librosa.piptrack(y=self.frames,
                                                   sr=SAMPLE_RATE)
            # select out pitches with high energy
            pitches = pitches[magnitudes > np.median(magnitudes)]
            new_tuning = int(50 + 100 * librosa.pitch_tuning(pitches))
            if np.abs(self.last_pitch - new_tuning) > PITCH_CHANGE_THRESHOLD:
                self.last_pitch = new_tuning
                self.manager.new_tuning(new_tuning)
            self.counter = 0
        else:
            self.frames = np.append(self.frames, new_frame)
            self.counter += 1
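# Quick check of the tuning mapping used in run() above (assumed example):
# librosa.pitch_tuning returns fractional bins in [-0.5, 0.5), so
# 50 + 100 * tuning lands in [0, 100).
import numpy as np
import librosa

freqs = np.array([442.0, 884.0])      # slightly sharp A4 and A5
tuning = librosa.pitch_tuning(freqs)  # about +0.08 bins
print(int(50 + 100 * tuning))         # tuning as a value in [0, 100)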
def audioAnalyze(y, sr):
    # librosa.piptrack to extract pitch
    pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
    # the first ~10 columns are often meaningless
    pitches_candidate = []
    for time in range(10, len(pitches[0])):
        pitches_candidate.append(detect_pitch(pitches, time))
    # onset detection; assumption: a chord usually happens just after an onset
    onset_frames = librosa.onset.onset_detect(y=y, sr=sr)
    chord_note_candidate = []
    for x in onset_frames:
        if x < len(pitches_candidate):
            # round to exact note frequencies so the harmonics can be
            # excluded conveniently
            chord_note_candidate.append(
                librosa.note_to_hz(librosa.hz_to_note(pitches_candidate[x])))
    chord_note_no_harmonics = []
    for x in chord_note_candidate:
        # x is a list of lists, so x[0] holds the frequencies at one onset
        temp = []
        for i in range(0, len(x[0])):
            if i == 0:
                temp.append(x[0][i])
            for j in range(0, i):
                if j == i - 1:
                    temp.append(x[0][i])
                    break
                elif checkHarmonics(x[0][j], x[0][i]):
                    break
        chord_note_no_harmonics.append(temp)
    # https://stackoverflow.com/questions/3724551/python-uniqueness-for-list-of-lists
    chord_note_no_harmonics = [
        list(x) for x in set(tuple(x) for x in chord_note_no_harmonics)
    ]
    return chord_note_no_harmonics
import librosa
import librosa.display
import matplotlib.pyplot as plt

filename = 'temp.wav'
y, sr = librosa.load(filename)
print(sr)
pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
mfccs = librosa.feature.mfcc(y=y, sr=sr)
print('mfcc calc done')
# visualize mfccs (librosa.display must be imported explicitly)
librosa.display.specshow(mfccs, x_axis='time')
# plt.imshow(mfccs)
plt.colorbar()
plt.title('MFCC')
plt.tight_layout()
plt.show()
import librosa
import librosa.display
import soundfile as sf

print("Now Loading file...")
s_n = "WAV_CUT/BornToBeNN_song3.wav"
y, sr = librosa.load(s_n)
print("Now Dividing file...")
y_h, y_p = librosa.effects.hpss(y)
print("Now Pitching file...")
pitches, magnitudes = librosa.piptrack(y=y, sr=sr)
s_h = "WAV_CUT/BornToBeNN_song3_har.wav"
s_p = "WAV_CUT/BornToBeNN_song3_per.wav"
print("Now Saving file...")
# librosa.output.write_wav was removed in librosa 0.8; use soundfile instead
sf.write(s_h, y_h, sr)
sf.write(s_p, y_p, sr)
print(len(pitches))
for j in range(0, len(pitches[0]) - 1):
    for i in range(0, len(pitches) - 1):
        if magnitudes[i, j] != 0:
            print("At " + str(0.3 * j) + " ~ " + str(0.3 * (j + 1)) + " Seconds.")
            print("Hz : " + str(pitches[i, j]) + "\tdB : " + str(magnitudes[i, j]))
# pitches[f, t] and magnitudes[f, t]: f is the frequency bin index,
# t is the frame index
def features(X, sample_rate):
    stft = np.abs(librosa.stft(X))
    # fmin and fmax correspond to the minimum and maximum fundamental
    # frequency of human speech
    pitches, magnitudes = librosa.piptrack(y=X, sr=sample_rate, S=stft,
                                           fmin=70, fmax=400)
    pitch = []
    for i in range(magnitudes.shape[1]):
        index = magnitudes[:, i].argmax()  # was [:, 1]: same frame every pass
        pitch.append(pitches[index, i])
    pitch_tuning_offset = librosa.pitch_tuning(pitches)
    pitchmean = np.mean(pitch)
    pitchstd = np.std(pitch)
    pitchmax = np.max(pitch)
    pitchmin = np.min(pitch)
    # spectral centroid
    cent = librosa.feature.spectral_centroid(y=X, sr=sample_rate)
    cent = cent / np.sum(cent)
    meancent = np.mean(cent)
    stdcent = np.std(cent)
    maxcent = np.max(cent)
    # spectral flatness
    flatness = np.mean(librosa.feature.spectral_flatness(y=X))
    # MFCC features with 50 coefficients
    mfccs = np.mean(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50).T, axis=0)
    mfccsstd = np.std(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50).T, axis=0)
    mfccmax = np.max(librosa.feature.mfcc(y=X, sr=sample_rate, n_mfcc=50).T, axis=0)
    # chromagram
    chroma = np.mean(librosa.feature.chroma_stft(S=stft, sr=sample_rate).T, axis=0)
    # mel spectrogram
    mel = np.mean(librosa.feature.melspectrogram(y=X, sr=sample_rate).T, axis=0)
    # spectral contrast
    contrast = np.mean(librosa.feature.spectral_contrast(S=stft, sr=sample_rate).T, axis=0)
    # zero-crossing rate
    zerocr = np.mean(librosa.feature.zero_crossing_rate(X))
    S, phase = librosa.magphase(stft)
    meanMagnitude = np.mean(S)
    stdMagnitude = np.std(S)
    maxMagnitude = np.max(S)
    # RMS energy (librosa.feature.rmse was renamed to rms)
    rmse = librosa.feature.rms(S=S)[0]
    meanrms = np.mean(rmse)
    stdrms = np.std(rmse)
    maxrms = np.max(rmse)
    ext_features = np.array([
        flatness, zerocr, meanMagnitude, maxMagnitude, meancent, stdcent,
        maxcent, stdMagnitude, pitchmean, pitchmax, pitchstd,
        pitch_tuning_offset, meanrms, maxrms, stdrms
    ])
    ext_features = np.concatenate((ext_features, mfccs, mfccsstd, mfccmax,
                                   chroma, mel, contrast))
    return ext_features
    win_length=win_length)
times = librosa.frames_to_time(beats, sr=sr)
# time_to_frames takes no win_length argument
frames = librosa.time_to_frames(
    times,
    sr=sr,
    hop_length=hop_length,
    n_fft=n_fft,
)
# file = open((os.path.join(args.outdir, name)) + '_beats.txt', "w+")
# for beat in beats:
#     file.write(str(beat) + ' ')
# file.close()
np.save((os.path.join(args.outdir, name)) + "_beats", np.array(beats))
"""extract pitches"""
pitches, magnitudes = librosa.piptrack(
    y=y,
    sr=sr,
    n_fft=n_fft,
    hop_length=hop_length,
    win_length=win_length,
)
pitches = pitches.T
# file = open((os.path.join(args.outdir, name)) + '_pitches.txt', "w+")
pitch = np.zeros((pitches.shape[0]))
for i in range(pitches.shape[0]):
    pitch[i] = max(pitches[i])
# file.close()
np.save((os.path.join(args.outdir, name)) + "_pitch", pitch)
def get_pitch(wavefile):
    y, sr = librosa.load(wavefile)
    # trim to an even number of samples
    if len(y) % 2 != 0:
        y = y[:-1]
    pitches, mag = librosa.piptrack(y=y, sr=sr)
    return pitches, mag