def _load_onset_envelope(path, duration=60, hop_length=512):
    """Load the start of an audio file and return ``(times, oenv)``.

    ``oenv`` is the onset-strength envelope of the first ``duration``
    seconds of ``path`` and ``times`` holds the matching frame timestamps.
    """
    y, sr = librosa.load(path, duration=duration)
    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    times = librosa.times_like(oenv, sr=sr, hop_length=hop_length)
    return times, oenv


def plotGraph(vocalsName, beatsName):
    """Plot the onset strength of a vocal track and a beat track together.

    The audio is read from ``./spedUpVocals/<vocalsName>.mp3`` and
    ``./spedUpBeats/<beatsName>.mp3`` (first 60 seconds of each) and the
    figure is written to ``./graphs/<vocalsName>-<beatsName>``.

    The tempogram, global autocorrelation and tempo estimates that used to
    be computed here were never used by the plot and have been dropped.
    """
    vocalsFile = './spedUpVocals/{}'.format(vocalsName) + '.mp3'
    beatsFile = './spedUpBeats/{}'.format(beatsName) + '.mp3'

    # Load both tracks before creating the figure so a failed load does
    # not leave an empty figure behind.
    vocal_times, vocal_oenv = _load_onset_envelope(vocalsFile)
    beat_times, beat_oenv = _load_onset_envelope(beatsFile)

    fig, ax = plt.subplots(nrows=1, figsize=(120, 15))
    try:
        ax.plot(vocal_times, vocal_oenv, label='Vocal')
        ax.plot(beat_times, beat_oenv, 'C2', label='Accompaniment')
        ax.label_outer()
        ax.legend(frameon=True)

        graphFileName = './graphs/{}-{}'.format(vocalsName, beatsName)
        os.makedirs(os.path.dirname(graphFileName), exist_ok=True)
        fig.savefig(graphFileName)
    finally:
        # Always release the (very large) figure, even if saving fails.
        plt.close(fig)
def get_defects_echo(self): """ Получение маркеров дефекта echo. """ # Сильное эхо детектируется по значению автокорреляционной фунции по короткому окну. # Если использовать только данный признак, то возникает слишком много ложных срабатываний # на музыке, для которой характерно повторение звуков (темп, барабаны и т.д.). # Для отсечения музыки вычисляется глобальное значение автокорреляционной функции # для темпограммы - сохранение данной величины на высоком уровне свидетельствует # о сохранении темпа в записи. # Для скорости отсечение по глобальной автокорреляции темпограммы делаем по одному # каналу. # получаем список сегментов для обработки + время каждого сегмента segs = self.separator(self.Settings.Echo.Sep) # загрузка настроек setting = self.Settings.Echo # обрабатываем посегментно for s in segs: if self.FlagChannel == 2: # обрабатываем каждый канал сегмента for n, x in enumerate(s[0]): # Ориентируемся по каналу 0 для определения подходящей темпограммы. # Для этого не требуется построение спектрограмм. oenv = librosa.onset.onset_strength(y=x, sr=self.SampleRate) tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=self.SampleRate) acg = librosa.autocorrelate(oenv, max_size=tempogram.shape[0]) acg = librosa.util.normalize(acg) if (acg[:len(acg) // 2].mean()) > setting.GlobNormCorrThr: return # Для анализа эхо не строим никакие спектрограммы. ln = int(self.SampleRate * setting.LocCorrWin) parts = len(x) // ln for i in range(parts): yp = x[i * ln:(i + 1) * ln] ac = librosa.autocorrelate(yp) ac = ac[ln // 5:] if max(ac) > setting.LocCorrThr: print( f'root path file: {self.FileName}, channel number {n}, name of defect: echo,' f' time mark: {s[1] + i * (ln / self.SampleRate)}, ' f'{s[1] + (i + 1) * (ln / self.SampleRate)}')
def compute_BSpectrum(self, ret):
    """Compute the beat spectrum and beat spectrogram of the audio signal.

    Reads ``self.spec`` and updates ``self.pspec`` (power spectrogram),
    ``self.bspec_`` (2-D beat spectrogram) and ``self.bspec`` (1-D beat
    spectrum, accumulated in place and then normalised).

    ret : one of ``'spectrum'``, ``'spectrogram'`` or ``'both'``. The
          historical misspelling ``'specrtum'`` is still accepted, because
          it used to be the only value that returned the spectrum.

    Returns the beat spectrum, the beat spectrogram, or both as a tuple.
    For any other string a hint is printed and ``None`` is returned.
    """
    assert isinstance(ret, str), \
        "Please specify a string \'spectrum\'|\'spectrogram\'|\'both\'"

    # Power spectrogram
    self.pspec = np.abs(lbr.magphase(self.spec)[0]) ** 2

    # Beat spectrogram: autocorrelate every row of the power spectrogram.
    for i, row in enumerate(self.pspec):
        self.bspec_[i] = lbr.autocorrelate(row, max_size=row.size)

    # Beat spectrum: sum of the beat-spectrogram rows.
    for j in range(self.pspec.shape[0]):
        self.bspec += self.bspec_[j, :]

    # Normalize the beat spectrum by its zero-lag value.
    self.bspec /= self.bspec[0]

    # Zero the first element only (removes DC, preserves length). The old
    # mask ``bspec == 1`` would also have wiped any later bin equal to 1.
    self.bspec[0] = 0

    # Return what has been asked for
    if ret in ('spectrum', 'specrtum'):
        return self.bspec
    if ret == 'spectrogram':
        return self.bspec_
    if ret == 'both':
        return self.bspec, self.bspec_
    print("Please specify \'spectrum\'|\'spectrogram\'|\'both\'")
    return None
def getFeatures(self, path):
    """Load an audio file and return ``(mfcc, energy, autocorrelation)``.

    e.g., ``features = getFeatures('noise_data/user/5.wav')``

    mfcc            : 13 MFCCs per 30 ms hop.
    energy          : RMS energy per 30 ms hop.
    autocorrelation : autocorrelation of the whole raw signal.
    """
    signal, samplingRate = lbr.load(path)

    # Hop size shared by all frame-based features.
    frame_ms = 30
    hop = int(samplingRate * frame_ms / 1000)

    # Compute MFCC features from the raw signal
    mfcc = lbr.feature.mfcc(y=signal, sr=samplingRate, hop_length=hop, n_mfcc=13)

    # Compute energy of each frame. ``rmse`` was renamed ``rms`` in newer
    # librosa releases; use whichever this installation provides.
    rms = getattr(lbr.feature, 'rms', None) or lbr.feature.rmse
    energy = rms(y=signal, hop_length=hop)

    # Autocorrelation of the signal (raw material for pitch estimation)
    autocorrelation = lbr.autocorrelate(signal)

    # TODO: More features (deltas, zero-crossing rate, roll-off, bandwidth)
    return (mfcc, energy, autocorrelation)
def linear_predictive_coding(frames, n_coeff=20):
    """Return linear predictive coding coefficients for each audio frame.

    frames  : 2-D numpy.ndarray where each line represents an audio frame
    n_coeff : number of LPC coefficients to generate (also equal to the
              maximum autocorrelation lag order considered); it is capped
              at ``frames.shape[1] - 1``

    Returns ``(lpc, frames_rmse)``: the per-frame coefficients and the
    per-frame root mean squared prediction error.

    Raises ValueError if ``frames`` is not 2-D, has fewer than two samples
    per frame, or if ``n_coeff`` is not strictly positive.
    """
    # Check arguments validity. Adjust the number of coefficients.
    check_type_validity(frames, np.ndarray, 'frames')
    if frames.ndim != 2:
        raise ValueError('`frames` should be a 2-D np.array.')
    check_type_validity(n_coeff, int, 'n_coeff')
    if n_coeff < 1:
        raise ValueError('`n_coeff` should be a strictly positive int.')
    if frames.shape[1] < 2:
        # Otherwise n_coeff would be capped to 0 and the solver would fail
        # with an obscure error.
        raise ValueError('`frames` should hold at least 2 samples per frame.')
    n_coeff = min(n_coeff, frames.shape[1] - 1)
    # Compute the frame-wise LPC coefficients. ``max_size`` is passed by
    # keyword: recent librosa releases reject it as a positional argument.
    autocorrelations = librosa.autocorrelate(frames, max_size=n_coeff + 1)
    lpc = np.array([
        # Levinson-Durbin recursion. False positive pylint: disable=no-member
        scipy.linalg.solve_toeplitz(autocorr[:-1], autocorr[1:])
        for autocorr in autocorrelations
    ])
    # Compute the frame_wise root mean squared prediction errors.
    frame_wise_errors = np.array([
        frames[:, i] - np.sum(lpc * frames[:, i - n_coeff:i][:, ::-1], axis=1)
        for i in range(n_coeff, frames.shape[1])
    ])
    frames_rmse = np.sqrt(np.mean(np.square(frame_wise_errors), axis=0))
    # Return the LPC coefficients and error terms.
    return lpc, frames_rmse
def test_tempogram_odf_equiv(tempo, center):
    """Check a full-window tempogram column against the plain autocorrelation.

    A synthetic impulse train at ``tempo`` is used as the onset envelope.
    With a rectangular window as long as the envelope and no
    normalisation, the reference tempogram column must equal
    ``librosa.autocorrelate`` of that envelope.
    """
    sr, hop_length, duration = 22050, 512, 8

    # Impulse train: one unit pulse every `step` frames.
    step = int(sr * 60.0 // (hop_length * tempo))
    odf = np.zeros(duration * sr // hop_length)
    odf[::step] = 1

    expected = librosa.autocorrelate(odf)

    tempogram = librosa.feature.tempogram(
        onset_envelope=odf,
        sr=sr,
        hop_length=hop_length,
        win_length=len(odf),
        window=np.ones,
        center=center,
        norm=None,
    )

    # With centering, the reference frame is the middle one.
    column = len(odf) // 2 if center else 0
    assert np.allclose(expected, tempogram[:, column])
def estimate_tempo(ose):
    """Estimate the tempo of one piece from its onset strength envelope.

    Uses the precomputed global tempo information parameters
    (``Globals.TAU_0``, ``Globals.FFT_HOP``, ``Globals.OSE_SAMPLE_RATE``).

    :param ose: The onset strength envelope
    :return: The tactus estimate, the estimated tempo expressed in terms
        of OSE frames and whether duple (True) or triple (False) tempo is
        assumed
    :raises ValueError: if the envelope is too short to evaluate any lag

    The tempo period strength (TPS) array is indexed directly by lag.
    Previously the list started at lag 1, so every index taken from it
    (and every harmonic ``2*tau`` / ``3*tau`` lookup) was off by one frame.
    """
    n_lags = ose.size
    if n_lags < 5:
        # The triple-metre score of lag 1 already needs lag 4.
        raise ValueError('onset strength envelope is too short')

    # Calculate autocorrelation of onset strength envelope
    ac = librosa.autocorrelate(ose)

    # Weighted autocorrelation (as seen in the Ellis paper); tps[tau] is
    # the strength of lag `tau`. Lag 0 is never a tempo candidate.
    tps = np.empty(n_lags)
    tps[0] = -np.inf
    for tau in range(1, n_lags):
        tps[tau] = autocorrelation_weighting(tau, Globals.TAU_0) * ac[tau]

    # The strongest lag indicates the most likely tempo period.
    tau_index = np.argmax(tps)

    # Candidate lags for the duple / triple scores, bounded both by the
    # original search range and by the lags that actually exist (the old
    # code raised IndexError on short envelopes).
    search_range = 2000
    last_lag = n_lags - 1
    taus2 = np.arange(1, min(search_range, (last_lag - 1) // 2 + 1))
    taus3 = np.arange(1, min(search_range, (last_lag - 1) // 3 + 1))

    tps2 = (tps[taus2] + 0.5 * tps[2 * taus2]
            + 0.25 * tps[2 * taus2 - 1] + 0.25 * tps[2 * taus2 + 1])
    tps3 = (tps[taus3] + 0.33 * tps[3 * taus3]
            + 0.33 * tps[3 * taus3 - 1] + 0.33 * tps[3 * taus3 + 1])

    tau2 = taus2[np.argmax(tps2)]
    tau3 = taus3[np.argmax(tps3)]

    max_vals = [tps[tau_index], tps2.max(), tps3.max()]
    metre = np.argmax(max_vals)

    if metre == 0:
        # Duple tempo normal time
        tau_samples = tau_index * Globals.FFT_HOP
        tactus = tau_samples / Globals.OSE_SAMPLE_RATE
        return tactus, tau_index, True
    if metre == 1:
        # Duple tempo double time
        tau_samples = tau2 * Globals.FFT_HOP
        tactus = (1 / 2) * tau_samples / Globals.OSE_SAMPLE_RATE
        return tactus, tau2, True
    # Triplet tempo
    tau_samples = tau3 * Globals.FFT_HOP
    tactus = (1 / 3) * tau_samples / Globals.OSE_SAMPLE_RATE
    return tactus, tau3, False
def get_pitch(self, fmin=50.0, fmax=2000.0):
    """Estimate the pitch of the audio between consecutive onsets.

    Each inter-onset segment of ``self._audio_array`` is autocorrelated and
    the strongest lag inside the ``[fmin, fmax]`` band is converted to a
    frequency.

    Returns ``(median, mean)`` of the per-segment frequencies. Segments
    that are too short to contain any lag in the band are skipped instead
    of contributing an infinite frequency.
    """
    sr = self._sampling_rate
    x = self._audio_array
    onset_samples = librosa.onset.onset_detect(y=x,
                                               sr=sr,
                                               units='samples',
                                               hop_length=self._hop_length,
                                               backtrack=False,
                                               pre_max=20,
                                               post_max=20,
                                               pre_avg=100,
                                               post_avg=100,
                                               delta=0.2,
                                               wait=0)

    # Lag band matching the requested frequency range; lag 0 is excluded
    # because it would mean an infinite frequency.
    lag_min = max(1, int(sr / fmax))
    lag_max = int(sr / fmin)

    freqs = []
    # Same segments as before: every pair of consecutive onsets.
    for n0, n1 in zip(onset_samples[:-1], onset_samples[1:]):
        r = librosa.autocorrelate(x[n0:n1])
        band = r[lag_min:lag_max]
        if band.size == 0:
            continue
        # Location of the maximum autocorrelation inside the band.
        lag = lag_min + band.argmax()
        freqs.append(float(sr) / lag)

    freqs1 = np.array(freqs)
    return np.median(freqs1), freqs1.mean()
def plot_static_beat(tempo, onset_env, sampling_rate):
    """Plot the onset autocorrelation on a BPM axis and mark ``tempo``.

    tempo         : single-element array (or scalar) tempo estimate in BPM
    onset_env     : onset strength envelope
    sampling_rate : sampling rate of the audio the envelope came from

    Returns the matplotlib figure.
    """
    # Convert to scalar (``np.asscalar`` no longer exists in recent NumPy).
    tempo = np.asarray(tempo).item()
    # Compute 2-second windowed autocorrelation
    hop_length = 512
    auto_correlation = librosa.autocorrelate(
        onset_env, max_size=2 * sampling_rate // hop_length)
    freqs = librosa.tempo_frequencies(len(auto_correlation),
                                      sr=sampling_rate,
                                      hop_length=hop_length)
    # Plot on a BPM axis. We skip the first (0-lag) bin.
    fig = plt.figure(figsize=(8, 4))
    ax = fig.add_subplot(111)
    # ``base`` replaces the ``basex`` keyword removed from matplotlib.
    ax.semilogx(freqs[1:],
                librosa.util.normalize(auto_correlation)[1:],
                label='Onset autocorrelation',
                base=2)
    ax.axvline(tempo,
               0,
               1,
               color='r',
               alpha=0.75,
               linestyle='--',
               label='Tempo: {:.2f} BPM'.format(tempo))
    ax.grid()
    ax.axis('tight')
    return fig
def __test(y, max_size):
    """Check the output length of ``librosa.autocorrelate``.

    The result is as long as the input, unless ``max_size`` is given and
    does not exceed the input length, in which case it is truncated to
    ``max_size``.
    """
    result = librosa.autocorrelate(y, max_size=max_size)

    truncated = max_size is not None and not max_size > len(y)
    expected_length = max_size if truncated else len(y)

    eq_(len(result), expected_length)
def plot_correlation1(self, onset_env, sr):
    """Refresh the live plot line ``self.li`` with the onset autocorrelation.

    The x data is the BPM value of each lag and the y data the normalised
    autocorrelation; the zero-lag bin is skipped in both.
    """
    hop_length = 512
    # About two seconds of lags. ``max_size`` is passed by keyword because
    # recent librosa releases reject it as a positional argument.
    ac = librosa.autocorrelate(onset_env, max_size=2 * sr // hop_length)
    freqs = librosa.tempo_frequencies(len(ac), sr=sr, hop_length=hop_length)
    self.li.set_xdata(freqs[1:])
    self.li.set_ydata(librosa.util.normalize(ac)[1:])
    # Give the GUI event loop a chance to redraw.
    plt.pause(0.001)
def estimate_pitch(self, segment, sr, fmin=50.0, fmax=2000.0):
    """Estimate the fundamental frequency of ``segment`` by autocorrelation.

    Only lags corresponding to frequencies between ``fmin`` and ``fmax``
    are considered.

    Returns the frequency in Hz truncated to an int, or ``0`` when the
    segment is too short to contain any lag in that band (this used to end
    in a division by zero followed by an OverflowError).
    """
    r = librosa.autocorrelate(segment)

    # Lag band for the requested frequency range; lag 0 is never a pitch.
    lag_min = max(1, int(sr / fmax))
    lag_max = int(sr / fmin)
    band = r[lag_min:lag_max]
    if band.size == 0:
        return 0

    # Strongest lag inside the band.
    lag = lag_min + int(band.argmax())
    return int(float(sr) / lag)
def onset_estimate_bpm(onsets, start_bpm, fft_res):
    """Estimate the BPM from an onset envelope

    Arguments:
      onsets     -- (ndarray) time-series of onset strengths
      start_bpm  -- (float) initial guess of the BPM
      fft_res    -- (float) resolution of FFT (sample rate / hop length)

    Returns bpm:
      bpm        -- (float) estimated BPM; ``start_bpm`` is returned when
                    no usable period can be found
    """
    ac_size = 4.0
    duration = 90.0
    end_time = 90.0
    bpm_std = 1.0

    # Chop onsets to X[(upper_limit - duration):upper_limit]
    # or as much as will fit. Slice bounds must be integers.
    maxcol = int(min(len(onsets) - 1, np.round(end_time * fft_res)))
    mincol = int(max(0, maxcol - np.round(duration * fft_res)))

    # Use auto-correlation out of 4 seconds (empirically set??)
    ac_window = int(np.round(ac_size * fft_res))

    # Compute the autocorrelation
    x_corr = librosa.autocorrelate(onsets[mincol:maxcol], max_size=ac_window)

    # A song shorter than the window yields fewer lags than requested;
    # shrink the window so the prior below has a matching length.
    ac_window = min(ac_window, len(x_corr))
    if ac_window == 0:
        return start_bpm

    # re-weight the autocorrelation by log-normal prior
    # NOTE(review): bpms[k] is the BPM of lag k + 1 while x_corr[k] is
    # lag k - looks like an off-by-one in the weighting; confirm.
    bpms = 60.0 * fft_res / (np.arange(1, ac_window + 1))

    # Smooth the autocorrelation by a log-normal distribution
    x_corr = x_corr * np.exp(-0.5 * ((np.log2(bpms / start_bpm)) / bpm_std)**2)

    # Get the local maximum of weighted correlation. ``localmax`` lives
    # in ``librosa.util`` in current releases.
    localmax = getattr(librosa, 'localmax', None) or librosa.util.localmax
    x_peaks = localmax(x_corr)

    # Zero out all peaks before the first negative
    x_peaks[:np.argmax(x_corr < 0)] = False

    # Choose the best peak out of .33, .5, 2, 3 * start_period
    candidates = np.multiply(
        np.argmax(x_peaks * x_corr), [1.0/3, 1.0/2, 1.0, 2.0, 3.0])
    candidates = candidates.astype(int)
    candidates = candidates[candidates < ac_window]

    best_period = candidates[np.argmax(x_corr[candidates])]
    if best_period <= 0:
        # A zero-lag winner carries no tempo information.
        return start_bpm
    return 60.0 * fft_res / best_period
def animate(i):
    """Animation callback: read one audio chunk, analyse it, update ``line``.

    Returns the one-element tuple ``(line,)``.

    NOTE(review): the original indentation was lost; the nesting below is
    a best-effort reconstruction - confirm against the upstream file.
    """
    # A new PyAudio instance and input stream are opened on every call.
    p = pyaudio.PyAudio()
    stream = p.open(format=pyaudio.paInt16, channels=1, rate=RATE, input=True, frames_per_buffer=CHUNK)
    data = np.fromstring(stream.read(CHUNK), dtype=np.int16)
    abs_data = np.abs(data)
    mean_abs_data = np.mean(abs_data)
    #print(mean_abs_data)
    if mean_abs_data > 1000:  # skip the computation when no piano sound is heard
        n = len(data)
        # NOTE(review): `x` is not defined in this function and this result
        # is overwritten by the FFT below - looks like dead code.
        y = librosa.autocorrelate(x, max_size=512)
        x1 = np.linspace(0, 44100 / 2, n)
        # plt.plot(data)
        # plt.show()
        # Magnitude spectrum of the chunk, scaled by its length.
        y = np.fft.fft(data) / n
        y = np.absolute(y)
        sum_y = np.sum(y)
        sum_y_list.append(sum_y)
        print(sum_y)
        # Keep the first half of the spectrum.
        y = y[range(int(n / 2))]
        y1 = copy.copy(y)
        # Establish a reference so the peak-detection threshold can adapt.
        max_peak = 0
        std_peaks, _ = find_peaks(y, height=1500)
        #print('std_peak : ', std_peaks)
        if len(std_peaks) > 0:
            max_peak = np.max(y[std_peaks])
            # Threshold at 40% of the strongest peak.
            std_threshold = max_peak * 0.4
            #print('max_peak : ', std_threshold)
            peaks, _ = find_peaks(y, height=std_threshold)
            gye_name = scale(peaks * x_interval)
            if not gye_name[0]:
                print(1)
            else:
                print('y:', y)
                multiple_freq_decrease(y, peaks)
                peaks1, _ = find_peaks(y, height=std_threshold)
                print('peaks : ', peaks1)
                gye_name1 = scale(peaks1 * x_interval)
                print(gye_name1)
    # Release the audio resources opened above.
    stream.stop_stream()
    print('빠져나옴')
    stream.close()
    p.terminate()
    # NOTE(review): `x` and (for quiet chunks) `y` are not assigned in this
    # function on every path - presumably module-level names; verify.
    line.set_data(x, y)
    return line,
def __test(y, truth, max_size, axis):
    """Compare ``librosa.autocorrelate`` along ``axis`` with a reference.

    ``truth`` is the untruncated reference result; when ``max_size`` does
    not exceed the input length along ``axis`` the reference is cut to the
    same length before comparing. Real input must give real output.
    """
    ac = librosa.autocorrelate(y, max_size=max_size, axis=axis)

    my_slice = [slice(None)] * truth.ndim
    if max_size is not None and max_size <= y.shape[axis]:
        my_slice[axis] = slice(min(max_size, y.shape[axis]))

    if not np.iscomplexobj(y):
        assert not np.iscomplexobj(ac)

    # Multi-dimensional indexing needs a tuple; a list of slices is
    # rejected by current NumPy.
    assert np.allclose(ac, truth[tuple(my_slice)])
def estimate_pitch(segment, sr, fmin=50.0, fmax=2000.0):
    """Estimate the fundamental frequency of ``segment`` by autocorrelation.

    Only lags corresponding to frequencies between ``fmin`` and ``fmax``
    are searched.

    Returns the frequency in Hz, or ``nan`` when the segment is too short
    to contain any lag in that band. The previous version could pick lag 0
    in that case and divide by zero.
    """
    # Compute the autocorrelation of the input segment.
    r = librosa.autocorrelate(segment)

    # Lower and upper lag limits for the argmax; lag 0 is never a pitch.
    lag_min = max(1, int(sr / fmax))
    lag_max = int(sr / fmin)
    band = r[lag_min:lag_max]
    if band.size == 0:
        return float('nan')

    # Location of the maximum autocorrelation inside the band.
    lag = lag_min + int(band.argmax())
    return float(sr) / lag
def autocorrelate_bps(self, amount=1):
    """Autocorrelate ``self.y`` up to a lag derived from the beat rate.

    The lag is ``self.bps * amount`` and the autocorrelation is limited to
    ``lag * self.sr / 512`` values. The pair ``[ac, lag]`` is stored in
    ``self.autocor_bps`` and also returned.

    When ``self.sr`` is falsy an error hint is printed and nothing is
    returned.
    """
    if not self.sr:
        print(
            "[ autocorrelate_bps ] Error happend: no sr, y, bps... \n"
            "[ autocorrelate_bps ] Try to use find_beat_per_second() and then repeat !"
        )
        return None

    print('sr = ', self.sr)
    # lag = bps * amount
    lag = self.bps * amount
    max_lag = lag * self.sr / 512
    ac = librosa.autocorrelate(self.y, max_size=max_lag)
    # Stored and returned lists are distinct objects, as before.
    self.autocor_bps = [ac, lag]
    return [ac, lag]
def estimate_pitch_ac(segment, sr=global_sr, fmin=50.0, fmax=2000.0):
    """Estimate the fundamental frequency of ``segment`` by autocorrelation.

    Only lags corresponding to frequencies between ``fmin`` and ``fmax``
    are searched.

    Returns the frequency in Hz, or ``nan`` when the segment is too short
    to contain any lag in that band. The previous version could pick lag 0
    in that case and divide by zero.
    """
    # Compute autocorrelation of input segment.
    r = librosa.autocorrelate(segment)

    # Lower and upper lag limits for the argmax; lag 0 is never a pitch.
    first_lag = max(1, int(sr / fmax))
    last_lag = int(sr / fmin)
    candidates = r[first_lag:last_lag]
    if candidates.size == 0:
        return float('nan')

    # Find the location of the maximum autocorrelation.
    best_lag = first_lag + int(candidates.argmax())
    return float(sr) / best_lag
def _print_image(file, n, onset_env, hi_lag_1, lo_lag_1, hi_lag_6, lo_lag_6,
                 hi_lag_8, lo_lag_8):
    """
    Print PDF of RLAC.

    Plots the length-compensated autocorrelation of ``onset_env`` and
    highlights the three lag windows (8, 6 and 1 IBIs), each with a dashed
    line at its centre. The figure is saved under ``figures/`` using the
    base name of ``file`` with ``.wav`` replaced by ``_<n>.pdf``.
    """
    ac = librosa.autocorrelate(onset_env)
    # Divide every lag by the number of samples that overlap at that lag.
    total = ac.shape[0]
    for lag in range(total):
        ac[lag] /= total - lag

    plt.rc('font', family='Times New Roman')
    axes = plt.gca()
    plt.gcf().set_size_inches(4, 2)
    # Tick labels are shown in thousands of frames.
    thousands = matplotlib.ticker.FuncFormatter(
        lambda x, p: format(int(x) // 1000))
    axes.get_xaxis().set_major_formatter(thousands)

    plt.plot(ac)
    plt.ylim(bottom=0.04)
    plt.xlim(left=0, right=len(ac))

    # (window start, window end, legend label), drawn in this order.
    windows = (
        (hi_lag_8, lo_lag_8, '8 IBIs±4%'),
        (hi_lag_6, lo_lag_6, '6 IBIs±4%'),
        (hi_lag_1, lo_lag_1, '1 IBI±4%'),
    )
    for hi_lag, lo_lag, label in windows:
        masked = ac.copy()
        masked[:hi_lag] = 0
        masked[lo_lag:] = 0
        centre = hi_lag + (lo_lag - hi_lag) // 2
        plt.axvline(x=centre, color='black', linestyle='dashed')
        plt.plot(masked, label=label)

    plt.ylabel('Autocorrelation')
    plt.xlabel('Lag in 1,000 Frames')
    plt.legend(loc='lower right')

    os.makedirs('figures', exist_ok=True)
    target = 'figures/' + basename(file).replace('.wav', '_{}.pdf'.format(n))
    plt.savefig(target, bbox_inches='tight')
    plt.close()
def tempo_track(x, sr):
    """Return evenly spaced beat times for ``x`` from a global tempo estimate.

    x  : audio time series
    sr : sampling rate of ``x``

    The beat grid starts at 0 s, covers the duration of ``x`` and uses the
    beat period of the first tempo estimate.

    The onset envelope, Fourier tempogram and autocorrelation that used to
    be computed here were never used, and the autocorrelation relied on
    ``n0``/``n1``, which this function never defined; they were removed.
    """
    tempo = librosa.beat.tempo(y=x, sr=sr)
    T = len(x) / float(sr)
    seconds_per_beat = 60.0 / tempo[0]
    beat_times = numpy.arange(0, T, seconds_per_beat)
    return beat_times
def detection(filename, fmin, fmax, verify=False):
    """
    Detects whether a mosquito is present in 100ms snippets of a given audio file

    :param filename: Name of audio file; String
    :param fmin: Minimum frequency to look for; int
    :param fmax: Maximum frequency to look for; int
    :param verify: Whether or not to make plots for human verification of the result; Bool
    :return: Mosquito audio snippets AND binary Labels for all 100ms snippets of the audio AND sampling rate;
             np.array of floats AND np.array of ints AND int

    NOTE(review): the original indentation was lost; the nesting below is
    a best-effort reconstruction - confirm against the upstream file.
    """
    audio_parts = []
    # Load and apply bandpass filter
    audio, sr = librosa.load(filename, sr=None)
    b, a = scipy.signal.butter(N=2, Wn=[300, 2048], btype='bandpass', fs=sr)
    audio = scipy.signal.lfilter(b, a, audio)
    # Calculate windows for signal
    ms100_window = sr // 10  # 100ms
    num_windows = audio.size // ms100_window
    start = 0
    # Store one binary mosquito-presence label per window
    labels = np.zeros(shape=num_windows, dtype=int)
    # The loop value `window` is immediately replaced by the audio slice.
    for i, window in enumerate(range(num_windows)):
        # Autocorrelation
        window = audio[start:start+ms100_window]
        r = librosa.autocorrelate(window)
        periodic, first_distance = is_periodic(r, sr//1000, sr)
        if periodic:
            # Pitch Detection
            T = (first_distance / sr)  # Calculate period in seconds
            f0 = round(1 / T, 2)  # Physics, f = 1/T in Hz
            if fmin <= f0 <= fmax:  # Roughly mosquito frequency range (across species)
                labels[i] = 1
                audio_parts.append(window)
        if verify:
            # Save the autocorrelation with its peaks as "<i>.png".
            peaks, _ = scipy.signal.find_peaks(r, distance=sr//1000)
            plt.plot(r)
            plt.plot(peaks[:-1], r[peaks[:-1]], marker="x")
            plt.savefig(f"{i}.png")
            plt.close()
        start += ms100_window
    return audio_parts, labels, sr
def guess_note(y, sr):
    """Guess the note name of ``y`` from its strongest autocorrelation lag.

    Only lags corresponding to 75-700 Hz are searched.

    Returns the note name given by ``librosa.hz_to_note``.
    Raises ValueError when the signal is too short to contain any lag in
    that band (this used to end in a division by zero).
    """
    r = librosa.autocorrelate(y, max_size=5000)

    f_hi = 700
    f_lo = 75
    # Lag band for the frequency range; lag 0 is never a pitch.
    t_lo = max(1, int(sr / f_hi))
    t_hi = int(sr / f_lo)

    band = r[t_lo:t_hi]
    if band.size == 0:
        raise ValueError('signal is too short to estimate a pitch')

    t_max = t_lo + int(band.argmax())
    note = librosa.hz_to_note(float(sr) / t_max)
    return note
def getVoicingActivity(audioSegment,
                       hopLength,
                       method='rmsEnergy',
                       threshold=0.0):
    """Return a per-frame voicing mask for ``audioSegment``.

    Given an audio file as an array (obtained from
    ``audio = librosa.load(filename.wav)``) this returns a binary list
    ``[1,0,0,1,1,1,1,...]`` where each element indicates the voicing nature
    of a frame of audio (determined by ``hopLength``).

    method    : ``'rmsEnergy'`` marks a frame as voiced (1) when its RMS
                energy is at least ``threshold``.
    threshold : RMS threshold; a value of 0.0 prints a warning.

    Raises NotImplementedError for ``method='periodicity'``, which was
    never finished (it used to fail with an unbound-variable error), and
    ValueError for any other unknown method.
    """
    if method == 'rmsEnergy':
        if threshold == 0.0:
            print(
                f"ERROR:Threshold was NOT calculated before utils.getVoicingActivity()."
            )
        # ``rmse`` was renamed ``rms`` in newer librosa releases.
        rms = getattr(librosa.feature, 'rms', None) or librosa.feature.rmse
        rmsEnergy = rms(y=audioSegment,
                        frame_length=2 * hopLength,
                        hop_length=hopLength,
                        center=True)
        return [0 if energy < threshold else 1 for energy in rmsEnergy[0]]

    if method == 'periodicity':
        raise NotImplementedError(
            "Voicing Activity detection method 'periodicity' is not implemented.")

    raise ValueError(
        f"ERROR:Voicing Activity detection Method: {method} INVALID.")
def setTempo(self, plot=False, force=False):
    """Estimate ``self.tempo`` and autocorrelation statistics if needed.

    Nothing happens unless ``plot`` or ``force`` is set or the current
    tempo is NaN. Otherwise the tempo is re-estimated from
    ``self.onsetEnv`` and the mean / standard deviation of the normalised
    onset autocorrelation beyond the tempo lag are stored in
    ``self.autocorrelationMean`` / ``self.autocorrelationStd``. With
    ``plot`` the autocorrelation is also saved to ``<fileName>.png``.

    NOTE(review): ``estimate_tempo`` and ``display.time_ticks`` look like
    legacy librosa APIs - confirm the pinned librosa version.
    """
    if not (plot or force or np.isnan(self.tempo)):
        return

    hop = self.hopLength
    self.tempo = librosa.beat.estimate_tempo(self.onsetEnv,
                                             sr=self.sr,
                                             hop_length=hop)

    # Normalised autocorrelation over about three seconds of lags.
    raw_ac = librosa.autocorrelate(self.onsetEnv,
                                   max_size=3 * self.sr // hop)
    ac = librosa.util.normalize(raw_ac)

    # Lag (in frames) of one beat at the estimated tempo.
    tempo_frames = (60 * self.sr / hop) / self.tempo
    tail = ac[int(tempo_frames):]
    self.autocorrelationStd = np.std(tail)
    self.autocorrelationMean = np.mean(tail)

    if not plot:
        return

    fig = plt.figure(figsize=(20, 10))
    ax = fig.add_subplot(111)
    ax.plot(ac, label='Onset autocorrelation')
    ax.vlines([tempo_frames],
              0,
              1,
              color='r',
              alpha=0.75,
              linestyle='--',
              label='Tempo: {:.2f} BPM'.format(self.tempo))
    std_label = 'Mean+-Std {:.3f}'.format(self.autocorrelationStd)
    ax.axhline(y=self.autocorrelationMean,
               color='k',
               linestyle=':',
               label=std_label)
    ax.axhline(y=self.autocorrelationMean + self.autocorrelationStd,
               color='k',
               linestyle=':')
    ax.axhline(y=self.autocorrelationMean - self.autocorrelationStd,
               color='k',
               linestyle=':')
    lag_times = librosa.frames_to_time(np.arange(len(ac)), sr=self.sr)
    librosa.display.time_ticks(lag_times)
    plt.title(self.fileName)
    plt.xlabel('Lag')
    plt.legend()
    plt.axis('tight')
    plt.savefig(self.fileName + '.png', format='png', dpi=300)
    plt.close(fig)
def estimate_pitch(segment,
                   sr,
                   fmin=librosa.note_to_hz('C3'),
                   fmax=librosa.note_to_hz('C6')):
    """Estimate the fundamental frequency of ``segment`` by autocorrelation.

    Only lags corresponding to frequencies between ``fmin`` and ``fmax``
    (C3 to C6 by default) are searched.

    Returns the frequency in Hz, or ``nan`` when the segment is too short
    to contain any lag in that band. The previous version could pick lag 0
    in that case and divide by zero.
    """
    # Compute autocorrelation of input segment.
    r = librosa.autocorrelate(segment)

    # Lower and upper lag limits for the argmax; lag 0 is never a pitch.
    lo = max(1, int(sr / fmax))
    hi = int(sr / fmin)
    window = r[lo:hi]
    if window.size == 0:
        return float('nan')

    # Find the location of the maximum autocorrelation.
    peak_lag = lo + int(window.argmax())
    return float(sr) / peak_lag
def make_file_data(y, sr, hop, nfft, win_len):
    """Build a per-frame feature matrix for one audio signal.

    y       : audio time series
    sr      : sampling rate
    hop     : hop length shared by all features
    nfft    : FFT size used for the onset envelope
    win_len : frame length used for the zero-crossing rate

    Returns an array with one row per frame; the columns are, in order,
    chroma, spectral contrast, 13 MFCCs, onset strength, zero-crossing
    rate and the autocorrelation of the log onset envelope.
    """
    chroma = librosa.feature.chroma_stft(y=y, sr=sr, hop_length=hop)
    spectral_contrast = librosa.feature.spectral_contrast(y=y,
                                                          sr=sr,
                                                          hop_length=hop)
    onset_env = librosa.onset.onset_strength(y=y,
                                             sr=sr,
                                             hop_length=hop,
                                             n_fft=nfft)
    zcr = librosa.feature.zero_crossing_rate(y,
                                             frame_length=win_len,
                                             hop_length=hop)
    onsetLog = np.log1p(onset_env)
    ac = librosa.autocorrelate(onsetLog)
    # The MFCCs must use the same hop as the other features, otherwise
    # their frame count differs whenever ``hop`` is not librosa's default
    # and the stack below fails. Arguments are passed by keyword, as
    # recent librosa releases require.
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=13, hop_length=hop)
    data = np.vstack([chroma, spectral_contrast, mfcc, onset_env, zcr,
                      ac]).transpose(1, 0)
    return data
def __test_equiv(tempo, center):
    """Check a full-window tempogram column against the plain autocorrelation.

    Builds an impulse-train onset envelope at ``tempo`` (using the
    enclosing ``duration``, ``sr`` and ``hop_length``) and requires the
    reference tempogram column to match ``librosa.autocorrelate``.
    """
    n_frames = duration * sr // hop_length
    period = int(sr * 60. // (hop_length * tempo))

    odf = np.zeros(n_frames)
    odf[::period] = 1

    reference = librosa.autocorrelate(odf)

    tempogram = librosa.feature.tempogram(onset_envelope=odf,
                                          sr=sr,
                                          hop_length=hop_length,
                                          win_length=len(odf),
                                          window=np.ones,
                                          center=center,
                                          norm=None)

    # With centering, the reference frame is the middle one.
    if center:
        frame = len(odf) // 2
    else:
        frame = 0

    assert np.allclose(reference, tempogram[:, frame])
def estimate_pitch(n0, n1, fmin=50.0, fmax=2000.0):
    """Estimate the F0 of the segment ``x[n0:n1]`` by autocorrelation.

    Uses the module-level signal ``x`` and sampling rate ``sr``; the
    estimate is also appended to the module-level list ``f0s``.

    Returns the frequency in Hz, or ``nan`` when the segment is too short
    to contain any lag between ``fmin`` and ``fmax``. The previous version
    could pick lag 0 in that case and divide by zero.
    """
    # Compute autocorrelation of input segment.
    segment = x[n0:n1]
    r = librosa.autocorrelate(segment)

    # Lower and upper lag limits for the argmax; lag 0 is never a pitch.
    lag_lo = max(1, int(sr / fmax))
    lag_hi = int(sr / fmin)
    band = r[lag_lo:lag_hi]

    if band.size:
        # Find the location of the maximum autocorrelation.
        f0 = float(sr) / (lag_lo + int(band.argmax()))
    else:
        f0 = float('nan')

    f0s.append(f0)
    return f0
def tempoVSautoCorrelation(tempo, hop_length):
    """Plot the onset autocorrelation on a BPM axis and mark ``tempo``.

    Uses the module-level ``onset_env`` and ``sr``.

    tempo      : single-element array (or scalar) tempo estimate in BPM
    hop_length : hop length the onset envelope was computed with
    """
    # ``np.asscalar`` no longer exists in recent NumPy.
    tempo = np.asarray(tempo).item()
    # About two seconds of lags. ``max_size`` is passed by keyword because
    # recent librosa releases reject it as a positional argument.
    ac = librosa.autocorrelate(onset_env, max_size=2 * sr // hop_length)
    freqs = librosa.tempo_frequencies(len(ac), sr=sr, hop_length=hop_length)

    plt.figure(figsize=(8, 4))
    # Skip the zero-lag bin. ``base`` replaces the ``basex`` keyword
    # removed from matplotlib.
    plt.semilogx(freqs[1:],
                 librosa.util.normalize(ac)[1:],
                 label='Onset autocorrelation',
                 base=2)
    plt.axvline(tempo,
                0,
                1,
                color='r',
                alpha=0.75,
                linestyle='--',
                label='Tempo: {:.2f} BPM'.format(tempo))
    plt.xlabel('Tempo (BPM)')
    plt.grid()
    plt.title('Static tempo estimation')
    plt.legend(frameon=True)
    plt.axis('tight')
def estimate_pitch(segment, sr, fmin=50.0, fmax=2000.0):
    """Estimate the fundamental frequency of ``segment`` by autocorrelation.

    Only lags corresponding to frequencies between ``fmin`` and ``fmax``
    are searched. The eight progress markers (``1``) are still printed.

    Returns the frequency in Hz, or ``nan`` when the segment is too short
    to contain any lag in that band. The previous version could pick lag 0
    in that case and divide by zero.
    """
    # The markers use the print() call form, which Python 3 requires.
    # Compute autocorrelation of input segment.
    print('1')
    r = librosa.autocorrelate(segment)
    print('1')
    # Define lower and upper limits for the autocorrelation argmax;
    # lag 0 is never a pitch.
    lag_min = max(1, int(sr / fmax))
    print('1')
    lag_max = int(sr / fmin)
    print('1')
    band = r[lag_min:lag_max]
    print('1')
    print('1')
    # Find the location of the maximum autocorrelation (0 = none found).
    lag = lag_min + int(band.argmax()) if band.size else 0
    print('1')
    f0 = float(sr) / lag if lag else float('nan')
    print('1')
    return f0
def generate(self):
    """Build the rhythm feature matrix for every song in ``self.data``.

    Each row holds, in order: the tempo estimate, the mean and standard
    deviation of the normalised global onset autocorrelation, and the
    tempogram features from ``self.util.vector_to_features``.

    Returns an array with one 8-column row per song.
    """
    n_songs = len(self.data)
    rhythm_bpm = np.empty((n_songs, 8))

    for index, audio in enumerate(self.data):
        # Progress report every 100 songs.
        if index % 100 == 0 and self.verbose:
            print("Got rhythm data for {0} songs".format(index))

        envelope = librosa.onset.onset_strength(y=audio, sr=self.sr)
        tempogram = librosa.feature.tempogram(onset_envelope=envelope,
                                              sr=self.sr)
        tempogram_features = self.util.vector_to_features(tempogram)

        # Global autocorrelation, limited to the tempogram's lag count.
        global_ac = librosa.util.normalize(
            librosa.autocorrelate(envelope, max_size=tempogram.shape[0]))

        tempo = librosa.beat.tempo(onset_envelope=envelope, sr=self.sr)

        row = [tempo, global_ac.mean(), global_ac.std(), tempogram_features]
        rhythm_bpm[index] = np.hstack(row)

    return rhythm_bpm
def compute_stm(counts, nbins=300, ncycles=5, n_acf=1500, n_stm=750):
    """Compute windowed autocorrelations and their scale transforms.

    ``counts`` is cut into consecutive, non-overlapping windows of
    ``nbins * ncycles`` samples; a trailing partial window is ignored.

    counts  : 1-D sequence to analyse
    nbins   : samples per cycle
    ncycles : cycles per window
    n_acf   : maximum number of autocorrelation lags kept per window
    n_stm   : ``n_fmt`` passed to ``librosa.fmt`` for each window

    Returns ``(ac_all, scale_all)``: arrays stacking, per window, the
    max-normalised autocorrelation and its fast Mellin transform.

    Raises ValueError if ``nbins * ncycles`` is not positive (this used to
    loop forever).
    """
    nsamples = nbins * ncycles
    if nsamples <= 0:
        raise ValueError('`nbins * ncycles` should be strictly positive.')

    ac_all, scale_all = [], []
    for start_ind in range(0, len(counts) - nsamples + 1, nsamples):
        window = counts[start_ind:start_ind + nsamples]
        ac = librosa.autocorrelate(window, max_size=n_acf)
        ac = librosa.util.normalize(ac, norm=np.inf)
        ac_all.append(ac)
        scale_all.append(librosa.fmt(ac, n_fmt=n_stm))

    return np.array(ac_all), np.array(scale_all)
def setTempo(self, plot=False, force=False):
    """Refresh the tempo estimate and its autocorrelation statistics.

    Runs only when ``plot`` or ``force`` is true or ``self.tempo`` is NaN.
    Updates ``self.tempo``, ``self.autocorrelationStd`` and
    ``self.autocorrelationMean``; with ``plot`` it also writes the
    autocorrelation figure to ``<fileName>.png``.

    NOTE(review): ``estimate_tempo`` and ``display.time_ticks`` look like
    legacy librosa APIs - confirm the pinned librosa version.
    """
    needs_update = plot or force or np.isnan(self.tempo)
    if not needs_update:
        return

    hop_length = self.hopLength
    self.tempo = librosa.beat.estimate_tempo(self.onsetEnv, sr=self.sr, hop_length=hop_length)

    # Normalised autocorrelation over about three seconds of lags.
    n_lags = 3 * self.sr // hop_length
    ac = librosa.util.normalize(librosa.autocorrelate(self.onsetEnv, max_size=n_lags))

    # Statistics of everything past the lag of one beat.
    tempo_frames = (60 * self.sr / hop_length) / self.tempo
    beyond_beat = ac[int(tempo_frames):]
    self.autocorrelationStd = np.std(beyond_beat)
    self.autocorrelationMean = np.mean(beyond_beat)

    if plot:
        mean = self.autocorrelationMean
        std = self.autocorrelationStd

        fig = plt.figure(figsize=(20, 10))
        ax = fig.add_subplot(111)
        ax.plot(ac, label="Onset autocorrelation")
        ax.vlines(
            [tempo_frames],
            0,
            1,
            color="r",
            alpha=0.75,
            linestyle="--",
            label="Tempo: {:.2f} BPM".format(self.tempo),
        )
        # Mean line (labelled) followed by the +/- one-std lines.
        ax.axhline(y=mean, color="k", linestyle=":", label="Mean+-Std {:.3f}".format(std))
        for offset in (std, -std):
            ax.axhline(y=mean + offset, color="k", linestyle=":")
        librosa.display.time_ticks(librosa.frames_to_time(np.arange(len(ac)), sr=self.sr))
        plt.title(self.fileName)
        plt.xlabel("Lag")
        plt.legend()
        plt.axis("tight")
        plt.savefig(self.fileName + ".png", format="png", dpi=300)
        plt.close(fig)
def get_features5(file):
    """Build the "feature set 5" vector for one audio file.

    Per-band spectral crest factor, per-band spectral flatness and
    loudness come from the FeaturePlan / Engine pipeline; the tempogram
    and the harmonic chromagram come from librosa. Each feature matrix is
    summarised with ``calc_statistical_features``, flattened, and all of
    them are concatenated.

    Returns the concatenated 1-D feature vector. Shapes are printed along
    the way as a progress trace.

    The print statements use the print() call form required by Python 3.
    The beat tracking whose results were never used has been removed.
    """
    fp = FeaturePlan(sample_rate=22050)
    fp.addFeature('scfp: SpectralCrestFactorPerBand FFTLength=0 FFTWindow=Hanning blockSize=512 stepSize=256')  # 19
    fp.addFeature('sfp: SpectralFlatnessPerBand FFTLength=0 FFTWindow=Hanning blockSize=512 stepSize=256')  # 19
    fp.addFeature('loudness: Loudness FFTLength=0 FFTWindow=Hanning LMode=Relative blockSize=512 stepSize=256')  # 24
    engine = Engine()
    engine.load(fp.getDataFlow())
    afp = AudioFileProcessor()
    afp.processFile(engine, file)
    feats = engine.readAllOutputs()

    y, sr = librosa.load(file)
    print(y.shape)
    print(sr)

    hop_length = 256
    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr,
                                          hop_length=hop_length)
    ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])  # 384
    ac_global = librosa.util.normalize(ac_global)
    print(ac_global.shape)
    tempo = librosa.beat.estimate_tempo(oenv, sr=sr, hop_length=hop_length)  # 1
    print("tempo", tempo)
    print("tempogram", tempogram.shape)  # 384

    # Chromagram of the harmonic component only.
    y_harmonic, y_percussive = librosa.effects.hpss(y)
    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)  # 12
    c = np.mean(chromagram, axis=1)
    print("c", c.shape)
    print("chromagram", chromagram.shape)
    r = calc_statistical_features(chromagram)
    print(r.shape)

    a1 = calc_statistical_features(feats['scfp'].transpose())  # 19*7 = 133
    a1 = a1.reshape(a1.shape[0] * a1.shape[1])
    print(a1.shape)
    a2 = calc_statistical_features(feats['sfp'].transpose())  # 19*7 = 133
    a2 = a2.reshape(a2.shape[0] * a2.shape[1])
    print(a2.shape)
    a3 = calc_statistical_features(feats['loudness'].transpose())  # 24*7 = 168
    a3 = a3.reshape(a3.shape[0] * a3.shape[1])
    print(a3.shape)
    a4 = calc_statistical_features(tempogram)  # 384*7 = 2688
    a4 = a4.reshape(a4.shape[0] * a4.shape[1])
    print(a4.shape)
    a5 = calc_statistical_features(chromagram)  # 12*7 = 84
    a5 = a5.reshape(a5.shape[0] * a5.shape[1])
    print(a5.shape)

    feature5_set = np.hstack((a1, a2, a3, a4, a5))  # 266+168+84+2688 = 3206
    print("feature5_set", feature5_set.shape)
    return feature5_set
def test_feature():
    """Exploratory run of the feature extractors on one hard-coded file.

    Runs the FeaturePlan / Engine pipeline and a series of librosa
    features on a single pop track and prints their shapes and values.
    Nothing is returned.

    The print statements use the print() call form required by Python 3.
    """
    file = "/mnt/hgfs/vmfiles/genres/pop/pop.00002.wav"
    fp = FeaturePlan(sample_rate=22050)
    fp.addFeature('mfcc: MFCC blockSize=512 stepSize=256')  # 13
    fp.addFeature('sr: SpectralRolloff blockSize=512 stepSize=256')  # 1
    fp.addFeature('sf: SpectralFlux blockSize=512 stepSize=256')  # 1
    fp.addFeature('scfp: SpectralCrestFactorPerBand FFTLength=0 FFTWindow=Hanning blockSize=512 stepSize=256')  # 19
    fp.addFeature('sf1: SpectralFlatness FFTLength=0 FFTWindow=Hanning blockSize=512 stepSize=256')  # 1
    fp.addFeature('sc: SpectralShapeStatistics FFTLength=0 FFTWindow=Hanning blockSize=512 stepSize=256')  # 4
    fp.addFeature('sfp: SpectralFlatnessPerBand FFTLength=0 FFTWindow=Hanning blockSize=512 stepSize=256')  # 19
    fp.addFeature('energy: Energy blockSize=512 stepSize=256')  # 1
    fp.addFeature('loudness: Loudness FFTLength=0 FFTWindow=Hanning LMode=Relative blockSize=512 stepSize=256')  # 24
    fp.addFeature('ms: MagnitudeSpectrum FFTLength=0 FFTWindow=Hanning blockSize=512 stepSize=256')  # 257
    fp.addFeature('ps: PerceptualSharpness FFTLength=0 FFTWindow=Hanning blockSize=512 stepSize=256')  # 1
    fp.addFeature('zcr:ZCR blockSize=512 stepSize=256')  # 1
    engine = Engine()
    engine.load(fp.getDataFlow())
    afp = AudioFileProcessor()
    afp.processFile(engine, file)
    feats = engine.readAllOutputs()
    ceps = feats['zcr']

    y, sr = librosa.load(file)
    print(y.shape)
    print(sr)

    hop_length = 256
    oenv = librosa.onset.onset_strength(y=y, sr=sr, hop_length=hop_length)
    tempogram = librosa.feature.tempogram(onset_envelope=oenv, sr=sr,
                                          hop_length=hop_length)
    ac_global = librosa.autocorrelate(oenv, max_size=tempogram.shape[0])  # 384
    ac_global = librosa.util.normalize(ac_global)
    print(ac_global.shape)
    tempo = librosa.beat.estimate_tempo(oenv, sr=sr, hop_length=hop_length)  # 1
    print("tempo", tempo)
    print("tempogram", tempogram.shape)  # 384

    tempo, beat_frames = librosa.beat.beat_track(y=y, sr=sr)
    print("tempo", tempo)
    print("beat_frames", beat_frames.shape)
    beat_times = librosa.frames_to_time(beat_frames, sr=sr)
    print("beat_times", beat_times.shape)
    print(beat_times)

    y_harmonic, y_percussive = librosa.effects.hpss(y)

    # Compute MFCC features from the raw signal
    mfcc = librosa.feature.mfcc(y=y, sr=sr, hop_length=hop_length, n_mfcc=13)
    print("mfcc", mfcc.shape)

    # And the first-order differences (delta features)
    mfcc_delta = librosa.feature.delta(mfcc)
    print("mfcc_delta", mfcc_delta.shape)

    # Stack and synchronize between beat events
    # This time, we'll use the mean value (default) instead of median
    beat_mfcc_delta = librosa.feature.sync(np.vstack([mfcc, mfcc_delta]),
                                           beat_frames)
    print("beat_mfcc_delta", beat_mfcc_delta.shape)

    chromagram = librosa.feature.chroma_cqt(y=y_harmonic, sr=sr)  # 12
    c = np.mean(chromagram, axis=1)
    print("c", c.shape)
    print("chromagram", chromagram.shape)
    r = calc_statistical_features(chromagram)
    print(r.shape)

    beat_chroma = librosa.feature.sync(chromagram, beat_frames,
                                       aggregate=np.median)
    print("beat_chroma", beat_chroma.shape)

    # Finally, stack all beat-synchronous features together
    beat_features = np.vstack([beat_chroma, beat_mfcc_delta])
    print("beat_features", beat_features.shape)
    beat_feature_set = np.mean(beat_features, axis=1)
    print(beat_feature_set.shape)
    print(beat_feature_set)