def hfc(filename):
    """Detect onsets in an audio file with the HFC onset detection function.

    The file is loaded as mono at 44.1 kHz and cut into 1024-sample frames
    with a 512-sample hop. Each frame is Hann-windowed, transformed with the
    FFT and converted to (magnitude, phase), which feeds
    ``OnsetDetection(method='hfc')``. The per-frame values form a single
    detection function that is handed to ``Onsets`` with weight 1.

    Args:
        filename: Path of the audio file to analyse.

    Returns:
        The output of ``Onsets`` for the single-row detection matrix
        (onset positions; see the Essentia ``Onsets`` reference for units).
    """
    audio = MonoLoader(filename=filename, sampleRate=44100)()

    # Algorithms are configured once and reused for every frame instead of
    # being rebuilt inside the loop.
    window = Windowing(type='hann')
    fft = FFT()
    to_polar = CartesianToPolar()
    detect = OnsetDetection(method='hfc')

    features = []
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = to_polar(fft(window(frame)))
        features.append(detect(mag, phase))

    # Onsets expects a matrix of detection functions (one per row) plus one
    # weight per row.
    return Onsets()(array([features]), [1])
def noveltycurve(filename):
    """Detect onsets in an audio file from a band-energy novelty curve.

    The file is loaded as mono at 44.1 kHz and cut into 1024-sample frames
    with a 512-sample hop. For each Hann-windowed frame the magnitude
    spectrum is reduced to band energies with ``FrequencyBands``; the
    sequence of band-energy vectors is turned into a novelty curve with
    ``NoveltyCurve``, which is then passed to ``Onsets`` as a single
    detection function with weight 1.

    Args:
        filename: Path of the audio file to analyse.

    Returns:
        The output of ``Onsets`` for the single-row detection matrix
        (onset positions; see the Essentia ``Onsets`` reference for units).
    """
    audio = MonoLoader(filename=filename, sampleRate=44100)()

    # Algorithms are configured once and reused for every frame instead of
    # being rebuilt inside the loop.
    window = Windowing(type='hann')
    fft = FFT()
    to_polar = CartesianToPolar()
    bands = FrequencyBands()

    band_energy = []
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        # Only the magnitude is used; the phase is discarded.
        mag, _phase = to_polar(fft(window(frame)))
        band_energy.append(bands(mag))

    novelty = NoveltyCurve()(band_energy)
    return Onsets()(np.array([novelty]), [1])
def calculateDownbeats(self, audio, bpm, phase):
    """Pick the downbeat positions among the already detected beats.

    A complex-spectral-difference (CSD) onset detection function is computed
    over ``audio``, normalised with ``self.adaptive_mean`` and half-wave
    rectified. For each of the four possible downbeat offsets the mean of
    that curve is sampled once per bar (every fourth beat); the offset with
    the highest mean is taken as the downbeat.

    Side effects: plots the four candidate grids with matplotlib, prints the
    four scores and blocks on ``plt.show()`` (experimental/debug output).

    Args:
        audio: Mono audio samples to analyse.
        bpm: Tempo, passed to ``self.numFramesPerBeat``.
        phase: Offset of the first beat. It is converted to frames as
            ``phase * 44100 / 512``, i.e. it is treated as seconds at
            44.1 kHz with a 512-sample hop.

    Returns:
        Every fourth entry of ``self.beats`` starting at the winning offset,
        multiplied by 1.0.
    """
    # Step 0: calculate the CSD (Complex Spectral Difference) features and the
    # associated onset detection function.
    # NOTE: a 1500 Hz low-pass pre-filter was planned for this step, but it is
    # not applied; the raw signal is analysed.
    w = Windowing(type='hann')
    fft = FFT()
    c2p = CartesianToPolar()
    od_csd = OnsetDetection(method='complex')
    pool = Pool()

    # TODO test faster (numpy) way
    for frame in FrameGenerator(audio, frameSize=self.FRAME_SIZE,
                                hopSize=self.HOP_SIZE):
        mag, ph = c2p(fft(w(frame)))
        pool.add('onsets.complex', od_csd(mag, ph))

    # Step 1: normalise the data using an adaptive mean threshold
    novelty_mean = self.adaptive_mean(pool['onsets.complex'], 16.0)
    # Step 2: half-wave rectify the result
    novelty_hwr = (pool['onsets.complex'] - novelty_mean).clip(min=0)

    # Step 7 (experimental): determine downbeat locations as the subsequence
    # with the highest complex spectral difference.
    # NOTE(review): 44100 and 512 are hard-coded; presumably they should match
    # the sample rate and self.HOP_SIZE -- confirm.
    phase_frames = (phase * 44100.0) / (512.0)
    frames_per_beat = self.numFramesPerBeat(bpm)
    for i in range(4):
        frames = np.round(
            np.arange(phase_frames + i * frames_per_beat,
                      np.size(novelty_hwr),
                      4 * frames_per_beat)).astype('int')
        # Discard last value to prevent reading beyond array (last value
        # rounded up for example)
        frames = frames[:-1]
        pool.add('output.downbeat',
                 np.sum(novelty_hwr[frames]) / np.size(frames))

        plt.subplot(4, 1, i + 1)
        plt.plot(novelty_hwr)
        for f in frames:
            plt.axvline(x=f)

    # Single-argument call form prints identically on Python 2 and Python 3.
    print(pool['output.downbeat'])
    downbeatIndex = np.argmax(pool['output.downbeat'])
    plt.show()  # experimental

    return 1.0 * self.beats[downbeatIndex::4]
def __call__(self, audio, SR, sumThreshold=1e-5):
    """Estimate the effective bandwidth (cut-off frequency) of ``audio``.

    For every frame whose spectral energy is at least 10% of the loudest
    frame's energy, the spectrum is scanned from the highest bin downwards
    and the first bin ``j`` with ``sum(spectrum[j:] / j) >= sumThreshold`` is
    recorded as that frame's cut-off bin. The recorded bins and an
    energy-weighted histogram of them are passed to ``__computeMeanFc``.

    Side effects: calls ``self.__reset__()`` and overwrites ``self.hist``,
    ``self.avgFrames`` and ``self.mostLikelyBin``.

    Args:
        audio: Audio samples as a NumPy array. Arrays with more than one
            dimension are downmixed by averaging over axis 1.
        SR: Sample rate, used to convert the selected bin to a frequency.
        sumThreshold: Threshold on the index-normalised tail sum of the
            magnitude spectrum.

    Returns:
        Tuple ``(frequency, conf, binary)`` where ``frequency`` is
        ``mostLikelyBin * SR / frameSize`` and ``conf`` / ``binary`` are
        forwarded unchanged from ``__computeMeanFc``.
    """
    self.__reset__()

    if audio.ndim > 1:
        # Divide by the channel count (shape[1]), not by ndim, so the downmix
        # is a true mean for any number of channels. For stereo both agree.
        audio = np.sum(audio, axis=1) / audio.shape[1]

    fcIndexArr = []
    self.hist = np.zeros(int(self.frameSize / 2 + 1))
    fft = FFT(size=self.frameSize)  # declare FFT function
    window = Windowing(size=self.frameSize,
                       type="hann")  # declare windowing function
    self.avgFrames = np.zeros(int(self.frameSize / 2) + 1)

    def frames():
        # Both passes must use exactly the same framing.
        return FrameGenerator(audio,
                              frameSize=self.frameSize,
                              hopSize=self.hopSize,
                              startFromZero=True)

    # First pass: energy of the loudest frame, used as the reference for the
    # relative energy gate below.
    maxNrg = max([sum(abs(fft(window(frame)))**2) for frame in frames()])

    # Second pass: per-frame cut-off search.
    for i, frame in enumerate(frames()):
        frame = window(frame)  # apply window to the frame
        frameFft = abs(fft(frame))
        nrg = sum(frameFft**2)
        if nrg >= 0.1 * maxNrg:
            # Scan from the top bin down; the first bin whose normalised tail
            # sum reaches the threshold is this frame's cut-off.
            # NOTE: j reaches 0 when no higher bin qualifies, which divides
            # by zero inside NumPy (inf/nan plus a RuntimeWarning).
            for j in reversed(range(len(frameFft))):
                if sum(frameFft[j:] / j) >= sumThreshold:
                    fcIndexArr.append(j)
                    self.hist[j] += nrg
                    break
        # NOTE(review): accumulated for every frame (the sum is later divided
        # by the total frame count) -- confirm this was not meant to be
        # restricted to the energy-gated frames.
        self.avgFrames = self.avgFrames + frameFft

    if not fcIndexArr:
        # No frame produced a cut-off: fall back to one past the last bin.
        fcIndexArr.append(int(self.frameSize / 2) + 1)
        self.hist[int(self.frameSize / 2)] += 1

    self.avgFrames /= (i + 1)

    self.mostLikelyBin, conf, binary = self.__computeMeanFc(
        fcIndexArr, np.arange(int(self.frameSize / 2) + 2), hist=self.hist)

    return self.mostLikelyBin * SR / self.frameSize, conf, binary
def ninos(filename, gamma=0.94):
    """Detect onsets in an audio file with a NINOS-style sparsity measure.

    reference: Mounir, M., Karsmakers, P., & Van Waterschoot, T. (2016).
    Guitar note onset detection based on a spectral sparsity measure.
    European Signal Processing Conference.
    https://doi.org/10.1109/EUSIPCO.2016.7760394

    The file is loaded as mono at 44.1 kHz and cut into frames of 2048
    samples with a hop of ``int(2048 / 10)`` samples. For each frame the
    ``J = int(2048 * gamma / 2)`` largest magnitude-spectrum values are kept
    and the per-frame feature is::

        ninos = sum(x**2) / (sum(x**4) ** 0.25 * J ** 0.25)

    The feature sequence is passed to ``OnsetPeakPickingProcessor``.

    NOTE(review): this keeps the J *largest* magnitudes of each frame; check
    against the cited paper whether the lowest-magnitude bins were intended.

    Args:
        filename: Path of the audio file to analyse.
        gamma: Fraction (of half the frame size) of spectral bins kept per
            frame.

    Returns:
        The output of ``OnsetPeakPickingProcessor(threshold=0.03, fps=...)``
        applied to the feature sequence.
    """
    N = 2048
    hopSize = int(N / 10)
    J = int(N * gamma / 2)

    audio = MonoLoader(filename=filename, sampleRate=44100)()

    # Algorithms are configured once and reused for every frame instead of
    # being rebuilt inside the loop.
    window = Windowing(type='hann')
    fft = FFT()
    to_polar = CartesianToPolar()

    mag = []
    for frame in FrameGenerator(audio, frameSize=N, hopSize=hopSize):
        m = np.asarray(to_polar(fft(window(frame)))[0])
        # Keep the J largest magnitudes, in descending order.
        mag.append(np.sort(m)[::-1][:J])
    mag = np.asarray(mag)

    x2 = mag * mag
    l2_squared = np.sum(x2, axis=1)
    l4 = np.sum(x2 * x2, axis=1) ** 0.25
    # A silent frame gives 0/0; report 0 for it instead of letting NaN leak
    # into the detection function handed to the peak picker.
    inos = np.divide(l2_squared, l4,
                     out=np.zeros_like(l2_squared), where=l4 > 0)
    ninos = inos / (J ** 0.25)

    return OnsetPeakPickingProcessor(threshold=0.03,
                                     fps=44100/hopSize)(ninos)
def detectBW(audio: list,
             SR: float,
             frame_size=256,
             hop_size=128,
             floor_db=-90,
             oversample_f=1):
    """Estimate the effective bandwidth (cut-off frequency) of ``audio``.

    Each Hann-windowed frame is transformed to a dB magnitude spectrum, its
    linear spectral envelope is computed and floored at ``floor_db``, and a
    cut-off bin is derived with ``compute_fc``. Cut-off bins that pass
    ``energy_verification`` are collected and summarised by
    ``compute_mean_fc``.

    Args:
        audio: Audio samples to analyse.
        SR: Sample rate, used to convert the selected bin to a frequency.
        frame_size: Frame size in samples before oversampling.
        hop_size: Hop between consecutive frames in samples.
        floor_db: Floor applied to the spectral envelope, in dB.
        oversample_f: Factor by which ``frame_size`` is multiplied.

    Returns:
        Tuple ``(frequency, conf, binary)`` where ``frequency`` is
        ``fc_bin * SR / frame_size`` and ``conf`` / ``binary`` are forwarded
        unchanged from ``compute_mean_fc``.
    """
    frame_size *= oversample_f  # if an oversample factor is desired, apply it

    fc_index_arr = []
    fft = FFT(size=frame_size)  # declare FFT function
    window = Windowing(size=frame_size,
                       type="hann")  # declare windowing function

    # Default bin count so that input yielding no frames does not hit an
    # undefined `frame_fft` after the loop; replaced by the real spectrum
    # length as soon as a frame has been processed.
    n_bins = frame_size // 2 + 1

    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):
        frame_fft = abs(fft(window(frame)))
        n_bins = len(frame_fft)
        # calculate frame fft values in db
        frame_fft_db = 20 * np.log10(frame_fft + eps)
        # compute the linear interpolation between the values of the maxima
        # of the spectrum
        interp_frame = compute_spectral_envelope(frame_fft_db, "linear")
        interp_frame = modify_floor(interp_frame, floor_db, log=True)
        fc_index = compute_fc(interp_frame)

        if energy_verification(frame_fft, fc_index):
            fc_index_arr.append(fc_index)

    if not fc_index_arr:
        # No frame passed verification: fall back to a single index equal to
        # the frame size.
        fc_index_arr = [frame_size]

    fc_bin, conf, binary = compute_mean_fc(fc_index_arr, np.arange(n_bins),
                                           SR)

    return fc_bin * SR / frame_size, conf, binary
def f_essentia_extract(Audio):
    """Return the onset positions Essentia detects in ``Audio``.

    Uses library algorithms to find where each note starts in time: the
    signal is cut into 1024-sample frames with a 512-sample hop, every frame
    is Hann-windowed and converted to (magnitude, phase), and the 'complex'
    onset detection function is evaluated on it. The resulting curve is
    given to ``Onsets`` as a single detection function with weight 1.

    Args:
        Audio: Audio samples accepted by ``FrameGenerator``.

    Returns:
        The output of ``Onsets`` (the start of each "note").
    """
    # Per-frame chain: Hann window -> complex FFT -> (magnitude, phase).
    hann = Windowing(type='hann')
    complex_fft = FFT()
    to_polar = CartesianToPolar()
    # A single detector instance is shared by all frames.
    complex_odf = OnsetDetection(method='complex')

    # Pool collecting the detection function, one value per frame.
    results = essentia.Pool()
    for chunk in FrameGenerator(Audio, frameSize=1024, hopSize=512):
        magnitude, angle = to_polar(complex_fft(hann(chunk)))
        results.add('features.complex', complex_odf(magnitude, angle))

    # One row per detection function, one weight per row.
    detection_matrix = essentia.array([results['features.complex']])
    find_onsets = Onsets()
    return find_onsets(detection_matrix, [1])
def __init__(self):
    """Initialise the parent class, a unity EQ and the FFT/IFFT algorithms."""
    super().__init__()
    # Both EQ attributes start as a single gain value of 1.
    # NOTE(review): `eq` may be a property defined on the class whose setter
    # depends on `_eq` already existing -- keep this order unless confirmed.
    self._eq = np.array([1])
    self.eq = [1]
    # Algorithm instances stored on the object.
    self.fft = FFT()
    self.ifft = IFFT()
from build_map import build_map

# Analysis constants shared by the algorithm instances below.
sampleRate = 44100
frameSize = 2048
hopSize = 1024
windowType = "hann"

# Spectral front end.
window = Windowing(size=frameSize, zeroPadding=0, type=windowType)
spectrum = Spectrum()
fft = FFT()  # produces a complex FFT
c2p = CartesianToPolar()  # converts it into a (magnitude, phase) pair

# Onset detection.
od = OnsetDetection(method='hfc')
onsets = Onsets(alpha=1)

# Statistics.
mean = Mean()
gaussian = SingleGaussian()

# Tonal and timbral descriptors.
keyDetector = essentia.standard.Key(pcpSize=12)
mfcc = MFCC()

# Dissonance (computed from spectral peaks ordered by frequency).
spectralPeaks = SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
dissonance = Dissonance()

# Bark bands.
barkbands = BarkBands(sampleRate=sampleRate)

# Currently disabled descriptors:
# zero crossing rate
# zerocrossingrate = ZeroCrossingRate()
# odd-to-even harmonic energy ratio
# odd2evenharmonicenergyratio = OddToEvenHarmonicEnergyRatio()
def run(self, audio):
    """Estimate tempo, beat phase, beat times and downbeats of ``audio``.

    A complex-spectral-difference (CSD) onset detection function is computed,
    normalised with ``self.adaptive_mean`` and half-wave rectified. The tempo
    is the candidate BPM whose beat-period grid has the highest mean on the
    autocorrelation of that curve; the phase is the offset (searched in 1 ms
    steps over one beat period) whose beat grid has the highest mean on the
    curve itself.

    Side effects: prints the selected phase and stores the results in
    ``self.bpm``, ``self.phase``, ``self.beats`` and ``self.downbeats``.

    Args:
        audio: Mono audio samples. Durations and the seconds-to-frames
            conversion assume 44.1 kHz and a 512-sample hop (hard-coded).
    """
    # TODO put this in some util class

    # Step 0: calculate the CSD (Complex Spectral Difference) features
    # and the associated onset detection function
    w = Windowing(type='hann')
    fft = FFT()
    c2p = CartesianToPolar()
    od_csd = OnsetDetection(method='complex')

    pool = Pool()

    # TODO test faster (numpy) way
    for frame in FrameGenerator(audio, frameSize=self.FRAME_SIZE,
                                hopSize=self.HOP_SIZE):
        mag, phase = c2p(fft(w(frame)))
        pool.add('onsets.complex', od_csd(mag, phase))

    # Step 1: normalise the data using an adaptive mean threshold
    novelty_mean = self.adaptive_mean(pool['onsets.complex'], 16.0)

    # Step 2: half-wave rectify the result
    novelty_hwr = (pool['onsets.complex'] - novelty_mean).clip(min=0)

    # Step 3: then calculate the autocorrelation of this signal
    novelty_autocorr = self.autocorr(novelty_hwr)

    # Step 4: Sum over constant intervals to detect most likely BPM
    valid_bpms = np.arange(self.minBpm, self.maxBpm, self.stepBpm)
    for bpm in valid_bpms:
        frames = np.round(
            np.arange(0, np.size(novelty_autocorr),
                      self.numFramesPerBeat(bpm))).astype('int')
        # Discard last value to prevent reading beyond array (last value
        # rounded up for example)
        frames = frames[:-1]
        pool.add('output.bpm',
                 np.sum(novelty_autocorr[frames]) / np.size(frames))
    bpm = valid_bpms[np.argmax(pool['output.bpm'])]

    # Step 5: Calculate phase information
    valid_phases = np.arange(0.0, 60.0 / bpm, 0.001)  # Valid phases in SECONDS
    frames_per_beat = self.numFramesPerBeat(bpm)  # constant for the search
    for phase in valid_phases:
        # Convert phase from seconds to frames
        # NOTE(review): 44100 and 512 are hard-coded; presumably they should
        # match the sample rate and self.HOP_SIZE -- confirm.
        phase_frames = (phase * 44100.0) / (512.0)
        frames = np.round(
            np.arange(phase_frames, np.size(novelty_hwr),
                      frames_per_beat)).astype('int')
        # Discard last value to prevent reading beyond array (last value
        # rounded up for example)
        frames = frames[:-1]
        pool.add('output.phase',
                 np.sum(novelty_hwr[frames]) / np.size(frames))
    phase = valid_phases[np.argmax(pool['output.phase'])]
    # Single-argument call form prints identically on Python 2 and Python 3.
    print('PHASE %s' % phase)

    # Step 6: Determine the beat locations
    spb = 60. / bpm  # seconds per beat
    # Float division: with integer operands Python 2 would truncate the track
    # duration to whole seconds and could drop the final beat.
    duration = np.size(audio) / 44100.0
    beats = np.arange(phase, duration - spb + phase, spb).astype('single')

    # Store all the results
    self.bpm = bpm
    self.phase = phase
    self.beats = beats

    # calculateDownbeats reads self.beats, so it must run after the stores
    # above.
    self.downbeats = self.calculateDownbeats(audio, bpm, phase)