def get_onsets(self, _audio=None):
    """Run onset detection over an audio signal and return the detected onsets.

    Args:
        _audio: Optional audio samples to analyse. When omitted, ``None`` or
            empty, ``self.audio`` is analysed instead.

    Returns:
        The value produced by ``es.Onsets(alpha=.2)`` when given the single
        per-frame detection function computed here with weight 1.
    """
    # A None default plus an explicit length test replaces the former
    # ``_audio=[]`` / ``_audio != []`` pair: a mutable default is shared between
    # calls, and ``!=`` against a list is not a reliable emptiness test for
    # array-like inputs.
    if _audio is None or len(_audio) == 0:
        audio = self.audio
    else:
        audio = _audio

    W = es.Windowing(type=self.winType)
    c2p = es.CartesianToPolar()
    fft = es.FFT()
    onsetDetection = es.OnsetDetection(method=self.onsetMethod,
                                       sampleRate=44100)
    onsets = es.Onsets(alpha=.2)
    pool = Pool()

    for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase, = c2p(fft(W(frame)))
        # NOTE(review): the detector is re-configured on every frame with the
        # method it was already constructed with. Kept as-is to preserve the
        # current output; confirm whether this is intentional, since
        # reconfiguring may discard the detector's inter-frame state.
        onsetDetection.configure(method=self.onsetMethod)
        pool.add("onsetFunction", onsetDetection(mag, phase))

    # Onsets takes a collection of detection functions plus one weight each.
    return onsets([pool["onsetFunction"]], [1])
def segment(audio, hopSize, frameSize, rms_onset_threshold, mel_onset_threshold,
            flux_onset_threshold, onset_threshold):
    """Locate the first onset frame and the following start-of-sustain frame.

    Args:
        audio: Audio samples handed to ``estd.FrameGenerator``.
        hopSize: Hop size used for framing.
        frameSize: Frame size used for framing and for the pitch estimator.
        rms_onset_threshold: The 'rms' detection value must exceed this for an
            onset.
        mel_onset_threshold: The 'melflux' detection value that can trigger an
            onset (alternative to the flux condition).
        flux_onset_threshold: The 'flux' detection value that can trigger an
            onset (alternative to the melflux condition).
        onset_threshold: The frame loudness must exceed this for an onset.

    Returns:
        Tuple ``(onset, sustain)`` of frame indices; either element is ``None``
        when the corresponding event was never detected.
    """
    # init algorithms
    o_mel = estd.OnsetDetection(method='melflux')
    o_rms = estd.OnsetDetection(method='rms')
    o_flux = estd.OnsetDetection(method='flux')
    fft = estd.FFT()
    c2p = estd.CartesianToPolar()
    w = estd.Windowing(type='hann')
    yin = estd.PitchYinFFT(frameSize=frameSize, minFrequency=40,
                           maxFrequency=2500, interpolate=True)
    spectrum = estd.Spectrum()
    loudness = estd.Loudness()
    frame_generator = estd.FrameGenerator(audio, frameSize=frameSize,
                                          hopSize=hopSize)

    # control parameters
    attack = False
    detection = True
    mel_onset_value = 0
    rms_onset_value = 0

    # output variables
    onset = None
    sustain = None

    for index, frame in enumerate(frame_generator):
        # Window once and feed both the FFT and the spectrum from it.
        windowed = w(frame)
        mag, phase = c2p(fft(windowed))
        _, conf = yin(spectrum(windowed))
        loud = loudness(frame)

        mel_onset = o_mel(mag, phase)
        rms_onset = o_rms(mag, phase)
        flux_onset = o_flux(mag, phase)

        # condition for onset
        if detection \
                and (flux_onset > flux_onset_threshold
                     or mel_onset > mel_onset_threshold) \
                and rms_onset > rms_onset_threshold \
                and loud > onset_threshold:
            onset = index
            attack = True
            detection = False
            # Remember the detection values at the onset; the sustain test
            # below is relative to them.
            mel_onset_value = mel_onset
            rms_onset_value = rms_onset

        # condition for beginning of sustain (checked on the onset frame too)
        if attack and conf > 0.5 \
                and rms_onset < rms_onset_value * .05 \
                and mel_onset < mel_onset_value * .3:
            attack = False
            sustain = index
            # Both flags are now False for good, so no later frame can change
            # the result.
            break

    return onset, sustain
def calculate_function(self):
    """Compute the onset detection function of ``self.signal`` frame by frame.

    Each frame is windowed with ``self.window``, transformed to magnitude and
    phase, and passed to ``self.calcOnsetFunc``.

    Side effects:
        Sets ``self.onsetFunction`` to a float32 array with one value per frame.
        Sets ``self.onsetTime`` to the frame index times the hop duration
        (``hopSize / sampleRate``), shifted back by one hop duration.
    """
    to_spectrum = es.FFT()
    to_polar = es.CartesianToPolar()
    frames = es.FrameGenerator(self.signal,
                               frameSize=self.frameSize,
                               hopSize=self.hopSize)

    # to_polar yields the (magnitude, phase) pair that calcOnsetFunc expects.
    values = [
        self.calcOnsetFunc(*to_polar(to_spectrum(self.window(frame))))
        for frame in frames
    ]

    self.onsetFunction = np.array(values, dtype=np.float32)

    hop_duration = self.hopSize / self.sampleRate
    frame_times = np.arange(len(values)) * hop_duration
    frame_times -= hop_duration
    self.onsetTime = frame_times
def getOnsetFunctions(fname):
    """Compute per-band, half-wave-rectified spectral flux for an audio file.

    The file is loaded with ``ess.MonoLoader``, cut into frames of
    ``params.frmSize`` samples every ``params.hop`` samples, zero-padded to
    ``params.Nfft`` and transformed. For every frame and every band in
    ``params.fBands`` the positive part of the magnitude difference to the
    previous frame is summed.

    Args:
        fname: Path of the audio file to analyse.

    Returns:
        2-D array with one row per frame: column 0 is the frame time and
        columns 1..``params.numBands`` are the band flux values.
    """
    logger = log.get_logger("rhythm")
    zeropadLen = params.Nfft - params.frmSize
    zz = np.zeros((zeropadLen,), dtype='float32')
    # Floor division: an array shape must be an integer, and true division
    # yields a float under Python 3.
    bufferFrame = np.zeros((int(params.Nfft // 2 + 1),))

    logger.info('Reading audio file...')
    audio = ess.MonoLoader(filename=fname)()
    fft = ess.FFT(size=params.Nfft)  # this gives us a complex FFT
    c2p = ess.CartesianToPolar()  # and this turns it into (magnitude, phase)
    pool = es.Pool()
    w = ess.Windowing(type="hamming")
    fTicks = params.fTicks
    poolName = 'features.flux'

    # The band masks and pool keys do not depend on the frame, so build them
    # once instead of once per frame.
    bandRange = range(params.numBands)
    bandMasks = [(fTicks >= params.fBands[band, 0]) &
                 (fTicks <= params.fBands[band, 1]) for band in bandRange]
    bandKeys = [poolName + str(band) for band in bandRange]

    # float() guards against integer division when hop and Fs are both ints.
    hopTime = float(params.hop) / params.Fs
    halfFrameTime = params.frmSize / (2.0 * params.Fs)
    expectedFrames = audio.size // params.hop  # only used for progress logs

    logger.info('Extracting Onset functions...')
    frameCounter = 0
    for frame in ess.FrameGenerator(audio, frameSize=params.frmSize,
                                    hopSize=params.hop):
        # Time stamp refers to the centre of the (un-padded) frame.
        frmTime = hopTime * frameCounter + halfFrameTime
        zpFrame = np.hstack((frame, zz))
        mag, phase, = c2p(fft(w(zpFrame)))
        magFlux = mag - bufferFrame
        bufferFrame = np.copy(mag)  # kept for the next frame's difference

        for key, mask in zip(bandKeys, bandMasks):
            magFluxBand = magFlux[mask]
            # (x + |x|) / 2 keeps positive differences and zeroes negatives.
            magFluxBand = (magFluxBand + abs(magFluxBand)) / 2
            oFn = magFluxBand.sum()
            if math.isnan(oFn):
                print("NaN found here")
            pool.add(key, oFn)

        pool.add('features.time', frmTime)
        frameCounter += 1
        if not np.mod(frameCounter, 10000):
            logger.info(
                str(frameCounter) + '/' + str(expectedFrames) + '...')

    logger.info('Total frames processed = ' + str(frameCounter))

    # One row per feature (time first, then each band), stacked in one go.
    rows = [es.array([pool['features.time']])]
    rows.extend(es.array([pool[key]]) for key in bandKeys)
    return np.transpose(np.vstack(rows))
def OnsetsSegmentation(audio, frame_size=1024, frame_hop=512,
                       windowing_type='hann', onsets_method='hfc'):
    """Split an audio signal into segments delimited by detected onsets.

    Args:
        audio: Audio samples to analyse and slice.
        frame_size: Frame size passed to ``inFrames``.
        frame_hop: Hop size passed to ``inFrames``.
        windowing_type: Window type for ``Windowing``.
        onsets_method: Detection method for ``OnsetDetection``.

    Returns:
        Tuple ``(segments, onsets)``: the output of ``Slicer`` for the
        intervals between consecutive onsets (starting at 0 and ending at the
        audio duration), and the onsets returned by ``Onsets``.
    """
    # Algorithms used below.
    spec = es_mode.Spectrum()
    fft = es_mode.FFT()
    c2p = es_mode.CartesianToPolar()
    od1 = es_mode.OnsetDetection(method=onsets_method)
    w = es_mode.Windowing(type=windowing_type)
    pool = es.Pool()

    # Callback executed for every frame.
    def F(n):
        # Window once; both the spectrum and the FFT use the same frame.
        windowed = w(n)
        _, phase = c2p(fft(windowed))
        # Only the detection value is read later, so nothing else is stored.
        pool.add('features.onsetdetection', od1(spec(windowed), phase))

    # NOTE(review): inFrames is defined elsewhere; its return value is used
    # here as the number of frames processed — confirm.
    qtdFrames = inFrames(audio=audio, algorithm=F, frameSize=frame_size,
                         hopSize=frame_hop)

    audio_duration = es_mode.Duration()(audio)
    frame_rate = qtdFrames / audio_duration
    onsets_algo = es_mode.Onsets(frameRate=frame_rate)

    # Matrix with one row per onset detection function that was run.
    onset_detection_matrix = es.array([pool['features.onsetdetection']])
    # The second argument is the weight vector, one weight per row.
    onsets = onsets_algo(onset_detection_matrix, [1])

    # Segment i runs from start_times[i] to end_times[i].
    end_times = es.array(np.append(onsets, audio_duration))
    start_times = es.array(np.append([0], onsets))
    segments = es_mode.Slicer(endTimes=end_times, startTimes=start_times,
                              timeUnits="seconds")(audio)
    return segments, onsets
def __detect_onsets(self, file, frame_size, hop_size, windowfnc, normalize) -> None:
    """Compute the onsets of ``file.audio`` and store them in ``self.onsets``.

    Args:
        file: Object whose ``audio`` attribute holds the samples to analyse.
        frame_size: Frame size for framing, windowing and the FFT.
        hop_size: Hop size for framing.
        windowfnc: Object whose ``value`` attribute names the window type.
        normalize: Passed to ``Windowing`` as ``normalized``.
    """
    feature_key = "features." + self.algo

    windowing = estd.Windowing(
        size=frame_size,
        type=windowfnc.value,
        normalized=normalize,
    )
    spectrum = estd.FFT(size=frame_size)
    features = es.Pool()
    to_polar = estd.CartesianToPolar()
    onset_detection = estd.OnsetDetection(method=self.algo)

    frames = estd.FrameGenerator(file.audio,
                                 frameSize=frame_size,
                                 hopSize=hop_size)
    for frame in frames:
        magnitude, phase = to_polar(spectrum(windowing(frame)))
        features.add(feature_key, onset_detection(magnitude, phase))

    # Onsets takes a matrix (one row per detection function) and one weight
    # per row; here there is a single row with weight 1.
    # NOTE(review): Onsets() is built with default parameters and never sees
    # hop_size — confirm its default frame rate matches the framing used here.
    detection_matrix = es.array([features[feature_key]])
    self.onsets = estd.Onsets()(detection_matrix, [1])
def detect_onset(audio, index):
    """Detect onsets in ``audio`` using the 'complex' detection function.

    Prints a start message and a finish message with the elapsed time, both
    tagged with ``index``.

    Args:
        audio: Audio samples handed to ``FrameGenerator``.
        index: Identifier of the calling subprocess, used only in the messages.

    Returns:
        The value returned by ``Onsets`` for the single detection function.
    """
    # should be able to fetch the module from cache
    import essentia.standard as ess_std
    from essentia import array

    print("Subprocess {} starts".format(index))
    processing_start = time()

    onset_detector = ess_std.OnsetDetection(method="complex")
    window = ess_std.Windowing(type="hann")
    fft = ess_std.FFT()
    c2p = ess_std.CartesianToPolar()
    onsets = ess_std.Onsets()

    # One detection value per frame; c2p yields the (magnitude, phase) pair.
    detection_values = [
        onset_detector(*c2p(fft(window(frame))))
        for frame in ess_std.FrameGenerator(audio, frameSize=1024, hopSize=512)
    ]

    # Onsets takes a matrix of detection functions plus one weight per row.
    onsets_array = onsets(array([detection_values]), [1])

    # ".2f" prints two decimals; the former ".2" meant two significant digits
    # and switched to exponent notation for times of 10 s and above.
    print("Subprocess {} finished. Elapsed time: {:.2f}s".format(
        index, time() - processing_start))
    return onsets_array
def __init__(self, params, fsm=None):
    """Store the detection parameters and build the algorithms and state machine.

    Args:
        params: Mapping read by key. It must provide 'onset_threshold',
            'offset_threshold', 'max_attack_time', 'max_release_time',
            'attack_slope_ratio', 'release_slope_ratio', 'flux_threshold',
            'mel_threshold', 'rms_threshold', 'conf_threshold', 'ratio_mel',
            'ratio_rms', 'fs' and 'hop_size'.
        fsm: Optional external state machine to send events to; stored as
            ``self.ext_fsm``.
    """
    # Values copied verbatim from params; each attribute is named after its key.
    copied_keys = (
        'onset_threshold', 'offset_threshold',
        'max_attack_time', 'max_release_time',
        'attack_slope_ratio', 'release_slope_ratio',
        'flux_threshold', 'mel_threshold', 'rms_threshold', 'conf_threshold',
        'ratio_mel', 'ratio_rms',
    )
    for key in copied_keys:
        setattr(self, key, params[key])

    self.rms_threshold_value = 0
    # NOTE(review): "vale" looks like a typo of "value"; the name is kept
    # because code outside this method may reference it.
    self.mel_threshold_vale = 0

    self.fs = params['fs']
    self.hop_size = params['hop_size']

    # Attack / release time limits converted with seconds2frames.
    framing = {'fs': self.fs, 'hop_size': self.hop_size}
    self.max_attack_frames = seconds2frames(self.max_attack_time, **framing)
    self.max_release_frames = seconds2frames(self.max_release_time, **framing)

    self.ext_fsm = fsm  # external state machine to send events to
    self.buffer = []
    self.was_onset = False
    self.was_offset = False
    self.onset_counter = None
    self.offset_counter = None
    self.onset_samples = 2  # number of consecutive samples to be above threshold
    self.offset_samples = 3  # number of consecutive samples to be below threshold
    self.peak_detect = GrowingSlopeEnd(max_frames=self.max_attack_frames,
                                       m=self.attack_slope_ratio)

    # essentia algorithms initialization
    self.o_mel = estd.OnsetDetection(method='melflux')
    self.o_rms = estd.OnsetDetection(method='rms')
    self.o_hfc = estd.OnsetDetection(method='hfc')
    self.o_flux = estd.OnsetDetection(method='flux')
    self.o_complex = estd.OnsetDetection(method='complex')
    self.fft = estd.FFT()
    self.c2p = estd.CartesianToPolar()
    self.w = estd.Windowing(type='hann')

    # STATE MACHINE: detecting -> attack -> sustain -> detecting, plus a
    # 'reset' event that returns to 'detecting' from any state.
    transitions = (
        ('onset', 'detecting', 'attack'),
        ('peak', 'attack', 'sustain'),
        ('offset', 'sustain', 'detecting'),
        ('reset', ['detecting', 'attack', 'sustain'], 'detecting'),
    )
    events = [{'name': name, 'src': src, 'dst': dst}
              for name, src, dst in transitions]
    callbacks = {
        'ondetecting': self.on_detecting,
        'onattack': self.on_attack,
        'onsustain': self.on_sustain,
        'onbeforeonset': self.on_onset,
        'onbeforepeak': self.on_peak,
        'onbeforeoffset': self.on_offset,
    }
    self.fsm = Fysom({
        'initial': 'detecting',
        'events': events,
        'callbacks': callbacks,
    })