def testSinusoidalPlusNoise(self):
    from essentia import instantPower
    from essentia import db2amp

    frameSize = 512
    hopSize = frameSize // 2
    fs = 44100.
    time = 5.  # s
    time_axis = np.arange(0, time, 1 / fs)
    nsamples = len(time_axis)

    noise = np.random.randn(nsamples)
    noise /= np.std(noise)
    noise_only = 1

    signal = np.sin(2 * pi * 5000 * time_axis)
    signal_db = -22.
    noise_db = -50.

    signal[:int(noise_only * fs)] = np.zeros(int(noise_only * fs))

    snr_gt = 10. * np.log10(
        (instantPower(esarr(db2amp(signal_db) * signal[int(noise_only * fs):]))) /
        (instantPower(esarr(db2amp(noise_db) * noise[int(noise_only * fs):])))) \
        - 10. * np.log10(fs / 2.)

    signal_and_noise = esarr(db2amp(signal_db) * signal +
                             db2amp(noise_db) * noise)

    noiseThreshold = -30
    algo = SNR(frameSize=frameSize, noiseThreshold=noiseThreshold)
    for frame in FrameGenerator(signal_and_noise, frameSize=frameSize,
                                hopSize=hopSize):
        _, snr, _ = algo(frame)

    self.assertAlmostEqual(snr, snr_gt, 1e-1)
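# A minimal, self-contained sketch (numpy only) of the ground-truth figure used in the
# test above: the broadband SNR of the sinusoid-plus-noise mixture, and the same value
# corrected by 10*log10(fs/2) so it is comparable to a PSD-based estimate. The helper
# names (inst_power, amp) are illustrative and not part of Essentia.
import numpy as np

def inst_power(x):
    # mean squared amplitude of a signal
    return float(np.mean(np.asarray(x, dtype=np.float64) ** 2))

def amp(db):
    # amplitude-dB to linear gain
    return 10.0 ** (db / 20.0)

fs = 44100.0
t = np.arange(0, 5.0, 1.0 / fs)
sine = amp(-22.0) * np.sin(2 * np.pi * 5000 * t)
noise = amp(-50.0) * np.random.randn(len(t))

snr_broadband = 10.0 * np.log10(inst_power(sine) / inst_power(noise))
snr_psd_corrected = snr_broadband - 10.0 * np.log10(fs / 2.0)
print(snr_broadband, snr_psd_corrected)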
def is_silent_threshold(frame, silence_threshold_dB):
    p = essentia.instantPower(frame)
    silence_threshold = pow(10.0, (silence_threshold_dB / 10.0))
    if p < silence_threshold:
        return 1.0
    else:
        return 0.0
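# Usage sketch for is_silent_threshold(), assuming the `essentia` Python bindings are
# importable (essentia.instantPower returns the mean squared amplitude of the frame).
# A -90 dBFS noise frame should be flagged as silent against a -60 dB threshold, while
# a full-scale sine frame should not.
import numpy as np

quiet_frame = ((10.0 ** (-90.0 / 20.0)) * np.random.randn(1024)).astype(np.float32)
loud_frame = np.sin(2 * np.pi * 440 * np.arange(1024) / 44100.0).astype(np.float32)

print(is_silent_threshold(quiet_frame, -60.0))  # expected: 1.0
print(is_silent_threshold(loud_frame, -60.0))   # expected: 0.0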
def compute(self, *args):
    # Discontinuity (click) detector. frameSize, hopSize, db2pow, instantPower and
    # esarr are expected to be defined at module level in the surrounding script.
    x = args[1]
    order = 12
    LPC = es.LPC(order=order, type='regular')
    idx_ = 0
    threshold = 10
    powerEstimationThreshold = 10
    silenceThreshold = db2pow(-50)
    detectionThreshold = db2pow(30)
    start_proc = int(frameSize / 2 - hopSize / 2)
    end_proc = int(frameSize / 2 + hopSize / 2)
    y = []

    for frame in es.FrameGenerator(x, frameSize=frameSize, hopSize=hopSize,
                                   startFromZero=True):
        if instantPower(frame) < silenceThreshold:
            idx_ += 1
            continue

        # LPC prediction error, then the error matched-filtered backwards.
        lpc, _ = LPC(frame)
        lpc /= np.max(lpc)
        e = es.IIR(numerator=lpc)(frame)
        e_mf = es.IIR(numerator=-lpc)(e[::-1])[::-1]

        # Thresholding
        th_p = np.max([self.robustPower(e, powerEstimationThreshold) *
                       detectionThreshold, silenceThreshold])
        detections = [i + start_proc for i, v in
                      enumerate(e_mf[start_proc:end_proc] ** 2) if v >= th_p]

        if detections:
            # Group consecutive detections into contiguous regions and report
            # only the start of each region (converted to seconds).
            starts = [detections[0]]
            ends = []
            end = detections[0]
            for idx, d in enumerate(detections[1:], 1):
                if d == detections[idx - 1] + 1:
                    end = d
                else:
                    ends.append(end)
                    starts.append(d)
                    end = d
            ends.append(end)

            for start in starts:
                y.append((start + idx_ * hopSize) / 44100.)
            # for end in ends:
            #     y.append((end + idx_ * hopSize) / 44100.)
        idx_ += 1

    return esarr(y)
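# Self-contained sketch (numpy-free) of the grouping step used above: indices whose
# matched-filter energy exceeds the threshold are merged into contiguous runs, and only
# the start of each run is reported. Names here are illustrative.
def group_detections(detections):
    """Split a sorted list of sample indices into (starts, ends) of contiguous runs."""
    if not detections:
        return [], []
    starts, ends = [detections[0]], []
    end = detections[0]
    for prev, d in zip(detections, detections[1:]):
        if d == prev + 1:
            end = d
        else:
            ends.append(end)
            starts.append(d)
            end = d
    ends.append(end)
    return starts, ends

print(group_detections([10, 11, 12, 40, 41, 90]))  # ([10, 40, 90], [12, 41, 90])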
def compute(self, *args):
    from scipy.special import iv  # modified Bessel function of the first kind

    eps = np.finfo(np.float32).eps

    def SNR_prior_est(alpha, mmse, noise_pow, snr_inst):
        # Decision-directed a priori SNR estimate.
        return alpha * (np.abs(mmse) ** 2) / noise_pow + \
            (1 - alpha) * np.clip(snr_inst, a_min=0, a_max=None)

    def update_noise_psd(noise_spectrum, noise, alpha=.98):
        return alpha * noise_spectrum + (1 - alpha) * np.abs(noise) ** 2

    def update_y(mean_y, y, alpha=.98):
        return alpha * mean_y + (1 - alpha) * y

    def MMSE(v, snr_post, Y):
        # MMSE short-time spectral amplitude estimate of the clean magnitude.
        g = 0.8862269254527579  # gamma(1.5)
        output = np.zeros(len(v))
        for idx in range(len(Y)):
            if v[idx] > 10:
                output[idx] = v[idx] * Y[idx] / snr_post[idx]
            else:
                output[idx] = g * (np.sqrt(v[idx]) / (snr_post[idx] + eps)) * \
                    np.exp(-v[idx] / 2.) * \
                    ((1 + v[idx]) * iv(0., v[idx] / 2.) +
                     v[idx] * iv(1., v[idx] / 2.)) * Y[idx]
        return output

    def SNR_post_est(Y, noise_pow):
        return np.abs(Y) ** 2 / noise_pow

    def SNR_inst_est(snr_post_est):
        return snr_post_est - 1.

    def V(snr_prior, snr_post):
        return (snr_prior / (1. + snr_prior)) * snr_post

    x = esarr(args[1])
    asume_gauss_psd = args[2]  # read from the arguments but not used below
    idx_ = 0
    silenceThreshold = db2pow(noiseThreshold)
    MMSE_alpha = .98
    noise_alpha = .9
    snr_alpha = .95

    y = []
    noise_psd = np.zeros(frameSize // 2 + 1, dtype=np.float32)
    previous_snr_prior = np.zeros(frameSize // 2 + 1, dtype=np.float32)
    previous_snr_inst = np.zeros(frameSize // 2 + 1, dtype=np.float32)
    previous_snr_post = np.zeros(frameSize // 2 + 1, dtype=np.float32)
    previous_Y = np.zeros(frameSize // 2 + 1, dtype=np.float32)
    previous_noise_psd = np.zeros(frameSize // 2 + 1, dtype=np.float32)
    noise_std = 0
    ma_snr_average = 0

    spectrum = es.Spectrum(size=frameSize)
    window = es.Windowing(size=frameSize, type='hann', normalized=False)

    for frame in es.FrameGenerator(x, frameSize=frameSize, hopSize=hopSize,
                                   startFromZero=True):
        Y = spectrum(window(frame))
        if instantPower(frame) < silenceThreshold:
            # Silent frame: only update the noise PSD estimate.
            noise_psd = update_noise_psd(noise_psd, Y, alpha=noise_alpha)
            snr_post = SNR_post_est(Y, noise_psd)
            snr_inst = SNR_inst_est(snr_post)
        else:
            if np.sum(previous_snr_prior) == 0:
                previous_snr_prior = MMSE_alpha + (1 - MMSE_alpha) * \
                    np.clip(previous_snr_inst, a_min=0., a_max=None)
            if 0:  # disabled branch: flatten the noise PSD estimate
                noise_psd = np.ones(frameSize // 2 + 1) * np.mean(noise_psd)

            snr_post = SNR_post_est(Y, noise_psd)
            snr_inst = SNR_inst_est(snr_post)
            v = V(previous_snr_prior, previous_snr_post)
            previous_mmse = MMSE(v, previous_snr_post, previous_Y)
            snr_prior = SNR_prior_est(MMSE_alpha, previous_mmse,
                                      previous_noise_psd, snr_inst)
            X_psd_est = noise_psd * snr_prior
            snr_average = np.mean(X_psd_est) / np.mean(noise_psd)
            ma_snr_average = update_y(ma_snr_average, snr_average,
                                      alpha=snr_alpha)
            previous_snr_prior = snr_prior

        # Per-frame state update.
        previous_noise_psd = noise_psd
        previous_snr_post = snr_post
        previous_snr_inst = snr_inst
        previous_Y = Y
        idx_ += 1

    return esarr([ma_snr_average])
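# Numpy-only sketch of the decision-directed a priori SNR update implemented above
# (Ephraim & Malah style): a weighted sum of the previous frame's clean-magnitude
# estimate normalized by the noise PSD and the clipped instantaneous SNR. All names
# here are illustrative, not the Essentia API.
import numpy as np

def decision_directed_snr(prev_clean_mag, noise_psd, Y_mag, alpha=0.98):
    snr_post = (np.abs(Y_mag) ** 2) / noise_psd       # a posteriori SNR
    snr_inst = np.clip(snr_post - 1.0, 0.0, None)     # instantaneous SNR, floored at 0
    return alpha * (np.abs(prev_clean_mag) ** 2) / noise_psd + (1.0 - alpha) * snr_inst

noise_psd = np.full(257, 1e-4)
Y_mag = np.full(257, 0.05)
prev_clean = np.full(257, 0.04)
print(decision_directed_snr(prev_clean, noise_psd, Y_mag)[:3])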
for asume_gauss_psd in [0]:
    for noise_only in noise_durations:
        results = []
        gt = []
        for i in range(1):
            noise = np.random.randn(nsamples)
            noise /= np.std(noise)

            signal = np.sin(2 * pi * 5000 * time_axis)
            signal_db = -22.
            noise_db = -50.
            noise_var = instantPower(esarr(db2amp(noise_db) * noise))

            signal[:int(noise_only * fs)] = np.zeros(int(noise_only * fs))

            real_snr_prior = 10. * np.log10(
                (instantPower(esarr(db2amp(signal_db) * signal[int(noise_only * fs):]))) /
                (instantPower(esarr(db2amp(noise_db) * noise[int(noise_only * fs):]))))

            real_snr_prior_esp_corrected = real_snr_prior - 10. * np.log10(fs / 2.)
            gt.append(real_snr_prior_esp_corrected)

            signal_and_noise = esarr(db2amp(signal_db) * signal +
                                     db2amp(noise_db) * noise)

            ma_snr_average = qa.wrappers['Dev'].compute(None, signal_and_noise,
                                                        asume_gauss_psd, noise_alpha)

            mean_snr_estimation = 10 * np.log10(ma_snr_average)
            mean_snr_estimation_corrected = mean_snr_estimation - 10. * np.log10(fs / 2.)

            print('with dev, error: {:.3f}dB'.format(
                np.abs(mean_snr_estimation_corrected[0] - real_snr_prior_esp_corrected)))
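# For reference, minimal numpy equivalents of the dB conversion helpers used throughout
# this experiment (db2amp operates on amplitude dB, db2pow on power dB). These are
# sketches for clarity, not the Essentia implementations themselves.
import numpy as np

def db2amp_np(db):
    return 10.0 ** (db / 20.0)

def db2pow_np(db):
    return 10.0 ** (db / 10.0)

assert np.isclose(20.0 * np.log10(db2amp_np(-22.0)), -22.0)
assert np.isclose(10.0 * np.log10(db2pow_np(-50.0)), -50.0)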
def compute(self, *args):
    y = []
    x = args[1]
    for frame_idx, frame in enumerate(
            es.FrameGenerator(x, frameSize=self.frame_size,
                              hopSize=self.hop_size, startFromZero=True)):
        # frame = es.essentia.normalize(frame)

        # Update the post-power buffers of gaps still waiting for samples.
        for gap in self._gaps:
            if not gap['finished'] and not gap['active']:
                last = np.min([self.frame_size, gap['take']])
                gap['take'] -= last
                gap['buffer'] = np.hstack([gap['buffer'], frame[:last]])
                if gap['take'] <= 0:
                    gap['finished'] = True

        # Report finished gaps that pass the post-power and duration checks.
        remove_idx = []
        for gap_idx, gap in enumerate(self._gaps):
            if gap['finished']:
                remove_idx.append(gap_idx)
                postpower = instantPower(esarr(gap['buffer']))
                if postpower > self._prepower_threshold:
                    if self.min_time <= gap['end'] - gap['start'] <= self.max_time:
                        y.append(gap['start'])

        remove_idx.sort(reverse=True)
        for i in remove_idx:
            self._gaps.pop(i)

        # Envelope -> threshold -> median filter -> edge detection.
        x1 = self.envelope(frame)
        x2 = esarr(x1 > self._threshold)
        x3 = self.medianFilter(x2).round().astype(int)

        x3_d = np.zeros(len(x3))
        start_proc = int(self.frame_size / 2 - self.hop_size / 2)
        end_proc = int(self.frame_size / 2 + self.hop_size / 2)
        for i in range(start_proc, end_proc):
            x3_d[i] = x3[i] - x3[i - 1]

        s_dx = np.argwhere(x3_d == -1)  # falling edges: candidate gap starts
        e_dx = np.argwhere(x3_d == 1)   # rising edges: candidate gap ends

        # initializing
        if s_dx.size:
            offset = frame_idx * self.hop_size
            for s in s_dx:
                s = s[0]
                take_from_buffer = s - self._prepower_samples
                if take_from_buffer > 0:
                    prepower = instantPower(frame[take_from_buffer:s])
                else:
                    prepower = instantPower(esarr(np.hstack(
                        [self.l_buffer[-np.abs(take_from_buffer):], frame[:s]])))
                if prepower > self._prepower_threshold:
                    self._gaps.append({'start': (offset + s) / self.fs,
                                       'end': 0, 'buffer': [], 'take': 0,
                                       'active': True, 'finished': False})

        # finishing
        if e_dx.size and self._gaps:
            offset = frame_idx * self.hop_size
            for e in e_dx:
                e = e[0]
                take_from_next_frame = np.max(
                    [(self._prepower_samples + e) - self.frame_size, 0])
                for gap in self._gaps:
                    if gap['active']:
                        gap['take'] = take_from_next_frame
                        gap['end'] = (offset + e) / self.fs
                        last = np.min([self.frame_size, e + self._prepower_samples])
                        gap['buffer'] = frame[e:last]
                        gap['active'] = False
                        break

        # Update the look-back buffer with the newest samples
        # (np.roll returns a new array, so assign the result back).
        update_num = np.min([self._prepower_samples, self.hop_size])
        self.l_buffer = np.roll(self.l_buffer, -update_num)
        self.l_buffer[-update_num:] = frame[-update_num:]

    self._gaps = []
    return esarr(y)
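# Numpy-only sketch of the edge-detection step used above: the binary activity mask is
# differentiated, so -1 marks a falling edge (candidate gap start) and +1 a rising edge
# (candidate gap end). Names and values are illustrative.
import numpy as np

mask = np.array([1, 1, 1, 0, 0, 0, 0, 1, 1], dtype=int)  # 1 = envelope above threshold
diff = np.diff(mask)

gap_starts = np.argwhere(diff == -1).flatten() + 1  # first sample below threshold
gap_ends = np.argwhere(diff == 1).flatten() + 1     # first sample back above threshold
print(gap_starts, gap_ends)  # [3] [7]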
def compute(audio, pool, options):
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    zeroPadding = options['zeroPadding']
    windowType = options['windowType']

    frameRate = float(sampleRate) / float(frameSize - hopSize)

    INFO('Computing Onset Detection...')

    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=zeroPadding, type=windowType)
    fft = essentia.FFT()
    cartesian2polar = essentia.CartesianToPolar()
    onsetdetectionHFC = essentia.OnsetDetection(method="hfc", sampleRate=sampleRate)
    onsetdetectionComplex = essentia.OnsetDetection(method="complex", sampleRate=sampleRate)
    onsets = essentia.Onsets(frameRate=frameRate)

    total_frames = frames.num_frames()
    n_frames = 0
    start_of_frame = -frameSize * 0.5

    hfc = []
    complex = []  # note: shadows the built-in name, kept as in the original

    progress = Progress(total=total_frames)

    for frame in frames:
        if essentia.instantPower(frame) < 1.e-4:
            total_frames -= 1
            start_of_frame += hopSize
            hfc.append(0.)
            complex.append(0.)
            continue

        windowed_frame = window(frame)
        complex_fft = fft(windowed_frame)
        (spectrum, phase) = cartesian2polar(complex_fft)
        hfc.append(onsetdetectionHFC(spectrum, phase))
        complex.append(onsetdetectionComplex(spectrum, phase))

        # display of progress report
        progress.update(n_frames)
        n_frames += 1
        start_of_frame += hopSize

    # The onset rate is defined as the number of onsets per second
    detections = numpy.concatenate([essentia.array([hfc]),
                                    essentia.array([complex])])

    # prune all 'doubled' detections
    time_onsets = list(onsets(detections, essentia.array([1, 1])))
    t = 1
    while t < len(time_onsets):
        if time_onsets[t] - time_onsets[t - 1] < 0.080:
            time_onsets.pop(t)
        else:
            t += 1

    onsetrate = len(time_onsets) / (len(audio) / sampleRate)

    pool.add(namespace + '.' + "onset_times", essentia.array(time_onsets))  #, pool.GlobalScope)
    pool.add(namespace + '.' + "onset_rate", onsetrate)  #, pool.GlobalScope)
    progress.finish()
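# Self-contained sketch of the pruning step above: onsets closer than 80 ms to the last
# kept onset are treated as doubled detections and dropped, and the onset rate is the
# number of surviving onsets divided by the track duration in seconds. Names are
# illustrative.
def prune_doubled_onsets(onset_times, min_separation=0.080):
    kept = []
    for t in onset_times:
        if not kept or t - kept[-1] >= min_separation:
            kept.append(t)
    return kept

times = [0.50, 0.55, 1.20, 1.26, 2.00]
kept = prune_doubled_onsets(times)
print(kept, len(kept) / 3.0)  # onsets kept, onset rate for a 3 s excerpt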
for noise_alpha in [.9]:
    for asume_gauss_psd in [0]:
        for noise_only in noise_durations:
            results = []
            gt = []
            for i in range(1):
                noise = np.random.randn(nsamples)
                noise /= np.std(noise)

                signal = np.sin(2 * pi * 5000 * time_axis)
                signal_db = -22.
                noise_db = -50.
                noise_var = instantPower(esarr(db2amp(noise_db) * noise))

                signal[:int(noise_only * fs)] = np.zeros(int(noise_only * fs))

                real_snr_prior = 10. * np.log10(
                    (instantPower(esarr(db2amp(signal_db) * signal[int(noise_only * fs):]))) /
                    (instantPower(esarr(db2amp(noise_db) * noise[int(noise_only * fs):]))))

                real_snr_prior_esp_corrected = real_snr_prior - 10. * np.log10(fs / 2.)
                gt.append(real_snr_prior_esp_corrected)

                signal_and_noise = esarr(db2amp(signal_db) * signal +
                                         db2amp(noise_db) * noise)