def testSinusoidalPlusNoise(self):
    """Compare the SNR estimate for a sine in white noise with its ground truth.

    The first ``noise_only`` seconds of the sine are zeroed, so that part of
    the input contains noise alone. The reference value is the power ratio of
    the scaled sine and the scaled noise over the remaining samples, minus
    ``10 * log10(fs / 2)``.
    """
    from essentia import instantPower
    from essentia import db2amp

    frameSize = 512
    hopSize = frameSize // 2
    fs = 44100.
    time = 5.  # s
    time_axis = np.arange(0, time, 1 / fs)
    nsamples = len(time_axis)

    noise = np.random.randn(nsamples)
    noise /= np.std(noise)
    noise_only = 1

    signal = np.sin(2 * pi * 5000 * time_axis)
    signal_db = -22.
    noise_db = -50.

    # Number of leading samples that carry noise only.
    onset = int(noise_only * fs)
    signal[:onset] = np.zeros(onset)

    signal_power = instantPower(esarr(db2amp(signal_db) * signal[onset:]))
    noise_power = instantPower(esarr(db2amp(noise_db) * noise[onset:]))
    snr_gt = 10. * np.log10(signal_power / noise_power) - 10. * np.log10(fs / 2.)

    signal_and_noise = esarr(db2amp(signal_db) * signal + db2amp(noise_db) * noise)

    noiseThreshold = -30
    algo = SNR(frameSize=frameSize, noiseThreshold=noiseThreshold)
    for frame in FrameGenerator(signal_and_noise, frameSize=frameSize, hopSize=hopSize):
        # Only the estimate produced for the last frame is checked below.
        _, snr, _ = algo(frame)

    self.assertAlmostEqual(snr, snr_gt, 1e-1)
def testSinusoidalPlusNoise(self):
    """Check the final SNR estimate for a 5 kHz sine mixed with white noise.

    The sine is silenced for the first ``noise_only`` seconds. The expected
    value is derived from the instant power of the scaled sine and of the
    scaled noise after that point, corrected by ``10 * log10(fs / 2)``.
    """
    from essentia import instantPower
    from essentia import db2amp

    frameSize = 512
    hopSize = frameSize // 2
    fs = 44100.
    time = 5.  # s
    time_axis = np.arange(0, time, 1 / fs)
    nsamples = len(time_axis)

    noise = np.random.randn(nsamples)
    noise /= np.std(noise)
    noise_only = 1

    signal = np.sin(2 * pi * 5000 * time_axis)
    signal_db = -22.
    noise_db = -50.

    # Index of the first sample where the sine is present.
    first_tone_sample = int(noise_only * fs)
    signal[:first_tone_sample] = np.zeros(first_tone_sample)

    tone_power = instantPower(esarr(db2amp(signal_db) * signal[first_tone_sample:]))
    floor_power = instantPower(esarr(db2amp(noise_db) * noise[first_tone_sample:]))
    snr_gt = 10. * np.log10(tone_power / floor_power) - 10. * np.log10(fs / 2.)

    signal_and_noise = esarr(db2amp(signal_db) * signal + db2amp(noise_db) * noise)

    noiseThreshold = -30
    algo = SNR(frameSize=frameSize, noiseThreshold=noiseThreshold)
    for frame in FrameGenerator(signal_and_noise, frameSize=frameSize, hopSize=hopSize):
        # The value left after the loop is the estimate for the last frame.
        _, snr, _ = algo(frame)

    self.assertAlmostEqual(snr, snr_gt, 1e-1)
def testOnes(self):
    """A frame of ones with zeroed first and last samples must start at 0."""
    frame = esarr(np.ones(512))
    frame[0] = 0
    frame[-1] = 0

    detector = SaturationDetector(hopSize=512)
    starts = detector(frame)[0]
    self.assertAlmostEqualVector(starts, esarr([0.]), 1e-4)
def compute(self, *args):
    """Return (time, magnitude) pairs of samples above the clipping threshold.

    The signal in ``args[1]`` is cut into frames. Each frame is resampled
    from ``self._sampleRate`` to ``self._sampleRateOver``, optionally passed
    through a first-order emphasis filter, rectified and, when
    ``self._BlockDC`` is set, combined (element-wise maximum) with a
    DC-blocked and rectified copy.

    ``y`` is rebuilt for every frame, so the returned array holds the peaks
    of the last frame only. ``self._idx`` is incremented once per frame and
    is not reset here.

    Args:
        *args: positional inputs; only ``args[1]`` (the audio signal) is read.

    Returns:
        ``esarr`` built from the list of ``(time, magnitude)`` tuples; empty
        when no frame is produced.
    """
    x = args[1]
    # Bound before the loop so that `return` works even without frames.
    y = []
    for frame in es.FrameGenerator(x, frameSize=self._frameSize, hopSize=self._hopSize, startFromZero=True):
        # Stage 1: Attenuation. Not required because we work in floating point.

        # Stage 2: Resample
        yResample = es.Resample(inputSampleRate=self._sampleRate,
                                outputSampleRate=self._sampleRateOver,
                                quality=self._quality)(frame)

        # Stage 3: Emphasis
        if self._emphatise:
            fPole = 20e3  # Hz
            fZero = 14.1e3
            rPole = fPole / self._sampleRateOver
            rZero = fZero / self._sampleRateOver
            yEmphasis = es.IIR(denominator=esarr([1., rPole]),
                               numerator=esarr([1., -rZero]))(yResample)
        else:
            yEmphasis = yResample

        # Stage 4 Absolute
        yMaxArray = np.abs(yEmphasis)

        # Stage 5 optional DC Block
        if self._BlockDC:
            # NOTE(review): the filter is configured with the original sample
            # rate although it runs on the oversampled signal - confirm.
            yDCBlocked = es.DCRemoval(sampleRate=self._sampleRate,
                                      cutoffFrequency=1.)(yEmphasis)
            yAbsoluteDCBlocked = np.abs(yDCBlocked)
            yMaxArray = np.maximum(yMaxArray, yAbsoluteDCBlocked)

        y = [((i + self._idx * self._hopSize) / float(self._sampleRateOver), yMax)
             for i, yMax in enumerate(yMaxArray)
             if yMax > self._clippingThreshold]
        self._idx += 1
    return esarr(y)
def compute(self, *args):
    """Collect the oversampled samples whose magnitude exceeds the threshold.

    For every frame of ``args[1]`` the method resamples from
    ``self._sampleRate`` to ``self._sampleRateOver``, applies the optional
    emphasis filter, rectifies the result and, if ``self._BlockDC`` is set,
    takes the element-wise maximum with a DC-blocked, rectified copy.

    The output list is reassigned on each frame, so only the last frame's
    peaks are returned. ``self._idx`` grows by one per frame and is never
    reset inside this method.

    Args:
        *args: positional inputs; only ``args[1]`` (the audio signal) is read.

    Returns:
        ``esarr`` of ``(time, magnitude)`` tuples; empty if there were no
        frames.
    """
    x = args[1]
    # Initialised outside the loop so the return value always exists.
    y = []
    frames = es.FrameGenerator(x, frameSize=self._frameSize, hopSize=self._hopSize, startFromZero=True)
    for frame in frames:
        # Stage 1: Attenuation. Not required because we work in floating point.

        # Stage 2: Resample
        resampler = es.Resample(inputSampleRate=self._sampleRate,
                                outputSampleRate=self._sampleRateOver,
                                quality=self._quality)
        yResample = resampler(frame)

        # Stage 3: Emphasis
        if self._emphatise:
            fPole = 20e3  # Hz
            fZero = 14.1e3
            rPole = fPole / self._sampleRateOver
            rZero = fZero / self._sampleRateOver
            emphasis = es.IIR(denominator=esarr([1., rPole]),
                              numerator=esarr([1., -rZero]))
            yEmphasis = emphasis(yResample)
        else:
            yEmphasis = yResample

        # Stage 4 Absolute
        yMaxArray = np.abs(yEmphasis)

        # Stage 5 optional DC Block
        if self._BlockDC:
            # NOTE(review): uses the original sample rate on an oversampled
            # signal - confirm this is intended.
            dcBlocker = es.DCRemoval(sampleRate=self._sampleRate, cutoffFrequency=1.)
            yAbsoluteDCBlocked = np.abs(dcBlocker(yEmphasis))
            yMaxArray = np.maximum(yMaxArray, yAbsoluteDCBlocked)

        y = [((i + self._idx * self._hopSize) / float(self._sampleRateOver), yMax)
             for i, yMax in enumerate(yMaxArray)
             if yMax > self._clippingThreshold]
        self._idx += 1
    return esarr(y)
def testOnes(self):
    """All-ones inputs of several lengths must yield (1, 1)."""
    # Halve the length from 200000 (apx. 4.5s @ 44.1kHz) while it stays above 1000.
    sizes = []
    size = 200000
    while size > 1000:
        sizes.append(size)
        size //= 2

    for size in sizes:
        cuts = self.InitStartStopCut()(esarr(numpy.ones(size)))
        self.assertEqualVector(cuts, (1, 1))
def testBroadbandNoiseCorrection(self):
    """The corrected SNR must equal the uncorrected one minus 10*log10(fs/2).

    Two SNR instances that differ only in ``useBroadbadNoiseCorrection`` are
    fed the same frames; the estimates left after the last frame are compared.
    """
    from essentia import instantPower
    from essentia import db2amp

    frameSize = 512
    hopSize = frameSize // 2
    fs = 44100.
    time = 1.  # s
    time_axis = np.arange(0, time, 1 / fs)
    nsamples = len(time_axis)

    noise = np.random.randn(nsamples)
    noise /= np.std(noise)
    noise_only = .2

    signal = np.sin(2 * pi * 5000 * time_axis)
    signal_db = -22.
    noise_db = -50.

    # The sine is silenced during the first `noise_only` seconds.
    onset = int(noise_only * fs)
    signal[:onset] = np.zeros(onset)

    signal_and_noise = esarr(db2amp(signal_db) * signal + db2amp(noise_db) * noise)

    noiseThreshold = -30
    corrected = SNR(frameSize=frameSize, noiseThreshold=noiseThreshold)
    notCorrected = SNR(frameSize=frameSize,
                       noiseThreshold=noiseThreshold,
                       useBroadbadNoiseCorrection=False)

    for frame in FrameGenerator(signal_and_noise, frameSize=frameSize, hopSize=hopSize):
        _, snrCorrected, _ = corrected(frame)
        _, snrNotCorrected, _ = notCorrected(frame)

    expected = snrNotCorrected - 10. * np.log10(fs / 2)
    self.assertAlmostEqual(snrCorrected, expected, 1e-4)
def testRegression(self, frameSize=512, hopSize=256):
    """Cut a chunk out of a recording and expect one discontinuity at the cut."""
    fs = 44100
    audio = MonoLoader(filename=join(testdata.audio_dir, 'recorded/cat_purrrr.wav'),
                       sampleRate=fs)()
    originalLen = len(audio)
    startJump = originalLen // 4
    groundTruth = [startJump / float(fs)]

    # Make sure that the artificial jump produces a prominent discontinuity:
    # resume at the first sample beyond +/-0.3 with the opposite sign.
    if audio[startJump] > 0:
        crossed = lambda value: value < -.3
    else:
        crossed = lambda value: value > .3
    end = next(k for k, value in enumerate(audio[startJump:]) if crossed(value))
    endJump = startJump + end

    audio = esarr(numpy.hstack([audio[:startJump], audio[endJump:]]))

    frameList = []
    discontinuityDetector = self.InitDiscontinuityDetector(
        frameSize=frameSize, hopSize=hopSize, detectionThreshold=10)
    frames = FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize, startFromZero=True)
    for frameIdx, frame in enumerate(frames):
        locs, _ = discontinuityDetector(frame)
        # Convert frame-relative sample positions to seconds.
        frameList.extend((frameIdx * hopSize + loc) / float(fs) for loc in locs)

    self.assertAlmostEqualVector(frameList, groundTruth, 1e-7)
def testZero(self):
    """All-zero inputs of several lengths must yield (0, 0)."""
    # Test different input sizes, halving from 200000 (apx. 4.5s @ 44.1kHz)
    # while the size stays above 1000.
    sizes = []
    size = 200000
    while size > 1000:
        sizes.append(size)
        size //= 2

    for size in sizes:
        cuts = self.InitStartStopCut()(esarr(numpy.zeros(size)))
        self.assertEqualVector(cuts, (0, 0))
def testRegression(self, frameSize=512, hopSize=256):
    """Add three single-sample bumps and expect them as click starts and ends."""
    fs = 44100.
    audio = MonoLoader(filename=join(testdata.audio_dir, 'recorded/vignesh.wav'),
                       sampleRate=fs)()
    originalLen = len(audio)
    jumpLocation1 = int(originalLen / 4.)
    jumpLocation2 = int(originalLen / 2.)
    jumpLocation3 = int(originalLen * 3 / 4.)

    # Artificial clicks of decreasing amplitude.
    bumps = ((jumpLocation1, .1), (jumpLocation2, .08), (jumpLocation3, .05))
    for location, amplitude in bumps:
        audio[location] += amplitude

    groundTruth = esarr([jumpLocation1, jumpLocation2, jumpLocation3]) / fs

    clickStarts = []
    clickEnds = []
    clickdetector = ClickDetector(frameSize=frameSize, hopSize=hopSize)
    for frame in FrameGenerator(audio, frameSize=frameSize, hopSize=hopSize, startFromZero=True):
        starts, ends = clickdetector(frame)
        if len(starts) == 0:
            continue
        clickStarts.extend(starts)
        clickEnds.extend(ends)

    self.assertAlmostEqualVector(clickStarts, groundTruth, 1e-5)
    self.assertAlmostEqualVector(clickEnds, groundTruth, 1e-5)
def lowSNR_detector(audio: "np.ndarray", frame_size=1024, hop_size=512, nrg_th=0.1, ac_th=0.6, snr_th=5):
    """Estimate a frame-based SNR and flag the audio when it is below ``snr_th``.

    Every frame gets an energy value (sum of squares) and an autocorrelation
    ratio (lag-0 value over the sum of the half autocorrelation). Both series
    are normalised by their maximum. A frame counts as signal when its
    normalised energy is at least ``nrg_th`` and its ratio is below ``ac_th``;
    every other frame counts as noise. The SNR is the dB ratio of the mean
    squared energies of both groups.

    Args:
        audio: samples as a 1-D array, or a 2-D array whose columns are
            flattened one after another (column-major).
        frame_size: frame length handed to ``FrameGenerator``.
        hop_size: hop length handed to ``FrameGenerator``.
        nrg_th: normalised-energy threshold separating noise from candidates.
        ac_th: autocorrelation-ratio threshold; previously ignored because the
            body overwrote it with 0.6, which is still the default.
        snr_th: SNR in dB below which the audio is flagged.

    Returns:
        Tuple ``(snr, snr < snr_th)``. ``snr`` is ``np.inf`` when no frame was
        classified as noise and ``10 * log10(eps)`` when none was signal.
    """
    audio = np.asarray(audio)
    if audio.ndim > 1:
        # Concatenate the columns; also turns an (N, 1) input into 1-D.
        audio = np.reshape(audio, audio.shape[0] * audio.shape[1], order='F')

    audio = audio.astype("float32")
    # Scaled by the maximum sample value (not the absolute peak), as before.
    audio = audio / max(audio)
    # NOTE(review): the float16 round trip quantises the samples before they
    # are handed to esarr - confirm this is intended.
    audio = esarr(audio.astype("float16"))

    ac_arr = []
    nrg_arr = []
    for frame in estd.FrameGenerator(audio, frameSize=frame_size, hopSize=hop_size, startFromZero=True):
        ac = abs(autocorr(frame, mode="half"))
        ac_total = sum(ac)
        ac_arr.append(ac[0] / ac_total if ac_total > 0 else 0)
        nrg_arr.append(sum(frame**2))

    # Normalise both series to a maximum of one.
    ac_arr = np.asarray(ac_arr) / max(ac_arr)
    nrg_arr = np.asarray(nrg_arr) / max(nrg_arr)

    sig_pwr = 0
    noise_pwr = 0
    sig_cnt = 0
    noise_cnt = 0
    for nrg, ac in zip(nrg_arr, ac_arr):
        if nrg >= nrg_th and ac < ac_th:
            sig_pwr += nrg**2
            sig_cnt += 1
        else:
            noise_pwr += nrg**2
            noise_cnt += 1

    if noise_cnt == 0:
        snr = np.inf
    elif sig_cnt == 0:
        snr = 10 * np.log10(eps)
    else:
        sig_pwr /= sig_cnt
        noise_pwr /= noise_cnt
        snr = 10 * np.log10(sig_pwr / noise_pwr)

    return snr, snr < snr_th
def testZero(self):
    """Inputs made only of zeros must yield (0, 0) for every tested length."""
    # Test different input sizes: start at 200000 (apx. 4.5s @ 44.1kHz) and
    # keep halving while the size is above 1000.
    lengths = []
    length = 200000
    while length > 1000:
        lengths.append(length)
        length //= 2

    for length in lengths:
        silence = esarr(numpy.zeros(length))
        self.assertEqualVector(self.InitStartStopCut()(silence), (0, 0))
def compute(self, *args):
    """Run a fresh ``es.GapsDetector`` over ``args[1]`` and return its starts.

    The frame and hop sizes come from the names ``frame_size`` and
    ``hop_size`` of the enclosing scope.
    """
    x = args[1]
    detector = es.GapsDetector()
    frames = es.FrameGenerator(x, frameSize=frame_size, hopSize=hop_size, startFromZero=True)

    y = []
    for frame in frames:
        starts, _ = detector(frame)
        y.extend(starts)
    return esarr(y)
def compute(self, *args):
    """Reset ``self.algo``, run it frame by frame and return the start values.

    The second output of the algorithm is discarded. The frame and hop sizes
    come from the names ``frameSize`` and ``hopSize`` of the enclosing scope.
    """
    x = args[1]
    self.algo.reset()

    y = []
    for frame in es.FrameGenerator(x, frameSize=frameSize, hopSize=hopSize, startFromZero=True):
        starts, _ = self.algo(frame)
        y.extend(starts)
    return esarr(y)
def testSquareWave(self):
    """Square waves at several frequencies must not produce detections."""
    # The algorithm should be robust to squarewaves if
    # there are at least a few periods on the frame:
    # f > ~200Hz for a window size of 512 @ 44.1kHz
    # Try different frequencies.
    fs = 44100
    minFreq = 200  # Hz
    maxFreq = 20000  # Hz
    time = 10  # s
    for f in numpy.linspace(minFreq, maxFreq, 5):
        # Even number of samples per period: first half ones, second half zeros.
        period = int(fs / f)
        period -= period % 2
        half = period // 2
        waveTable = [1] * half + [0] * (period - half)

        waveDur = len(waveTable) / 44100.
        repetitions = int(time / waveDur)
        wave = waveTable * repetitions

        locations = self.InitDiscontinuityDetector()(esarr(wave))[0]
        self.assertEqualVector(locations, esarr([]))
def testSquareWaves(self):
    """Saturation starts/ends must follow the periods of several square waves."""
    # The algorithm should be able to detect the positive part
    # of square waves at different frequencies.
    fs = 44100
    minFreq = 100  # Hz
    maxFreq = 10000  # Hz
    time = .1  # s
    sd = SaturationDetector(minimumDuration=.0, hopSize=512)
    for f in numpy.linspace(minFreq, maxFreq, 5):
        # Even number of samples per period: first half ones, second half zeros.
        period = int(fs / f)
        period -= period % 2
        half = period // 2
        waveTable = [1] * half + [0] * (period - half)

        waveDur = len(waveTable) / 44100.
        repetitions = int(time / waveDur)
        realStarts = esarr(range(0, repetitions)) * waveDur
        realEnds = realStarts + waveDur
        wave = waveTable * repetitions

        # Gather the per-frame outputs and join them once at the end.
        startChunks = [esarr([])]
        endChunks = [esarr([])]
        for frame in FrameGenerator(wave, frameSize=512, hopSize=512, startFromZero=True):
            s, e = sd(frame)
            startChunks.append(s)
            endChunks.append(e)
        starts = np.hstack(startChunks)
        ends = np.hstack(endChunks)

        self.assertAlmostEqualVectorFixedPrecision(starts, realStarts, 2)
        self.assertAlmostEqualVectorFixedPrecision(ends, realEnds, 2)
        sd.reset()
def testLongSaturation(self, frameSize=512, hopSize=256):
    """One second of ones between two seconds of zeros: start at 1 s, end at 2 s."""
    fs = 44100
    silence = [0] * fs
    signal = silence + [1] * fs + silence

    sd = SaturationDetector(frameSize=frameSize, hopSize=hopSize)
    starts = []
    ends = []
    frames = FrameGenerator(esarr(signal), frameSize=frameSize, hopSize=hopSize, startFromZero=True)
    for frame in frames:
        s, e = sd(frame)
        starts.extend(s)
        ends.extend(e)

    self.assertAlmostEqualVector(starts, [1.], 1e-4)
    self.assertAlmostEqualVector(ends, [2.], 1e-4)
def compute(self, *args):
    """Skeleton detector: scans the central region of each frame, returns nothing.

    ``self._idx`` is reset to 0. For every frame of ``args[1]`` the rectified
    samples between ``s`` and ``e`` are compared against
    ``self._energyThreshold``, but no result is stored, so the returned array
    is always empty.
    """
    x = args[1]
    y = []
    self._idx = 0
    frames = es.FrameGenerator(x, frameSize=self._frameSize, hopSize=self._hopSize, startFromZero=True)
    for frame in frames:
        frame = np.abs(frame)
        starts = []
        ends = []
        halfFrame = self._frameSize / 2
        halfHop = self._hopSize / 2
        s = int(halfFrame - halfHop) - 1  # consider non overlapping case
        e = int(halfFrame + halfHop)
        for position in range(s, e):
            if frame[position] >= self._energyThreshold:
                continue
    return esarr(y)
def essStartstopDetector(x, frameSize=1024, hopSize=512, percentageThreshold=10, **kwargs):
    """Report how much of x StartStopCut returns as start cut plus stop cut.

    Args:
        x: (list) input signal
        frameSize: (int) frame size for the analysis in StartStopCut
        hopSize: (int) hopSize for the analysis in StartStopCut
        percentageThreshold: (number) limit the percentage is compared against
    Kwargs:
        forwarded unchanged to StartStopCut
    Returns:
        (percentage, flag): percentage is 100 * (startCut + stopCut) / len(x)
        rounded to two decimals; flag is True when it exceeds
        percentageThreshold
    """
    detector = StartStopCut(frameSize=frameSize, hopSize=hopSize, **kwargs)
    startCut, stopCut = detector(esarr(x))

    cutShare = 100 * (startCut + stopCut) / len(x)
    percentage = round(cutShare, 2)
    return percentage, percentage > percentageThreshold
def testBroadbandNoiseCorrection(self):
    """Disabling the broadband correction must shift the SNR by 10*log10(fs/2).

    The same frames go through an SNR instance with default settings and one
    created with ``useBroadbadNoiseCorrection=False``; the values left after
    the last frame are compared.
    """
    from essentia import instantPower
    from essentia import db2amp

    frameSize = 512
    hopSize = frameSize // 2
    fs = 44100.
    time = 1.  # s
    time_axis = np.arange(0, time, 1 / fs)
    nsamples = len(time_axis)

    noise = np.random.randn(nsamples)
    noise /= np.std(noise)
    noise_only = .2

    signal = np.sin(2 * pi * 5000 * time_axis)
    signal_db = -22.
    noise_db = -50.

    # Leading samples in which the sine is replaced by zeros.
    silent_samples = int(noise_only * fs)
    signal[:silent_samples] = np.zeros(silent_samples)

    signal_and_noise = esarr(db2amp(signal_db) * signal + db2amp(noise_db) * noise)

    noiseThreshold = -30
    corrected = SNR(frameSize=frameSize, noiseThreshold=noiseThreshold)
    notCorrected = SNR(frameSize=frameSize,
                       noiseThreshold=noiseThreshold,
                       useBroadbadNoiseCorrection=False)

    frames = FrameGenerator(signal_and_noise, frameSize=frameSize, hopSize=hopSize)
    for frame in frames:
        _, snrCorrected, _ = corrected(frame)
        _, snrNotCorrected, _ = notCorrected(frame)

    offset = 10. * np.log10(fs / 2)
    self.assertAlmostEqual(snrCorrected, snrNotCorrected - offset, 1e-4)
def testZero(self):
    """The second output of SNR for an all-zero frame must be minus infinity."""
    algo = SNR()
    outputs = algo(esarr(np.zeros(512)))
    self.assertEqual(outputs[1], -np.inf)
def testOnes(self):
    """A frame of ones must give an empty first output from ClickDetector."""
    detector = ClickDetector()
    outputs = detector(esarr(np.ones(512)))
    self.assertEqualVector(outputs[0], esarr([]))
def compute(self, *args):
    """Detect flat, high-energy regions and return their start times.

    Each frame of ``args[1]`` is rectified. Inside the central window
    ``[s, e)`` a mask marks samples that are above ``self._energyThreshold``
    and whose difference to the previous sample is at most
    ``self._differentialThreshold``. Rising and falling edges of that mask
    delimit candidate regions. A region that is still open when the window
    ends is remembered in ``self._previousRegion`` and closed by the first
    falling edge of a later frame.

    For every region longer than ``self._minimumDuration`` a parabola is
    fitted through the samples around both edges. When its estimated maximum
    exceeds 1.0 a diagnostic plot is written below ``clipping_plots/``.

    Args:
        *args: positional inputs; only ``args[1]`` (the audio signal) is read.

    Returns:
        ``esarr`` with the collected start times in seconds.

    Raises:
        EssentiaException: if rising and falling edges cannot be paired.
    """
    x = args[1]
    y = []
    self._idx = 0
    for frame in es.FrameGenerator(x, frameSize=self._frameSize, hopSize=self._hopSize, startFromZero=True):
        frame = np.abs(frame)
        starts = []
        ends = []
        s = int(self._frameSize / 2 - self._hopSize / 2) - 1  # consider non overlapping case
        e = int(self._frameSize / 2 + self._hopSize / 2)

        # First difference, padded so it has the same length as the frame.
        delta = np.diff(frame)
        delta = np.insert(delta, 0, 0)

        energyMask = np.array([sample > self._energyThreshold for sample in frame])[s:e].astype(int)
        deltaMask = np.array([np.abs(step) <= self._differentialThreshold for step in delta])[s:e].astype(int)
        combinedMask = energyMask * deltaMask

        # +1 marks a rising edge of the mask, -1 a falling edge.
        flanks = np.diff(combinedMask)
        uFlanks = [pos for pos, flank in enumerate(flanks) if flank == 1]
        dFlanks = [pos for pos, flank in enumerate(flanks) if flank == -1]

        # Sample values at each edge and at its neighbour outside the region.
        uFlanksValues = []
        uFlanksPValues = []
        for uFlank in uFlanks:
            uFlanksValues.append(frame[uFlank + s])
            uFlanksPValues.append(frame[uFlank + s - 1])

        dFlanksValues = []
        dFlanksPValues = []
        for dFlank in dFlanks:
            dFlanksValues.append(frame[dFlank + s])
            dFlanksPValues.append(frame[dFlank + s + 1])

        if self._previousRegion and dFlanks:
            # Close the region that was left open by an earlier frame.
            start = self._previousRegion
            end = (self._idx * self._hopSize + dFlanks[0] + s) / self._sampleRate
            # Fixed: this was `start - end`, which is never positive here.
            duration = end - start
            if duration > self._minimumDuration:
                starts.append(start)
                ends.append(end)
            self._previousRegion = None
            del dFlanks[0]
            del dFlanksValues[0]
            del dFlanksPValues[0]

        # Lengths are compared by value; `is not` tested object identity.
        if len(dFlanks) != len(uFlanks):
            # The last rising edge has no falling edge yet: keep it open.
            # NOTE(review): raises IndexError when uFlanks is empty, e.g. a
            # falling edge with no open region - confirm how to handle it.
            self._previousRegion = (self._idx * self._hopSize + uFlanks[-1] + s) / self._sampleRate
            del uFlanks[-1]

        if len(dFlanks) != len(uFlanks):
            # Arguments now follow the order of the labels in the message.
            raise EssentiaException(
                "Ath this point uFlanks ({}) and dFlanks ({}) are expected to have the same length!".format(len(uFlanks), len(dFlanks)))

        for idx in range(len(uFlanks)):
            start = float(self._idx * self._hopSize + uFlanks[idx] + s) / self._sampleRate
            end = float(self._idx * self._hopSize + dFlanks[idx] + s) / self._sampleRate
            duration = end - start
            if duration > self._minimumDuration:
                xs = [uFlanks[idx] - 1, uFlanks[idx], dFlanks[idx], dFlanks[idx] + 1]
                ys = [uFlanksPValues[idx], uFlanksValues[idx], dFlanksValues[idx], dFlanksPValues[idx]]

                coefs = np.polyfit(xs, ys, 2)
                starts.append(start)
                ends.append(end)

                estx, esty = self.maxParable(coefs)

                if esty > 1.0:
                    # NOTE(review): `start` was already appended above, so it
                    # is reported twice here - confirm this is intended.
                    starts.append(start)

                    import matplotlib
                    matplotlib.use('Agg')
                    import matplotlib.pyplot as plt

                    plt.axvline(s, color='r')
                    plt.axvline(e, color='r')
                    plt.plot(frame, label='audio')
                    xs = s + uFlanks[idx]
                    plt.axvline(xs, color='g', alpha=.2)
                    xs = s + dFlanks[idx]
                    plt.axvline(xs, color='g', alpha=.2)

                    # NOTE(review): the first edges (index 0) are plotted
                    # while `ys` belongs to region `idx` - confirm.
                    xs = [uFlanks[idx] - 1, uFlanks[0], dFlanks[0], dFlanks[0] + 1]
                    xs2 = np.array(xs) + s
                    plt.plot(xs2, ys, 'ro', label='points used for the parable estimation')
                    plt.plot(estx + s, esty, 'yo', label='estimated audio peak')

                    x3 = np.linspace(xs[0], xs[-1], 10)
                    y3 = [self.parEval(xx, coefs) for xx in x3]
                    plt.plot(x3 + s, y3, label='estimated parable', alpha=.2)

                    plt.axhline(1.0, color='g', ls='--', alpha=.2, label='maximun dynamic range')
                    plt.title('Parabolic Regression for Clipping Detection')
                    plt.xlim(xs2[0]-15, xs2[0] + 15)
                    plt.legend()

                    plot_name = 'clipping_plots/{}_{}'.format(self._idx, uFlanks[idx])
                    plt.savefig(plot_name)
                    plt.clf()

        y.extend(starts)
        self._idx += 1
    return esarr(y)
def testZero(self):
    """A single frame of zeros must yield no discontinuity locations."""
    # An array of zeros should return an empty list.
    size = 1024
    detector = self.InitDiscontinuityDetector(frameSize=size)
    locations = detector(esarr(numpy.zeros(size)))[0]
    self.assertEqualVector(locations, esarr([]))
def compute(self, *args):
    """Return the start times of the gaps found in ``args[1]``.

    Per frame, the thresholded and median-filtered envelope is differentiated
    inside the central window. A step of -1 opens a gap candidate, provided
    the power of the ``self._prepower_samples`` samples before it exceeds
    ``self._prepower_threshold``. A step of +1 closes the oldest active
    candidate and starts collecting the samples that follow it. Once enough
    samples are collected, the gap start is reported if the power after the
    gap also exceeds the threshold and the gap length lies within
    ``[self.min_time, self.max_time]``.

    ``self._gaps`` is emptied before returning.

    Args:
        *args: positional inputs; only ``args[1]`` (the audio signal) is read.

    Returns:
        ``esarr`` with the accepted gap start times in seconds.
    """
    y = []
    x = args[1]
    frames = es.FrameGenerator(x, frameSize=self.frame_size, hopSize=self.hop_size, startFromZero=True)
    for frame_idx, frame in enumerate(frames):
        # frame = es.essentia.normalize(frame)

        # updating buffers: feed closed gaps that still wait for samples
        for gap in self._gaps:
            if not gap['finished'] and not gap['active']:
                last = np.min([self.frame_size, gap['take']])
                gap['take'] -= last
                gap['buffer'] = np.hstack([gap['buffer'], frame[:last]])
                if gap['take'] <= 0:
                    gap['finished'] = True

        # Evaluate finished gaps and remember which entries to drop.
        remove_idx = []
        for gap_idx, gap in enumerate(self._gaps):
            if gap['finished']:
                remove_idx.append(gap_idx)
                postpower = instantPower(esarr(gap['buffer']))
                if postpower > self._prepower_threshold:
                    if self.min_time <= gap['end'] - gap['start'] <= self.max_time:
                        y.append(gap['start'])

        # Pop from the back so the remaining indices stay valid.
        remove_idx.sort(reverse=True)
        for i in remove_idx:
            self._gaps.pop(i)

        x1 = self.envelope(frame)
        x2 = esarr(x1 > self._threshold)
        x3 = self.medianFilter(x2).round().astype(int)

        # Differentiate the binary mask inside the central window only.
        x3_d = np.zeros(len(x3))
        start_proc = int(self.frame_size / 2 - self.hop_size / 2)
        end_proc = int(self.frame_size / 2 + self.hop_size / 2)
        for i in range(start_proc, end_proc):
            x3_d[i] = x3[i] - x3[i - 1]

        s_dx = np.argwhere(x3_d == -1)
        e_dx = np.argwhere(x3_d == 1)

        # initializing
        if s_dx.size:
            offset = frame_idx * self.hop_size
            for s in s_dx:
                s = s[0]
                take_from_buffer = s - self._prepower_samples
                # `>= 0`: with exactly zero, the old `> 0` test fell through
                # to `l_buffer[-0:]`, which prepends the whole buffer.
                if take_from_buffer >= 0:
                    prepower = instantPower(frame[take_from_buffer:s])
                else:
                    prepower = instantPower(
                        esarr(np.hstack([self.l_buffer[-np.abs(take_from_buffer):],
                                         frame[:s]])))
                if prepower > self._prepower_threshold:
                    self._gaps.append({
                        'start': (offset + s) / self.fs,
                        'end': 0,
                        'buffer': [],
                        'take': 0,
                        'active': True,
                        'finished': False
                    })

        # finishing
        if e_dx.size and self._gaps:
            offset = frame_idx * self.hop_size
            for e in e_dx:
                e = e[0]
                take_from_next_frame = np.max([(self._prepower_samples + e) - self.frame_size, 0])
                for gap in self._gaps:
                    if gap['active']:
                        gap['take'] = take_from_next_frame
                        gap['end'] = (offset + e) / self.fs
                        last = np.min([self.frame_size, e + self._prepower_samples])
                        gap['buffer'] = frame[e:last]
                        gap['active'] = False
                        break

        # update buffers
        update_num = np.min([self._prepower_samples, self.hop_size])
        # np.roll returns a shifted copy; the old call discarded it, so the
        # buffer was never shifted before its tail was overwritten.
        self.l_buffer[:] = np.roll(self.l_buffer, -update_num)
        self.l_buffer[-update_num:] = frame[-update_num:]

    self._gaps = []
    return esarr(y)
def testOnes(self):
    """A frame of ones must give an empty second output from HumDetector."""
    detector = HumDetector()
    outputs = detector(esarr(np.ones(512)))
    self.assertEqualVector(outputs[1], esarr([]))
def testZero(self):
    """A frame of zeros must give an empty second output from HumDetector."""
    detector = HumDetector()
    outputs = detector(esarr(np.zeros(512)))
    self.assertEqualVector(outputs[1], esarr([]))
# For every .wav file: remove a chunk starting at a quarter of the file so
# that the signal jumps to a sample of opposite sign, then write the edited
# audio and a label file holding the jump time.
for f in find_files(in_folder, '.wav'):
    audio = es.MonoLoader(filename=f, sampleRate=fs)()
    original_len = len(audio)
    start_jump = original_len // 4

    if audio[start_jump] > 0:
        # Want at least a gap of .5
        end = next(k for k, value in enumerate(audio[start_jump:]) if value < -.3)
    else:
        end = next(k for k, value in enumerate(audio[start_jump:]) if value > .3)
    end_jump = start_jump + end

    audio = np.hstack([audio[:start_jump], audio[end_jump:]])
    text = ['{}\t0.0\tevent\n'.format(start_jump / float(fs))]

    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    # File name without its last extension; inner dots are dropped as well.
    name_parts = os.path.basename(f).split('.')[:-1]
    f_name = ''.join(name_parts)

    with open('{}/{}_prominent_jump.lab'.format(out_folder, f_name), 'w') as o_file:
        o_file.write(''.join(text))

    writer = es.MonoWriter(filename='{}/{}_prominent_jump.wav'.format(out_folder, f_name))
    writer(esarr(audio))
# Mix a 50 Hz sinusoid into every flac file found and write the result as wav.
in_folder = '/home/pablo/data/sns-small/samples'
out_folder = '/home/pablo/reps/essentia/test/QA-audio/Hum/Songs50HzHum'
fs = 44100.

files = list(find_files(in_folder, 'flac'))
if not files:
    print('no files found!')

for f in files:
    try:
        audio = es.MonoLoader(filename=f, sampleRate=fs)()
    except Exception:
        print('{} was not loaded'.format(f))
        continue

    fs = 44100.
    n_samples = len(audio)
    t = np.linspace(0, n_samples / fs, n_samples)
    freq = 50
    sinusoid = np.sin(2 * PI * freq * t)
    signal = np.array(.95 * audio + .005 * sinusoid, dtype=np.float32)

    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    # File name without its last extension; inner dots are dropped as well.
    name_parts = os.path.basename(f).split('.')[:-1]
    f_name = ''.join(name_parts)

    writer = es.MonoWriter(filename='{}/{}_hum.wav'.format(out_folder, f_name))
    writer(esarr(signal))
# Add a low-level 50 Hz tone to each flac file and store the mix as wav.
in_folder = '/home/pablo/data/sns-small/samples'
out_folder = '/home/pablo/reps/essentia/test/QA-audio/Hum/Songs50HzHum'
fs = 44100.

files = list(find_files(in_folder, 'flac'))
if not files:
    print('no files found!')

for f in files:
    try:
        audio = es.MonoLoader(filename=f, sampleRate=fs)()
    except Exception:
        print('{} was not loaded'.format(f))
        continue

    fs = 44100.
    duration = len(audio) / fs
    t = np.linspace(0, duration, len(audio))
    freq = 50
    sinusoid = np.sin(2 * PI * freq * t)
    signal = np.array(.95 * audio + .005 * sinusoid, dtype=np.float32)

    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    # Base name with the last extension removed (inner dots vanish too).
    base_parts = os.path.basename(f).split('.')
    f_name = ''.join(base_parts[:-1])

    out_path = '{}/{}_hum.wav'.format(out_folder, f_name)
    es.MonoWriter(filename=out_path)(esarr(signal))
import matplotlib.pyplot as plt
import os
import numpy as np

# Plot the magnitude spectrum (dB) of every .wav file in DIR and save it as a
# .png next to the audio file.
DIR = "../Dataset/BW detection/"

for file in os.listdir(DIR):
    fpath = os.path.join(DIR, file)
    name, extension = os.path.splitext(file)
    print(file)
    if extension != ".wav":
        continue

    x, SR, channels, _, br, _ = estd.AudioLoader(filename=fpath)()
    channels = x.shape[1]
    if channels != 1:
        # Average the first two channels.
        x = (x[:, 0] + x[:, 1]) / 2
    print(x.shape, SR, channels, br)

    window = estd.Windowing(size=len(x), type="hann")
    x = window(x)

    # Zero-pad up to the next power of two.
    N = int(2**(np.ceil(np.log2(len(x)))))
    x = np.append(x, np.zeros(N - len(x)))
    x = esarr(x)

    tfX = estd.FFT()(x)
    tfX = 20 * np.log10(abs(tfX))

    f = np.arange(int(len(x) / 2) + 1) / len(x) * SR
    plt.plot(f, tfX[:int(len(x) / 2) + 1])
    plt.savefig(os.path.join(DIR, name + ".png"))
    plt.clf()
def testOnes(self):
    """The second output of SNR for a frame of ones must be plus infinity."""
    algo = SNR()
    outputs = algo(esarr(np.ones(512)))
    self.assertEqual(outputs[1], np.inf)
def testInputTooShort(self):
    """An input as short as one frame must make the computation fail."""
    # If the input size is smaller that the detection thresholds plus
    # the size of a frame it should throw an Exception.
    size = 1024
    algo = StartStopCut(frameSize=size)
    shortInput = esarr(numpy.ones(size))
    self.assertComputeFails(algo, shortInput)
# Remove a chunk of random length, starting at a quarter of each wav file, and
# write the edited audio together with a label file holding the jump time.
in_folder = '../../audio/recorded'
out_folder = '../../QA-audio/Jumps/'
fs = 44100.

files = list(find_files(in_folder, 'wav'))
for f in files:
    try:
        audio = es.MonoLoader(filename=f, sampleRate=fs)()
    except Exception:
        print('{} was not loaded'.format(f))
        continue

    original_len = len(audio)
    start_jump = original_len // 4
    # Length of the removed chunk: |N(0, 1)| seconds.
    removed = int(np.abs(np.random.randn()) * fs)
    end_jump = start_jump + removed

    audio = np.hstack([audio[:start_jump], audio[end_jump:]])
    text = ['{}\t0.0\tevent\n'.format(start_jump / float(fs))]

    if not os.path.exists(out_folder):
        os.mkdir(out_folder)

    # File name without its last extension; inner dots are dropped as well.
    name_parts = os.path.basename(f).split('.')[:-1]
    f_name = ''.join(name_parts)

    with open('{}/{}_jump.lab'.format(out_folder, f_name), 'w') as o_file:
        o_file.write(''.join(text))

    writer = es.MonoWriter(filename='{}/{}_jump.wav'.format(out_folder, f_name))
    writer(esarr(audio))
def testZero(self):
    """A frame of zeros must give an empty first output from ClickDetector."""
    detector = ClickDetector()
    outputs = detector(esarr(np.zeros(512)))
    self.assertEqualVector(outputs[0], esarr([]))
def testInputTooShort(self):
    """StartStopCut must refuse an input that is only one frame long."""
    # If the input size is smaller that the detection thresholds plus
    # the size of a frame it should throw an Exception.
    size = 1024
    cutter = StartStopCut(frameSize=size)
    ones = esarr(numpy.ones(size))
    self.assertComputeFails(cutter, ones)
def compute(self, *args):
    """Detect gaps in ``args[1]`` and return their start times in seconds.

    For each frame the envelope is thresholded, median filtered and
    differentiated inside the central window. A -1 step opens a gap
    candidate when the power of the preceding ``self._prepower_samples``
    samples is above ``self._prepower_threshold``. A +1 step closes the
    oldest active candidate and starts gathering the samples after it. A
    closed gap is reported once enough samples were gathered, if their power
    is above the same threshold and the gap duration lies within
    ``[self.min_time, self.max_time]``.

    ``self._gaps`` is cleared before returning.

    Args:
        *args: positional inputs; only ``args[1]`` (the audio signal) is read.

    Returns:
        ``esarr`` with the accepted gap start times.
    """
    y = []
    x = args[1]
    for frame_idx, frame in enumerate(es.FrameGenerator(x, frameSize=self.frame_size,
                                                        hopSize=self.hop_size,
                                                        startFromZero=True)):
        # frame = es.essentia.normalize(frame)

        # updating buffers: closed gaps keep taking samples from new frames
        for gap in self._gaps:
            if not gap['finished'] and not gap['active']:
                last = np.min([self.frame_size, gap['take']])
                gap['take'] -= last
                gap['buffer'] = np.hstack([gap['buffer'], frame[:last]])
                if gap['take'] <= 0:
                    gap['finished'] = True

        # Validate the finished gaps and collect their positions for removal.
        remove_idx = []
        for gap_idx, gap in enumerate(self._gaps):
            if gap['finished']:
                remove_idx.append(gap_idx)
                postpower = instantPower(esarr(gap['buffer']))
                if postpower > self._prepower_threshold:
                    if self.min_time <= gap['end'] - gap['start'] <= self.max_time:
                        y.append(gap['start'])

        # Remove from the highest index down so earlier indices stay valid.
        remove_idx.sort(reverse=True)
        for i in remove_idx:
            self._gaps.pop(i)

        x1 = self.envelope(frame)
        x2 = esarr(x1 > self._threshold)
        x3 = self.medianFilter(x2).round().astype(int)

        # Step detection restricted to the central window of the frame.
        x3_d = np.zeros(len(x3))
        start_proc = int(self.frame_size / 2 - self.hop_size / 2)
        end_proc = int(self.frame_size / 2 + self.hop_size / 2)
        for i in range(start_proc, end_proc):
            x3_d[i] = x3[i] - x3[i-1]

        s_dx = np.argwhere(x3_d == -1)
        e_dx = np.argwhere(x3_d == 1)

        # initializing
        if s_dx.size:
            offset = frame_idx * self.hop_size
            for s in s_dx:
                s = s[0]
                take_from_buffer = s - self._prepower_samples
                # Zero must use the frame alone: `l_buffer[-0:]` would select
                # the entire buffer instead of nothing.
                if take_from_buffer >= 0:
                    prepower = instantPower(frame[take_from_buffer:s])
                else:
                    history = self.l_buffer[-np.abs(take_from_buffer):]
                    prepower = instantPower(esarr(np.hstack([history, frame[:s]])))
                if prepower > self._prepower_threshold:
                    self._gaps.append({'start': (offset + s) / self.fs,
                                       'end': 0,
                                       'buffer': [],
                                       'take': 0,
                                       'active': True,
                                       'finished': False})

        # finishing
        if e_dx.size and self._gaps:
            offset = frame_idx * self.hop_size
            for e in e_dx:
                e = e[0]
                take_from_next_frame = np.max([(self._prepower_samples + e) - self.frame_size, 0])
                for gap in self._gaps:
                    if gap['active']:
                        gap['take'] = take_from_next_frame
                        gap['end'] = (offset + e) / self.fs
                        last = np.min([self.frame_size, e + self._prepower_samples])
                        gap['buffer'] = frame[e: last]
                        gap['active'] = False
                        break

        # update buffers
        update_num = np.min([self._prepower_samples, self.hop_size])
        # The shifted copy returned by np.roll was previously thrown away;
        # write it back so older samples really move towards the front.
        self.l_buffer[:] = np.roll(self.l_buffer, -update_num)
        self.l_buffer[-update_num:] = frame[-update_num:]

    self._gaps = []
    return esarr(y)
def testZero(self):
    """Zeros in, no discontinuities out."""
    # An array of zeros should return an empty list.
    size = 1024
    silence = esarr(numpy.zeros(size))
    outputs = self.InitDiscontinuityDetector(frameSize=size)(silence)
    self.assertEqualVector(outputs[0], esarr([]))
def compute(self, *args):
    """Return the start times of flat regions above 0.9 found in ``args[1]``.

    Each frame is rectified. Inside the central window ``[s, e)`` a mask
    marks samples that are above 0.9 and differ from the previous sample by
    less than 0.01. Rising and falling edges of the mask delimit regions. A
    region still open at the end of the window is stored in
    ``self.previousRegion`` and closed by the first falling edge of a later
    frame. Regions longer than ``minimumDuration`` are kept.

    ``frameSize``, ``hopSize``, ``sampleRate`` and ``minimumDuration`` are
    read from the enclosing scope.

    Args:
        *args: positional inputs; only ``args[1]`` (the audio signal) is read.

    Returns:
        ``esarr`` with the region start times in seconds.

    Raises:
        EssentiaException: if rising and falling edges cannot be paired.
    """
    x = args[1]
    y = []
    # Frame counter. It has its own name because the inner loop used to
    # reuse `idx` and overwrite it, corrupting every later timestamp.
    frame_idx = 0
    for frame in es.FrameGenerator(x, frameSize=frameSize, hopSize=hopSize, startFromZero=True):
        frame = np.abs(frame)
        starts = []
        ends = []
        s = int(frameSize // 2 - hopSize // 2) - 1  # consider non overlapping case
        e = int(frameSize // 2 + hopSize // 2)

        # First difference, padded so it has the same length as the frame.
        delta = np.diff(frame)
        delta = np.insert(delta, 0, 0)

        energyMask = np.array([sample > .9 for sample in frame])[s:e].astype(int)
        deltaMask = np.array([np.abs(step) < .01 for step in delta])[s:e].astype(int)
        combinedMask = energyMask * deltaMask

        # +1 marks a rising edge of the mask, -1 a falling edge.
        flanks = np.diff(combinedMask)
        uFlanks = [pos for pos, flank in enumerate(flanks) if flank == 1]
        dFlanks = [pos for pos, flank in enumerate(flanks) if flank == -1]

        if self.previousRegion and dFlanks:
            # Close the region left open by an earlier frame.
            start = self.previousRegion
            end = (frame_idx * hopSize + dFlanks[0] + s) / sampleRate
            # Fixed: this was `start - end`, which is never positive here.
            duration = end - start
            if duration > minimumDuration:
                starts.append(start)
                ends.append(end)
            self.previousRegion = None
            del dFlanks[0]

        # Lengths are compared by value; `is not` tested object identity.
        if len(dFlanks) != len(uFlanks):
            # The last rising edge has no falling edge yet: keep it open.
            # NOTE(review): raises IndexError when uFlanks is empty - confirm
            # how a falling edge without an open region should be handled.
            self.previousRegion = (frame_idx * hopSize + uFlanks[-1] + s) / sampleRate
            del uFlanks[-1]

        if len(dFlanks) != len(uFlanks):
            # Arguments now follow the order of the labels in the message.
            raise EssentiaException(
                "Ath this point uFlanks ({}) and dFlanks ({}) "
                "are expected to have the same length!".format(len(uFlanks), len(dFlanks)))

        for uFlank, dFlank in zip(uFlanks, dFlanks):
            start = float(frame_idx * hopSize + uFlank + s) / sampleRate
            end = float(frame_idx * hopSize + dFlank + s) / sampleRate
            duration = end - start
            if duration > minimumDuration:
                starts.append(start)
                ends.append(end)

        y.extend(starts)
        frame_idx += 1
    return esarr(y)
def testOnes(self):
    """Inputs made only of ones must yield (1, 1) for every tested length."""
    # Start at 200000 samples (apx. 4.5s @ 44.1kHz) and keep halving while
    # the length is above 1000.
    lengths = []
    length = 200000
    while length > 1000:
        lengths.append(length)
        length //= 2

    for length in lengths:
        ones = esarr(numpy.ones(length))
        self.assertEqualVector(self.InitStartStopCut()(ones), (1, 1))
def testZero(self):
    """A frame of zeros must give an empty first output from SaturationDetector."""
    detector = SaturationDetector()
    outputs = detector(esarr(np.zeros(512)))
    self.assertEqualVector(outputs[0], esarr([]))
def testZero(self):
    """A frame of zeros must give an empty first output from TruePeakDetector."""
    detector = TruePeakDetector()
    outputs = detector(esarr(np.zeros(512)))
    self.assertEqualVector(outputs[0], esarr([]))