Ejemplo n.º 1
0
    def testSinusoidalPlusNoise(self):
        """Compare the SNR estimate for a 5 kHz tone in white noise with a reference.

        A 5 s signal is built at 44.1 kHz: unit-variance Gaussian noise scaled
        by ``db2amp(-50)`` plus a sinusoid scaled by ``db2amp(-22)`` whose
        first second is zeroed, so the signal starts with noise only.  The
        reference value is the power ratio (in dB) of the scaled tone and the
        scaled noise over the region after the first second, minus
        ``10 * log10(fs / 2)``.  The estimate checked is the one returned for
        the last frame.
        """
        from essentia import instantPower
        from essentia import db2amp

        sample_rate = 44100.
        duration = 5.  # s
        noise_only_duration = 1  # s
        frame_size = 512
        hop_size = frame_size // 2
        tone_db = -22.
        floor_db = -50.

        t = np.arange(0, duration, 1 / sample_rate)
        white = np.random.randn(len(t))
        white /= np.std(white)

        tone = np.sin(2 * pi * 5000 * t)

        # Silence the tone during the leading noise-only section
        onset = int(noise_only_duration * sample_rate)
        tone[:onset] = np.zeros(onset)

        tone_gain = db2amp(tone_db)
        floor_gain = db2amp(floor_db)

        tone_power = instantPower(esarr(tone_gain * tone[onset:]))
        floor_power = instantPower(esarr(floor_gain * white[onset:]))
        expected = (10. * np.log10(tone_power / floor_power)
                    - 10. * np.log10(sample_rate / 2.))

        mixture = esarr(tone_gain * tone + floor_gain * white)

        estimator = SNR(frameSize=frame_size, noiseThreshold=-30)
        for frame in FrameGenerator(mixture, frameSize=frame_size,
                                    hopSize=hop_size):
            _, estimate, _ = estimator(frame)

        # NOTE(review): in unittest.TestCase the third positional argument is
        # `places`; presumably the project's TestCase treats it as a
        # tolerance -- confirm against the test base class.
        self.assertAlmostEqual(estimate, expected, 1e-1)
Ejemplo n.º 2
0
    def testSinusoidalPlusNoise(self):
        """Check the SNR estimate of a noisy sinusoid against a computed reference.

        The input is 5 s long at 44.1 kHz.  It mixes unit-variance Gaussian
        noise scaled by ``db2amp(-50)`` with a 5 kHz sinusoid scaled by
        ``db2amp(-22)``; the sinusoid is zeroed during the first second.  The
        reference is ``10 * log10`` of the ratio between the instant powers of
        the scaled sinusoid and the scaled noise after that first second,
        minus ``10 * log10(fs / 2)``.  Only the estimate produced for the
        final frame is compared.
        """
        from essentia import instantPower
        from essentia import db2amp

        fs = 44100.
        total_seconds = 5.
        lead_in_seconds = 1
        frame_len = 512
        hop_len = frame_len // 2
        sine_level_db = -22.
        noise_level_db = -50.

        times = np.arange(0, total_seconds, 1 / fs)
        gaussian = np.random.randn(len(times))
        gaussian /= np.std(gaussian)

        sine = np.sin(2 * pi * 5000 * times)

        # The first `lead_in` samples carry noise only
        lead_in = int(lead_in_seconds * fs)
        sine[:lead_in] = np.zeros(lead_in)

        sine_amp = db2amp(sine_level_db)
        noise_amp = db2amp(noise_level_db)

        power_ratio = (instantPower(esarr(sine_amp * sine[lead_in:])) /
                       instantPower(esarr(noise_amp * gaussian[lead_in:])))
        reference = 10. * np.log10(power_ratio) - 10. * np.log10(fs / 2.)

        noisy = esarr(sine_amp * sine + noise_amp * gaussian)

        snr_algo = SNR(frameSize=frame_len, noiseThreshold=-30)
        frames = FrameGenerator(noisy, frameSize=frame_len, hopSize=hop_len)
        for frame in frames:
            _, measured, _ = snr_algo(frame)

        # NOTE(review): unittest's own assertAlmostEqual takes `places` as the
        # third positional argument; presumably the enclosing TestCase
        # overrides it with a tolerance -- confirm.
        self.assertAlmostEqual(measured, reference, 1e-1)
Ejemplo n.º 3
0
def is_silent_threshold(frame, silence_threshold_dB):
    """Tell whether the instant power of ``frame`` is below a dB threshold.

    The threshold is converted from dB to linear power as
    ``10 ** (silence_threshold_dB / 10)``.

    Returns 1.0 if the frame power is strictly below the threshold, else 0.0.
    """
    frame_power = essentia.instantPower(frame)
    power_limit = 10.0 ** (silence_threshold_dB / 10.0)
    return 1.0 if frame_power < power_limit else 0.0
Ejemplo n.º 4
0
def is_silent_threshold(frame, silence_threshold_dB):
    """Return 1.0 when ``frame`` is quieter than the given threshold, else 0.0.

    ``silence_threshold_dB`` is a power level in dB; it is turned into a
    linear power with ``10 ** (dB / 10)`` and compared (strictly) against the
    instant power of the frame.
    """
    measured = es.instantPower(frame)
    limit = 10.0 ** (silence_threshold_dB / 10.0)
    return 1.0 if measured < limit else 0.0
Ejemplo n.º 5
0
    def compute(self, *args):
        """Locate runs of samples where the LPC prediction error stands out.

        ``args[1]`` is the signal.  It is cut into frames using the
        ``frameSize`` and ``hopSize`` names of the enclosing scope, and frames
        whose instant power is below ``db2pow(-50)`` are skipped.  For every
        other frame a 12th-order LPC is estimated, its coefficients are
        divided by their maximum, the frame is filtered with them to get the
        prediction error, and the error is filtered once more, time-reversed,
        with the negated coefficients.  Squared samples of that last signal
        inside the central hop-sized region that reach the threshold are
        grouped into runs of consecutive indices.

        Returns an essentia array holding, for each run, the position of its
        first sample divided by a hard-coded rate of 44100.
        """
        signal = args[1]
        lpc_estimator = es.LPC(order=12, type='regular')
        power_estimation_threshold = 10
        silence_power = db2pow(-50)
        detection_gain = db2pow(30)

        # Only the central hop-sized part of each frame is inspected
        first = int(frameSize / 2 - hopSize / 2)
        last = int(frameSize / 2 + hopSize / 2)

        start_times = []
        frames = es.FrameGenerator(signal,
                                   frameSize=frameSize,
                                   hopSize=hopSize,
                                   startFromZero=True)
        for frame_idx, frame in enumerate(frames):
            if instantPower(frame) < silence_power:
                continue

            coeffs, _ = lpc_estimator(frame)
            coeffs /= np.max(coeffs)

            residual = es.IIR(numerator=coeffs)(frame)
            filtered = es.IIR(numerator=-coeffs)(residual[::-1])[::-1]

            # Threshold: scaled robust power of the residual, never lower
            # than the silence power
            scaled_power = self.robustPower(
                residual, power_estimation_threshold) * detection_gain
            level = np.max([scaled_power, silence_power])

            frame_start = frame_idx * hopSize
            previous_hit = None
            for offset, energy in enumerate(filtered[first:last] ** 2):
                if energy >= level:
                    position = offset + first
                    # A hit that does not directly follow the previous one
                    # opens a new run; only run starts are reported
                    if previous_hit is None or position != previous_hit + 1:
                        start_times.append((position + frame_start) / 44100.)
                    previous_hit = position

        return esarr(start_times)
Ejemplo n.º 6
0
    def compute(self, *args):
        """Report where the LPC prediction error of ``args[1]`` exceeds a threshold.

        Frames are produced with the ``frameSize`` / ``hopSize`` names of the
        enclosing scope.  A frame whose instant power is under ``db2pow(-50)``
        is ignored.  Otherwise the frame is filtered with its own 12th-order
        LPC coefficients (divided by their maximum) and the result is filtered
        again, reversed in time, with the negated coefficients.  Within the
        central hop-sized part of the frame, squared output samples that reach
        the threshold are collected and consecutive indices are merged into
        runs.

        Returns an essentia array with one value per run: the position of the
        run's first sample divided by a hard-coded rate of 44100.
        """
        audio = args[1]
        lpc_algo = es.LPC(order=12, type='regular')
        robust_power_threshold = 10
        silence_level = db2pow(-50)
        detection_factor = db2pow(30)

        # Bounds of the central, hop-sized analysis region of a frame
        region_begin = int(frameSize / 2 - hopSize / 2)
        region_end = int(frameSize / 2 + hopSize / 2)

        run_starts = []
        frame_source = es.FrameGenerator(audio, frameSize=frameSize,
                                         hopSize=hopSize, startFromZero=True)
        for frame_number, frame in enumerate(frame_source):
            if instantPower(frame) < silence_level:
                continue

            a, _ = lpc_algo(frame)
            a /= np.max(a)

            error = es.IIR(numerator=a)(frame)
            error_filtered = es.IIR(numerator=-a)(error[::-1])[::-1]

            # The threshold never drops below the silence level
            limit = np.max([
                self.robustPower(error, robust_power_threshold) * detection_factor,
                silence_level,
            ])

            base = frame_number * hopSize
            last_hit = None
            squared = error_filtered[region_begin:region_end] ** 2
            for k, value in enumerate(squared):
                if value >= limit:
                    sample = k + region_begin
                    # Only the first sample of each consecutive run is kept
                    if last_hit is None or sample != last_hit + 1:
                        run_starts.append((sample + base) / 44100.)
                    last_hit = sample

        return esarr(run_starts)
Ejemplo n.º 7
0
    def compute(self, *args):
        """Estimate a smoothed signal-to-noise ratio for the signal in ``args[1]``.

        The signal is split into frames using the ``frameSize`` and
        ``hopSize`` names of the enclosing scope.  Frames whose instant power
        is below ``db2pow(noiseThreshold)`` update a running noise PSD; every
        other frame yields an a-priori SNR estimate built from quantities
        stored for the previous frame, and the ratio of mean estimated signal
        PSD to mean noise PSD is folded into a moving average.

        ``args[2]`` is read into ``asume_gauss_psd`` but is not used
        afterwards.

        Returns a one-element essentia array holding the moving average
        (a linear ratio; no dB conversion is applied here).
        """
        eps = (np.finfo(np.float32).eps)
        def SNR_prior_est(alpha, mmse, noise_pow, snr_inst):
            """Blend the previous amplitude estimate with the clipped instantaneous SNR."""
            return alpha * (np.abs(mmse) ** 2) / noise_pow + (1 - alpha) *\
                   np.clip(snr_inst, a_min=0, a_max=None)

        def update_noise_psd(noise_spectrum, noise, alpha=.98):
            """Exponentially smooth the noise PSD with the squared magnitude of ``noise``."""
            return alpha * noise_spectrum + (1 - alpha) * np.abs(noise) ** 2

        def update_y(mean_y, y, alpha=.98):
            """Exponential moving average update of ``mean_y`` with ``y``."""
            return alpha * mean_y + (1 - alpha) * y

        def MMSE(v, snr_post, Y):
            """Compute the per-bin amplitude estimate from ``v``, ``snr_post`` and ``Y``.

            For ``v > 10`` the simplified form ``v * Y / snr_post`` is used;
            otherwise the full expression with ``iv`` (presumably
            ``scipy.special.iv`` -- verify the import) is evaluated.
            """
            g = 0.8862269254527579 # gamma(1.5)

            output = np.zeros(len(v))

            for idx in range(len(Y)):
                if v[idx] > 10:
                    # NOTE(review): unlike the branch below, this division is
                    # not protected by eps
                    output[idx] = v[idx] * Y[idx] / snr_post[idx]
                else:
                    output[idx] = g * ( np.sqrt(v[idx]) / (snr_post[idx] + eps)) *\
                                  np.exp(-v[idx] / 2.) *\
                                  ((1 + v[idx]) * iv(0., v[idx] / 2.) +\
                                  v[idx] * iv(1., v[idx] / 2.)) * Y[idx]
            return output

        def SNR_post_est(Y, noise_pow):
            """A-posteriori SNR: squared magnitude of ``Y`` over the noise power."""
            return np.abs(Y) ** 2 / noise_pow

        def SNR_inst_est(snr_post_est):
            """Instantaneous SNR: a-posteriori SNR minus one."""
            return snr_post_est - 1.

        def V(snr_prior, snr_post):
            """Return ``snr_prior / (1 + snr_prior) * snr_post``."""
            return (snr_prior / (1. + snr_prior)) * snr_post


        x = esarr(args[1])
        asume_gauss_psd = args[2]  # read but never used below
        idx_ = 0  # frame counter; incremented but never read

        # Frames with instant power below this value are treated as noise only
        silenceThreshold = db2pow(noiseThreshold)

        # Smoothing factors for the a-priori SNR, the noise PSD and the
        # averaged SNR respectively
        MMSE_alpha = .98
        noise_alpha = .9
        snr_alpha = .95

        y = []  # unused

        # One value per spectrum bin (frameSize // 2 + 1 bins)
        noise_psd = np.zeros(frameSize // 2 + 1, dtype=np.float32)

        # State carried over from the previous frame
        previous_snr_prior = np.zeros(frameSize // 2 + 1, dtype=np.float32)
        previous_snr_inst = np.zeros(frameSize // 2 + 1, dtype=np.float32)
        previous_snr_post = np.zeros(frameSize // 2 + 1, dtype=np.float32)
        previous_Y = np.zeros(frameSize // 2 + 1, dtype=np.float32)
        previous_noise_psd = np.zeros(frameSize // 2 + 1, dtype=np.float32)

        noise_std = 0  # unused
        ma_snr_average = 0

        spectrum = es.Spectrum(size=frameSize)
        window = es.Windowing(size=frameSize, type='hann', normalized=False)

        for frame in es.FrameGenerator(x, frameSize=frameSize,
                                       hopSize=hopSize, startFromZero=True):
            Y = spectrum(window(frame))

            if instantPower(frame) < silenceThreshold:
                # Noise-only frame: refine the noise PSD; the averaged SNR is
                # left untouched
                noise_psd = update_noise_psd(noise_psd, Y, alpha=noise_alpha)

                snr_post = SNR_post_est(Y, noise_psd)
                snr_inst = SNR_inst_est(snr_post)

            else:
                # The a-priori SNR is still all zeros: seed it from the
                # previous instantaneous SNR
                if np.sum(previous_snr_prior) == 0:
                    previous_snr_prior = MMSE_alpha + (1 - MMSE_alpha) * np.clip(previous_snr_inst, a_min=0., a_max=None)

                    # Disabled branch (never executes).
                    # NOTE(review): `frameSize / 2 + 1` would be a float
                    # under Python 3 true division if this were re-enabled.
                    if 0:
                        noise_psd = np.ones(frameSize / 2 + 1) * np.mean(noise_psd)

                # NOTE(review): noise_psd is all zeros until a noise-only
                # frame has been seen, so these ratios can be inf/nan
                snr_post = SNR_post_est(Y, noise_psd)
                snr_inst = SNR_inst_est(snr_post)

                v = V(previous_snr_prior, previous_snr_post)

                previous_mmse = MMSE(v, previous_snr_post, previous_Y)

                snr_prior = SNR_prior_est(MMSE_alpha, previous_mmse, 
                                          previous_noise_psd, snr_inst)

                # Estimated signal PSD and its mean ratio to the noise PSD
                X_psd_est = noise_psd * snr_prior

                snr_average = np.mean(X_psd_est) / np.mean(noise_psd)

                ma_snr_average = update_y(ma_snr_average, snr_average,
                                          alpha=snr_alpha)

                previous_snr_prior = snr_prior

            # Remember this frame's quantities for the next iteration
            previous_noise_psd = noise_psd
            previous_snr_post = snr_post
            previous_snr_inst = snr_inst
            previous_Y = Y

            idx_ += 1

        return esarr([ma_snr_average])
Ejemplo n.º 8
0
        # Fragment of a larger evaluation routine (its header is not visible
        # here).  For every noise-only duration it builds a noisy sinusoid,
        # computes the reference SNR and prints the absolute error of the
        # 'Dev' wrapper's estimate in dB.
        for asume_gauss_psd in [0]:
            for noise_only in noise_durations:

                results = []
                gt = []
                for i in range(1):
                    # Unit-variance Gaussian noise
                    noise = np.random.randn(nsamples)
                    noise /= np.std(noise)


                    signal = np.sin(2 * pi * 5000 * time_axis)

                    signal_db = -22.
                    noise_db  = -50.

                    noise_var = instantPower(esarr(db2amp(noise_db) * noise))
                    # Zero the sinusoid during the leading noise-only section
                    signal[:int(noise_only * fs)] = np.zeros(int(noise_only * fs))
                    # Reference SNR in dB, measured after the noise-only section
                    real_snr_prior = 10. * np.log10(
                        (instantPower(esarr(db2amp(signal_db) * signal[int(noise_only * fs):]))) /
                        (instantPower(esarr(db2amp(noise_db)  * noise[int(noise_only * fs):]))))

                    # Same correction term as applied to the estimate below
                    real_snr_prior_esp_corrected = real_snr_prior - 10. * np.log10(fs / 2.)
                    gt.append(real_snr_prior_esp_corrected)

                    signal_and_noise = esarr(db2amp(signal_db) * signal + db2amp(noise_db) * noise)

                    # NOTE(review): noise_alpha is not defined in this
                    # fragment; presumably it comes from an enclosing loop --
                    # verify.
                    ma_snr_average = qa.wrappers['Dev'].compute(None, signal_and_noise, asume_gauss_psd, noise_alpha)
                    # Convert the linear estimate to dB and apply the correction
                    mean_snr_estimation = 10 * np.log10(ma_snr_average)
                    mean_snr_estimation_corrected = mean_snr_estimation - 10. * np.log10(fs / 2.)
                    print('with dev, error: {:.3f}dB'.format(np.abs(mean_snr_estimation_corrected[0] - real_snr_prior_esp_corrected)))
Ejemplo n.º 9
0
    def compute(self, *args):
        """Detect gaps in ``args[1]`` and return their start positions.

        The signal is processed frame by frame.  For every frame:

        1. Gaps that were closed in an earlier frame but still need samples
           for their post-gap buffer take them from the beginning of the
           frame.
        2. Finished gaps are removed from ``self._gaps``; a gap is reported
           when the power of its post-gap buffer exceeds
           ``self._prepower_threshold`` and its duration lies within
           ``[self.min_time, self.max_time]``.
        3. The frame envelope is thresholded, median filtered and
           differentiated over the central hop-sized region.  A falling edge
           (-1) opens a gap if the power of the ``self._prepower_samples``
           samples preceding it exceeds ``self._prepower_threshold``; a
           rising edge (+1) closes the first gap in the list that is still
           active.
        4. ``self.l_buffer`` is shifted and its tail is refilled with the
           tail of the frame.

        Gaps still pending when the signal ends are discarded
        (``self._gaps`` is reset).

        Returns:
            An essentia array with, for every accepted gap, the sample
            position of its start divided by ``self.fs``.
        """
        y = []
        x = args[1]

        # Only the central hop-sized part of each frame is searched for edges
        start_proc = int(self.frame_size / 2 - self.hop_size / 2)
        end_proc = int(self.frame_size / 2 + self.hop_size / 2)

        for frame_idx, frame in enumerate(es.FrameGenerator(x, frameSize=self.frame_size,
                                          hopSize=self.hop_size, startFromZero=True)):
            # updating buffers
            for gap in self._gaps:
                if not gap['finished'] and not gap['active']:
                    last = np.min([self.frame_size, gap['take']])
                    gap['take'] -= last
                    gap['buffer'] = np.hstack([gap['buffer'], frame[:last]])
                    if gap['take'] <= 0:
                        gap['finished'] = True

            # Report and drop the gaps whose post-gap buffer is complete
            remove_idx = []
            for gap_idx, gap in enumerate(self._gaps):
                if gap['finished']:
                    remove_idx.append(gap_idx)
                    postpower = instantPower(esarr(gap['buffer']))
                    if postpower > self._prepower_threshold:
                        if self.min_time <= gap['end'] - gap['start'] <= self.max_time:
                            y.append(gap['start'])

            # Pop from the back so the remaining indices stay valid
            remove_idx.sort(reverse=True)
            for i in remove_idx:
                self._gaps.pop(i)

            x1 = self.envelope(frame)
            x2 = esarr(x1 > self._threshold)

            x3 = self.medianFilter(x2).round().astype(int)

            # First difference of the binary activity inside the central region
            x3_d = np.zeros(len(x3))
            for i in range(start_proc, end_proc):
                x3_d[i] = x3[i] - x3[i-1]

            s_dx = np.argwhere(x3_d == -1)
            e_dx = np.argwhere(x3_d == 1)

            # initializing
            if s_dx.size:
                offset = frame_idx * self.hop_size
                for s in s_dx:
                    s = s[0]
                    take_from_buffer = s - self._prepower_samples
                    if take_from_buffer >= 0:
                        # The whole pre-gap window lies inside this frame.
                        # The boundary case (== 0) belongs here: a `[-0:]`
                        # slice of the buffer would select all of it rather
                        # than nothing.
                        prepower = instantPower(frame[take_from_buffer:s])
                    else:
                        # Part of the window precedes the frame: take the
                        # missing samples from the tail of the saved buffer
                        prepower = instantPower(esarr(np.hstack([self.l_buffer[take_from_buffer:],
                                                frame[:s]])))
                    if prepower > self._prepower_threshold:
                        self._gaps.append({'start': (offset + s) / self.fs,
                                          'end': 0,
                                          'buffer': [],
                                          'take': 0,
                                          'active': True,
                                          'finished': False})

            # finishing
            if e_dx.size and self._gaps:
                offset = frame_idx * self.hop_size
                for e in e_dx:
                    e = e[0]
                    # Samples of the post-gap window that fall beyond this frame
                    take_from_next_frame = np.max([(self._prepower_samples + e) - self.frame_size, 0])
                    for gap in self._gaps:
                        if gap['active']:
                            gap['take'] = take_from_next_frame
                            gap['end'] = (offset + e) / self.fs
                            last = np.min([self.frame_size, e + self._prepower_samples])
                            gap['buffer'] = frame[e: last]
                            gap['active'] = False
                            break

            # update buffers
            update_num = np.min([self._prepower_samples, self.hop_size])
            # np.roll returns a new array, so its result has to be stored;
            # writing through [:] keeps the same buffer object
            self.l_buffer[:] = np.roll(self.l_buffer, -update_num)
            self.l_buffer[-update_num:] = frame[-update_num:]

        self._gaps = []
        return esarr(y)
def compute(audio, pool, options):
    """Detect onsets in ``audio`` and store their times and rate in ``pool``.

    Two onset detection functions ("hfc" and "complex") are evaluated per
    frame and combined with equal weights by ``essentia.Onsets``.  Frames
    whose instant power is below 1e-4 contribute a zero to both functions.
    Onsets closer than 80 ms to the previously kept onset are discarded.

    Args:
        audio: audio samples to analyse.
        pool: pool receiving ``<namespace>.onset_times`` and
            ``<namespace>.onset_rate``; ``namespace`` is a name from the
            enclosing module.
        options: mapping providing 'sampleRate', 'frameSize', 'hopSize',
            'zeroPadding' and 'windowType'.
    """
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    zeroPadding = options['zeroPadding']
    windowType = options['windowType']

    # One detection value is produced per hop, so the frame rate is
    # sampleRate / hopSize.  (frameSize - hopSize is the overlap and only
    # equals the hop when hopSize == frameSize / 2.)
    frameRate = float(sampleRate) / float(hopSize)

    INFO('Computing Onset Detection...')

    frames = essentia.FrameGenerator(audio=audio,
                                     frameSize=frameSize,
                                     hopSize=hopSize)
    window = essentia.Windowing(size=frameSize,
                                zeroPadding=zeroPadding,
                                type=windowType)
    fft = essentia.FFT()
    cartesian2polar = essentia.CartesianToPolar()
    onsetdetectionHFC = essentia.OnsetDetection(method="hfc",
                                                sampleRate=sampleRate)
    onsetdetectionComplex = essentia.OnsetDetection(method="complex",
                                                    sampleRate=sampleRate)
    onsets = essentia.Onsets(frameRate=frameRate)

    progress = Progress(total=frames.num_frames())
    n_frames = 0  # number of non-silent frames processed so far

    hfc_values = []
    complex_values = []

    for frame in frames:

        # Silent frames keep their slot in both detection functions
        if essentia.instantPower(frame) < 1.e-4:
            hfc_values.append(0.)
            complex_values.append(0.)
            continue

        windowed_frame = window(frame)
        complex_fft = fft(windowed_frame)
        (spectrum, phase) = cartesian2polar(complex_fft)
        hfc_values.append(onsetdetectionHFC(spectrum, phase))
        complex_values.append(onsetdetectionComplex(spectrum, phase))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1

    # One row per detection function
    detections = numpy.concatenate(
        [essentia.array([hfc_values]),
         essentia.array([complex_values])])

    # prune all 'doubled' detections
    time_onsets = list(onsets(detections, essentia.array([1, 1])))
    t = 1
    while t < len(time_onsets):
        if time_onsets[t] - time_onsets[t - 1] < 0.080:
            time_onsets.pop(t)
        else:
            t += 1

    # The onset rate is defined as the number of onsets per second.  Force
    # float division so the duration is not truncated to whole seconds when
    # both operands are integers, and avoid dividing by zero on empty audio.
    duration = len(audio) / float(sampleRate)
    onsetrate = len(time_onsets) / duration if duration > 0 else 0.0

    pool.add(namespace + '.' + "onset_times",
             essentia.array(time_onsets))  #, pool.GlobalScope)
    pool.add(namespace + '.' + "onset_rate", onsetrate)  #, pool.GlobalScope)

    progress.finish()
Ejemplo n.º 11
0
def compute(audio, pool, options):
    """Compute onset times and onset rate for ``audio`` and add them to ``pool``.

    The "hfc" and "complex" onset detection functions are computed for each
    frame (a zero is stored for frames whose instant power is below 1e-4),
    merged with equal weights by ``essentia.Onsets``, and onsets that follow
    the previously kept one by less than 80 ms are removed.

    Args:
        audio: audio samples to analyse.
        pool: pool receiving ``<namespace>.onset_times`` and
            ``<namespace>.onset_rate``; ``namespace`` is a name from the
            enclosing module.
        options: mapping providing 'sampleRate', 'frameSize', 'hopSize',
            'zeroPadding' and 'windowType'.
    """
    sampleRate = options['sampleRate']
    frameSize = options['frameSize']
    hopSize = options['hopSize']
    zeroPadding = options['zeroPadding']
    windowType = options['windowType']

    # Frames advance by hopSize samples, hence sampleRate / hopSize frames
    # per second.  (frameSize - hopSize is the overlap; it matches the hop
    # only when hopSize == frameSize / 2.)
    frameRate = float(sampleRate) / float(hopSize)

    INFO('Computing Onset Detection...')

    frames = essentia.FrameGenerator(audio=audio, frameSize=frameSize, hopSize=hopSize)
    window = essentia.Windowing(size=frameSize, zeroPadding=zeroPadding, type=windowType)
    fft = essentia.FFT()
    cartesian2polar = essentia.CartesianToPolar()
    onsetdetectionHFC = essentia.OnsetDetection(method="hfc", sampleRate=sampleRate)
    onsetdetectionComplex = essentia.OnsetDetection(method="complex", sampleRate=sampleRate)
    onsets = essentia.Onsets(frameRate=frameRate)

    progress = Progress(total=frames.num_frames())
    n_frames = 0  # number of non-silent frames processed so far

    hfc_values = []
    complex_values = []

    for frame in frames:

        # Silent frames keep their slot in both detection functions
        if essentia.instantPower(frame) < 1.e-4:
            hfc_values.append(0.)
            complex_values.append(0.)
            continue

        windowed_frame = window(frame)
        complex_fft = fft(windowed_frame)
        (spectrum, phase) = cartesian2polar(complex_fft)
        hfc_values.append(onsetdetectionHFC(spectrum, phase))
        complex_values.append(onsetdetectionComplex(spectrum, phase))

        # display of progress report
        progress.update(n_frames)

        n_frames += 1

    # One row per detection function
    detections = numpy.concatenate([essentia.array([hfc_values]),
                                    essentia.array([complex_values])])

    # prune all 'doubled' detections
    time_onsets = list(onsets(detections, essentia.array([1, 1])))
    t = 1
    while t < len(time_onsets):
        if time_onsets[t] - time_onsets[t - 1] < 0.080:
            time_onsets.pop(t)
        else:
            t += 1

    # The onset rate is defined as the number of onsets per second.  Float
    # division keeps the duration from being truncated to whole seconds when
    # both operands are integers; empty audio yields a rate of 0.
    duration = len(audio) / float(sampleRate)
    onsetrate = len(time_onsets) / duration if duration > 0 else 0.0

    pool.add(namespace + '.' + "onset_times", essentia.array(time_onsets))#, pool.GlobalScope)
    pool.add(namespace + '.' + "onset_rate", onsetrate)#, pool.GlobalScope)

    progress.finish()
Ejemplo n.º 12
0
    # Fragment of a larger evaluation routine (its header and the rest of the
    # innermost loop are not visible here).  For each parameter combination
    # it builds a noisy sinusoid and the matching reference SNR.
    for noise_alpha in [.9]:
        for asume_gauss_psd in [0]:
            for noise_only in noise_durations:

                results = []
                gt = []
                for i in range(1):
                    # Unit-variance Gaussian noise
                    noise = np.random.randn(nsamples)
                    noise /= np.std(noise)

                    signal = np.sin(2 * pi * 5000 * time_axis)

                    signal_db = -22.
                    noise_db = -50.

                    noise_var = instantPower(esarr(db2amp(noise_db) * noise))
                    # Zero the sinusoid during the leading noise-only section
                    signal[:int(noise_only * fs)] = np.zeros(
                        int(noise_only * fs))
                    # Reference SNR in dB, measured after the noise-only section
                    real_snr_prior = 10. * np.log10((instantPower(
                        esarr(
                            db2amp(signal_db) * signal[int(noise_only * fs):])
                    )) / (instantPower(
                        esarr(
                            db2amp(noise_db) * noise[int(noise_only * fs):]))))

                    # Subtract the 10 * log10(fs / 2) correction term
                    real_snr_prior_esp_corrected = real_snr_prior - 10. * np.log10(
                        fs / 2.)
                    gt.append(real_snr_prior_esp_corrected)

                    # Scaled sinusoid plus scaled noise, as an essentia array
                    signal_and_noise = esarr(
                        db2amp(signal_db) * signal + db2amp(noise_db) * noise)
Ejemplo n.º 13
0
    def compute(self, *args):
        """Detect gaps in ``args[1]`` and return their start positions.

        The signal is processed frame by frame.  For every frame:

        1. Gaps that were closed in an earlier frame but still need samples
           for their post-gap buffer take them from the beginning of the
           frame.
        2. Finished gaps are removed from ``self._gaps``; a gap is reported
           when the power of its post-gap buffer exceeds
           ``self._prepower_threshold`` and its duration lies within
           ``[self.min_time, self.max_time]``.
        3. The frame envelope is thresholded, median filtered and
           differentiated over the central hop-sized region.  A falling edge
           (-1) opens a gap if the power of the ``self._prepower_samples``
           samples preceding it exceeds ``self._prepower_threshold``; a
           rising edge (+1) closes the first gap in the list that is still
           active.
        4. ``self.l_buffer`` is shifted and its tail is refilled with the
           tail of the frame.

        Gaps still pending when the signal ends are discarded
        (``self._gaps`` is reset).

        Returns:
            An essentia array with, for every accepted gap, the sample
            position of its start divided by ``self.fs``.
        """
        y = []
        x = args[1]

        # Only the central hop-sized part of each frame is searched for edges
        start_proc = int(self.frame_size / 2 - self.hop_size / 2)
        end_proc = int(self.frame_size / 2 + self.hop_size / 2)

        for frame_idx, frame in enumerate(
                es.FrameGenerator(x,
                                  frameSize=self.frame_size,
                                  hopSize=self.hop_size,
                                  startFromZero=True)):
            # updating buffers
            for gap in self._gaps:
                if not gap['finished'] and not gap['active']:
                    last = np.min([self.frame_size, gap['take']])
                    gap['take'] -= last
                    gap['buffer'] = np.hstack([gap['buffer'], frame[:last]])
                    if gap['take'] <= 0:
                        gap['finished'] = True

            # Report and drop the gaps whose post-gap buffer is complete
            remove_idx = []
            for gap_idx, gap in enumerate(self._gaps):
                if gap['finished']:
                    remove_idx.append(gap_idx)
                    postpower = instantPower(esarr(gap['buffer']))
                    if postpower > self._prepower_threshold:
                        duration = gap['end'] - gap['start']
                        if self.min_time <= duration <= self.max_time:
                            y.append(gap['start'])

            # Pop from the back so the remaining indices stay valid
            remove_idx.sort(reverse=True)
            for i in remove_idx:
                self._gaps.pop(i)

            x1 = self.envelope(frame)
            x2 = esarr(x1 > self._threshold)

            x3 = self.medianFilter(x2).round().astype(int)

            # First difference of the binary activity inside the central region
            x3_d = np.zeros(len(x3))
            for i in range(start_proc, end_proc):
                x3_d[i] = x3[i] - x3[i - 1]

            s_dx = np.argwhere(x3_d == -1)
            e_dx = np.argwhere(x3_d == 1)

            # initializing
            if s_dx.size:
                offset = frame_idx * self.hop_size
                for s in s_dx:
                    s = s[0]
                    take_from_buffer = s - self._prepower_samples
                    if take_from_buffer >= 0:
                        # The whole pre-gap window lies inside this frame.
                        # The boundary case (== 0) belongs here: a `[-0:]`
                        # slice of the buffer would select all of it rather
                        # than nothing.
                        prepower = instantPower(frame[take_from_buffer:s])
                    else:
                        # Part of the window precedes the frame: take the
                        # missing samples from the tail of the saved buffer
                        prepower = instantPower(
                            esarr(
                                np.hstack([
                                    self.l_buffer[take_from_buffer:],
                                    frame[:s]
                                ])))
                    if prepower > self._prepower_threshold:
                        self._gaps.append({
                            'start': (offset + s) / self.fs,
                            'end': 0,
                            'buffer': [],
                            'take': 0,
                            'active': True,
                            'finished': False
                        })

            # finishing
            if e_dx.size and self._gaps:
                offset = frame_idx * self.hop_size
                for e in e_dx:
                    e = e[0]
                    # Samples of the post-gap window that fall beyond this frame
                    take_from_next_frame = np.max([
                        (self._prepower_samples + e) - self.frame_size, 0
                    ])
                    for gap in self._gaps:
                        if gap['active']:
                            gap['take'] = take_from_next_frame
                            gap['end'] = (offset + e) / self.fs
                            last = np.min(
                                [self.frame_size, e + self._prepower_samples])
                            gap['buffer'] = frame[e:last]
                            gap['active'] = False
                            break

            # update buffers
            update_num = np.min([self._prepower_samples, self.hop_size])
            # np.roll returns a new array, so its result has to be stored;
            # writing through [:] keeps the same buffer object
            self.l_buffer[:] = np.roll(self.l_buffer, -update_num)
            self.l_buffer[-update_num:] = frame[-update_num:]

        self._gaps = []
        return esarr(y)