Example No. 1
from essentia.standard import (MonoLoader, FrameGenerator, Windowing, FFT,
                               CartesianToPolar, OnsetDetection, Onsets)
from essentia import array


def hfc(filename):
    # Onset detection using the high-frequency content (HFC) novelty function.
    audio = MonoLoader(filename=filename, sampleRate=44100)()
    features = []
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = CartesianToPolar()(FFT()(Windowing(type='hann')(frame)))
        features.append(OnsetDetection(method='hfc')(mag, phase))
    return Onsets()(array([features]), [1])
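A minimal usage sketch; 'guitar.wav' below is a placeholder path, not from the original snippet:

# Hypothetical usage of the hfc() function above.
onset_times = hfc('guitar.wav')  # Onsets() returns onset times in seconds
print(onset_times)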
Example No. 2
from essentia.standard import (MonoLoader, FrameGenerator, Windowing, FFT,
                               CartesianToPolar, FrequencyBands, NoveltyCurve,
                               Onsets)
from essentia import array


def noveltycurve(filename):
    # Onset detection via a band-energy novelty curve.
    audio = MonoLoader(filename=filename, sampleRate=44100)()
    band_energy = []
    for frame in FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = CartesianToPolar()(FFT()(Windowing(type='hann')(frame)))
        band_energy.append(FrequencyBands()(mag))
    novelty = NoveltyCurve()(array(band_energy))
    return Onsets()(array([novelty]), [1])
Example No. 3
    def calculateDownbeats(self, audio, bpm, phase):
        # Step 0: calculate the CSD (Complex Spectral Difference) features
        # and the associated onset detection function ON LOWPASSED SIGNAL
        spec = Spectrum(size=self.FRAME_SIZE)  # declared but not used below
        w = Windowing(type='hann')
        fft = FFT()
        c2p = CartesianToPolar()
        od_csd = OnsetDetection(method='complex')
        lowpass = LowPass(cutoffFrequency=1500)

        pool = Pool()

        # TODO test faster (numpy) way
        # audio = lowpass(audio)  # lowpass is declared above but currently disabled
        for frame in FrameGenerator(audio,
                                    frameSize=self.FRAME_SIZE,
                                    hopSize=self.HOP_SIZE):
            mag, ph = c2p(fft(w(frame)))
            pool.add('onsets.complex', od_csd(mag, ph))

        # Step 1: normalise the data using an adaptive mean threshold
        novelty_mean = self.adaptive_mean(pool['onsets.complex'], 16.0)

        # Step 2: half-wave rectify the result
        novelty_hwr = (pool['onsets.complex'] - novelty_mean).clip(min=0)

        # Step 7 (experimental): determine the downbeat location as the beat
        # subsequence with the highest average complex spectral difference
        for i in range(4):
            phase_frames = (phase * 44100.0) / 512.0
            # Discard the last frame index: rounding up may push it past the
            # end of the array.
            frames = np.round(
                np.arange(phase_frames + i * self.numFramesPerBeat(bpm),
                          np.size(novelty_hwr),
                          4 * self.numFramesPerBeat(bpm))).astype('int')[:-1]
            pool.add('output.downbeat',
                     np.sum(novelty_hwr[frames]) / np.size(frames))

            plt.subplot(4, 1, i + 1)
            plt.plot(novelty_hwr)
            for f in frames:
                plt.axvline(x=f)
        print(pool['output.downbeat'])
        downbeatIndex = np.argmax(pool['output.downbeat'])
        plt.show()

        # experimental
        return 1.0 * self.beats[downbeatIndex::4]
Example No. 4
    def __call__(self, audio, SR, sumThreshold=1e-5):
        self.__reset__()

        if audio.ndim > 1:
            audio = np.sum(audio, axis=1) / audio.shape[1]  # downmix to mono

        fcIndexArr = []
        self.hist = np.zeros(int(self.frameSize / 2 + 1))
        fft = FFT(size=self.frameSize)  # declare FFT function
        window = Windowing(size=self.frameSize,
                           type="hann")  # declare windowing function
        self.avgFrames = np.zeros(int(self.frameSize / 2) + 1)

        maxNrg = max([
            sum(abs(fft(window(frame)))**2)
            for frame in FrameGenerator(audio,
                                        frameSize=self.frameSize,
                                        hopSize=self.hopSize,
                                        startFromZero=True)
        ])

        for i, frame in enumerate(
                FrameGenerator(audio,
                               frameSize=self.frameSize,
                               hopSize=self.hopSize,
                               startFromZero=True)):

            frame = window(frame)  # apply window to the frame
            frameFft = abs(fft(frame))
            nrg = sum(frameFft**2)

            if nrg >= 0.1 * maxNrg:  # consider only reasonably energetic frames
                # Scan from the highest bin downwards; the first bin j whose
                # tail sum, normalised by the bin index, exceeds the threshold
                # is taken as the frame's effective cutoff bin.
                for j in reversed(range(len(frameFft))):
                    if sum(frameFft[j:] / j) >= sumThreshold:
                        fcIndexArr.append(j)
                        self.hist[j] += nrg
                        break
                self.avgFrames = self.avgFrames + frameFft

        if len(fcIndexArr) == 0:
            fcIndexArr.append(int(self.frameSize / 2) + 1)
            self.hist[int(self.frameSize / 2)] += 1

        self.avgFrames /= (i + 1)
        self.mostLikelyBin, conf, binary = self.__computeMeanFc(
            fcIndexArr, np.arange(int(self.frameSize / 2) + 2), hist=self.hist)

        return self.mostLikelyBin * SR / self.frameSize, conf, binary
Example No. 5
from essentia.standard import (MonoLoader, FrameGenerator, Windowing, FFT,
                               CartesianToPolar)
from madmom.features.onsets import OnsetPeakPickingProcessor
import numpy as np


def ninos(filename, gamma=0.94):
    """
    Reference: Mounir, M., Karsmakers, P., & Van Waterschoot, T. (2016).
    Guitar note onset detection based on a spectral sparsity measure.
    European Signal Processing Conference.
    https://doi.org/10.1109/EUSIPCO.2016.7760394
    """
    N = 2048
    hopSize = int(N / 10)
    J = int(N * gamma / 2)  # number of strongest bins kept per frame
    audio = MonoLoader(filename=filename, sampleRate=44100)()
    mag = []
    for frame in FrameGenerator(audio, frameSize=N, hopSize=hopSize):
        m = CartesianToPolar()(FFT()(Windowing(type='hann')(frame)))[0]
        m = np.asarray(m)
        idx = np.argsort(m)[::-1][:J]  # keep the J largest magnitudes
        mag.append(m[idx])
    mag = np.asarray(mag)
    x2 = mag * mag
    inos = np.sum(x2, axis=1) / (np.sum(x2 * x2, axis=1) ** 0.25)
    ninos = inos / (J ** 0.25)
    return OnsetPeakPickingProcessor(threshold=0.03,
                                     fps=44100 / hopSize)(ninos)
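Written out, the measure the code computes on the vector x_n of the J largest magnitudes of frame n (a direct transcription of the lines above) is

\[ \mathrm{INOS}(n) = \frac{\lVert x_n \rVert_2^2}{\lVert x_n \rVert_4}, \qquad \mathrm{NINOS}(n) = \frac{\mathrm{INOS}(n)}{J^{1/4}}, \]

and peak picking on the resulting NINOS curve (the last line of the function) yields the onset candidates.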
Example No. 6
import numpy as np
from essentia.standard import FFT, FrameGenerator, Windowing


def detectBW(audio: list,
             SR: float,
             frame_size=256,
             hop_size=128,
             floor_db=-90,
             oversample_f=1):

    frame_size *= oversample_f  # if an oversample factor is desired, apply it

    fc_index_arr = []
    # eps was presumably a module-level constant in the original source;
    # defined here so the snippet is self-contained.
    eps = np.finfo(np.float32).eps
    fft = FFT(size=frame_size)  # declare FFT function
    window = Windowing(size=frame_size,
                       type="hann")  # declare windowing function

    for frame in FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True):

        frame_fft = abs(fft(window(frame)))
        frame_fft_db = 20 * np.log10(
            frame_fft + eps)  # calculate frame fft values in db
        # Linearly interpolate between the maxima of the spectrum.
        # (compute_spectral_envelope, modify_floor, compute_fc and
        # energy_verification are project-level helpers not shown here.)
        interp_frame = compute_spectral_envelope(frame_fft_db, "linear")
        interp_frame = modify_floor(interp_frame, floor_db, log=True)
        fc_index = compute_fc(interp_frame)

        if energy_verification(frame_fft, fc_index):
            fc_index_arr.append(fc_index)

    if len(fc_index_arr) == 0:
        fc_index_arr = [frame_size]

    fc_bin, conf, binary = compute_mean_fc(fc_index_arr,
                                           np.arange(len(frame_fft)), SR)

    # print("mean_fc: ", fc_bin*SR/frame_size ," conf: ", conf ," binary_result: ", binary)

    return fc_bin * SR / frame_size, conf, binary
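The helper functions are not included in the snippet. Purely as an illustration of the idea, a hypothetical compute_fc might pick the highest bin whose envelope stays above the noise floor; the name, signature, and logic below are assumptions, not the original implementation:

import numpy as np

def compute_fc(interp_frame, floor_db=-90):
    # Hypothetical reconstruction, NOT the original helper: take the highest
    # bin whose interpolated envelope is still above the noise floor as the
    # effective cutoff-frequency bin.
    above = np.where(interp_frame > floor_db)[0]
    return int(above[-1]) if above.size else len(interp_frame) - 1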
Example No. 7
    def f_essentia_extract(Audio):
        ## Library methods that detect where each note occurs in time

        od2 = OnsetDetection(method='complex')
        # Let's also get the other algorithms we will need, and a pool to store the results
        w = Windowing(type='hann')
        fft = FFT()  # this gives us a complex FFT
        c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)
        pool = essentia.Pool()

        # Computing onset detection functions.
        for frame in FrameGenerator(Audio, frameSize=1024, hopSize=512):
            mag, phase = c2p(fft(w(frame)))
            pool.add('features.complex', od2(mag, phase))

        ## start of each "note"
        onsets = Onsets()
        tiempos_detectados_essentia = onsets(
            essentia.array([pool['features.complex']]), [1])
        #print(tiempos_detectados_essentia)
        return tiempos_detectados_essentia
Example No. 8
    def __init__(self):
        super().__init__()
        self._eq = np.array([1])  # EQ curve in the frequency domain (flat by default)
        self.eq = [1]
        self.fft = FFT()    # forward transform
        self.ifft = IFFT()  # inverse transform
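A hypothetical sketch of how such an FFT/IFFT pair could apply the stored EQ curve to one 1024-sample float32 frame; stretching the EQ curve to the spectrum length with np.interp is an assumption, not part of the original class:

import numpy as np
from essentia.standard import FFT, IFFT

def apply_eq(frame, eq_curve):
    # Hypothetical per-frame EQ, NOT the original implementation.
    spectrum = FFT()(frame)  # complex half-spectrum (513 bins for a 1024 frame)
    gains = np.interp(np.linspace(0.0, 1.0, len(spectrum)),
                      np.linspace(0.0, 1.0, len(eq_curve)),
                      eq_curve).astype(np.complex64)  # stretch EQ to bin count
    return IFFT()(spectrum * gains)  # back to the time domain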
Example No. 9
import essentia.standard
from essentia.standard import (Mean, Spectrum, Windowing, MFCC, SingleGaussian,
                               OnsetDetection, FFT, CartesianToPolar, Onsets,
                               SpectralPeaks, Dissonance, BarkBands)

from build_map import build_map

sampleRate = 44100
frameSize = 2048
hopSize = 1024
windowType = "hann"

mean = Mean()

keyDetector = essentia.standard.Key(pcpSize=12)
spectrum = Spectrum()
window = Windowing(size=frameSize, zeroPadding=0, type=windowType)
mfcc = MFCC()
gaussian = SingleGaussian()
od = OnsetDetection(method='hfc')
fft = FFT()  # this gives us a complex FFT
c2p = CartesianToPolar()  # and this turns it into a pair (magnitude, phase)
onsets = Onsets(alpha=1)

# dissonance
spectralPeaks = SpectralPeaks(sampleRate=sampleRate, orderBy='frequency')
dissonance = Dissonance()

# barkbands
barkbands = BarkBands(sampleRate=sampleRate)

# zero crossing rate
# zerocrossingrate = ZeroCrossingRate()

# odd-to-even harmonic energy ratio
# odd2evenharmonicenergyratio = OddToEvenHarmonicEnergyRatio()
Example No. 10
    def run(self, audio):

        # TODO put this in some util class

        # Step 0: calculate the CSD (Complex Spectral Difference) features
        # and the associated onset detection function
        spec = Spectrum(size=self.FRAME_SIZE)  # declared but not used below
        w = Windowing(type='hann')
        fft = FFT()
        c2p = CartesianToPolar()
        od_csd = OnsetDetection(method='complex')

        pool = Pool()

        # TODO test faster (numpy) way
        for frame in FrameGenerator(audio,
                                    frameSize=self.FRAME_SIZE,
                                    hopSize=self.HOP_SIZE):
            mag, phase = c2p(fft(w(frame)))
            pool.add('onsets.complex', od_csd(mag, phase))

        # Step 1: normalise the data using an adaptive mean threshold
        novelty_mean = self.adaptive_mean(pool['onsets.complex'], 16.0)

        # Step 2: half-wave rectify the result
        novelty_hwr = (pool['onsets.complex'] - novelty_mean).clip(min=0)

        # Step 3: then calculate the autocorrelation of this signal
        novelty_autocorr = self.autocorr(novelty_hwr)

        # Step 4: sum over constant intervals to detect the most likely BPM
        valid_bpms = np.arange(self.minBpm, self.maxBpm, self.stepBpm)
        for bpm in valid_bpms:
            # Discard the last frame index: rounding up may push it past the
            # end of the array.
            frames = np.round(
                np.arange(0, np.size(novelty_autocorr),
                          self.numFramesPerBeat(bpm))).astype('int')[:-1]
            pool.add('output.bpm',
                     np.sum(novelty_autocorr[frames]) / np.size(frames))
        bpm = valid_bpms[np.argmax(pool['output.bpm'])]

        # Step 5: calculate phase information
        valid_phases = np.arange(0.0, 60.0 / bpm, 0.001)  # valid phases in seconds
        for phase in valid_phases:
            # Convert phase from seconds to frames (44100 Hz sample rate,
            # 512-sample hop size)
            phase_frames = (phase * 44100.0) / 512.0
            # Discard the last frame index, as above.
            frames = np.round(
                np.arange(phase_frames, np.size(novelty_hwr),
                          self.numFramesPerBeat(bpm))).astype('int')[:-1]
            pool.add('output.phase',
                     np.sum(novelty_hwr[frames]) / np.size(frames))
        phase = valid_phases[np.argmax(pool['output.phase'])]
        print('PHASE', phase)

        # Step 6: determine the beat locations
        spb = 60. / bpm  # seconds per beat
        beats = np.arange(phase, (np.size(audio) / 44100) - spb + phase,
                          spb).astype('single')

        # Store all the results
        self.bpm = bpm
        self.phase = phase
        self.beats = beats

        self.downbeats = self.calculateDownbeats(audio, bpm, phase)
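Both run() and calculateDownbeats() above call helpers that are not part of these snippets: self.adaptive_mean, self.autocorr, and self.numFramesPerBeat. A minimal sketch of plausible implementations, assuming the 44100 Hz sample rate and 512-sample hop used in the code; these are reconstructions, not the original methods:

import numpy as np

def adaptive_mean(x, N):
    # Centered moving average over a window of N frames.
    return np.convolve(x, np.ones(int(N)) / N, mode='same')

def autocorr(x):
    # Keep only the non-negative lags of the full autocorrelation.
    result = np.correlate(x, x, mode='full')
    return result[result.size // 2:]

def numFramesPerBeat(bpm, sample_rate=44100.0, hop_size=512.0):
    # Number of analysis frames spanning one beat at the given tempo.
    return (60.0 * sample_rate) / (hop_size * bpm)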