Ejemplo n.º 1
0
    def get_onsets(self, _audio=None):
        """Run onset detection over an audio signal.

        Each 1024-sample frame (hop 512) is windowed with ``self.winType``,
        transformed to magnitude/phase and passed to an Essentia
        ``OnsetDetection`` configured with ``self.onsetMethod``.  The
        per-frame detection values are then handed to ``es.Onsets``.

        Args:
            _audio: Optional signal to analyse.  When omitted (``None``) or
                empty, ``self.audio`` is analysed instead.

        Returns:
            Whatever ``es.Onsets(alpha=.2)`` returns for the single
            detection function with weight 1 (presumably onset positions
            in seconds -- confirm against the Essentia documentation).
        """
        # ``None`` replaces the former mutable ``[]`` default, and the length
        # check replaces ``_audio != []``, which is an elementwise (and thus
        # ambiguous) comparison when ``_audio`` is a NumPy array.
        if _audio is None or len(_audio) == 0:
            audio = self.audio
        else:
            audio = _audio

        window = es.Windowing(type=self.winType)
        c2p = es.CartesianToPolar()
        fft = es.FFT()
        onsetDetection = es.OnsetDetection(method=self.onsetMethod,
                                           sampleRate=44100)
        onsets = es.Onsets(alpha=.2)
        pool = Pool()

        for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
            mag, phase = c2p(fft(window(frame)))
            # NOTE(review): re-configuring on every frame looks redundant
            # (the method never changes inside the loop); kept because it is
            # unclear whether it affects the detector's internal state.
            onsetDetection.configure(method=self.onsetMethod)
            pool.add("onsetFunction", onsetDetection(mag, phase))

        # Onsets takes a matrix of detection functions plus one weight each.
        return onsets([pool["onsetFunction"]], [1])
Ejemplo n.º 2
0
def segment(audio, hopSize, frameSize, rms_onset_threshold,
            mel_onset_threshold, flux_onset_threshold, onset_threshold):
    """Locate the first onset frame and the start of its sustain phase.

    The signal is analysed frame by frame.  An onset is flagged on the
    first frame where the flux or mel-flux detection value exceeds its
    threshold, the rms detection value exceeds ``rms_onset_threshold`` and
    the loudness exceeds ``onset_threshold``.  Once an onset was found, the
    sustain begins on the first frame where the pitch confidence is above
    0.5 and the rms / mel-flux values have dropped below 5% / 30% of the
    values they had at the onset frame.

    Returns:
        ``(onset, sustain)`` frame indices; either is ``None`` when the
        corresponding event was never detected.
    """
    # Frame source and per-frame analysis algorithms.
    frames = estd.FrameGenerator(audio,
                                 frameSize=frameSize,
                                 hopSize=hopSize)
    window = estd.Windowing(type='hann')
    fft = estd.FFT()
    to_polar = estd.CartesianToPolar()
    spectrum = estd.Spectrum()
    loudness = estd.Loudness()
    pitch = estd.PitchYinFFT(frameSize=frameSize,
                             minFrequency=40,
                             maxFrequency=2500,
                             interpolate=True)
    mel_detector = estd.OnsetDetection(method='melflux')
    rms_detector = estd.OnsetDetection(method='rms')
    hfc_detector = estd.OnsetDetection(method='hfc')
    flux_detector = estd.OnsetDetection(method='flux')
    pool = essentia.Pool()

    # State: we start out looking for an onset; after one is found we are
    # in the attack phase until the sustain condition is met.
    waiting_for_onset = True
    in_attack = False
    mel_at_onset = 0
    rms_at_onset = 0

    # Results (frame indices).
    onset = None
    sustain = None

    for frame_idx, frame in enumerate(frames):
        mag, phase = to_polar(fft(window(frame)))
        _, pitch_conf = pitch(spectrum(window(frame)))
        loud = loudness(frame)
        mel_val = mel_detector(mag, phase)
        rms_val = rms_detector(mag, phase)
        hfc_val = hfc_detector(mag, phase)
        flux_val = flux_detector(mag, phase)

        # Keep a trace of every per-frame descriptor.
        pool.add('onsets_mel', mel_val)
        pool.add('onsets_rms', rms_val)
        pool.add('onsets_hfc', hfc_val)
        pool.add('onsets_flux', flux_val)
        pool.add('conf', pitch_conf)
        pool.add('loudness', loud)

        # Onset: spectral change plus enough energy and loudness.
        if waiting_for_onset:
            spectral_hit = (flux_val > flux_onset_threshold
                            or mel_val > mel_onset_threshold)
            if (spectral_hit and rms_val > rms_onset_threshold
                    and loud > onset_threshold):
                onset = frame_idx
                in_attack = True
                waiting_for_onset = False
                mel_at_onset = mel_val
                rms_at_onset = rms_val

        # Sustain: checked on the same frame as the onset too, hence a
        # separate ``if`` rather than ``elif``.
        if (in_attack and pitch_conf > 0.5
                and rms_val < rms_at_onset * .05
                and mel_val < mel_at_onset * .3):
            in_attack = False
            sustain = frame_idx

    return onset, sustain
Ejemplo n.º 3
0
 def calculate_function(self):
     """Compute the onset detection function and its time axis.

     Every frame of ``self.signal`` is windowed with ``self.window``,
     converted to magnitude/phase and fed to ``self.calcOnsetFunc``.

     Sets:
         self.onsetFunction: float32 array with one value per frame.
         self.onsetTime: per-frame times, i.e. frame index times the hop
             duration, shifted back by one hop.
     """
     to_polar = es.CartesianToPolar()
     fft = es.FFT()
     frames = es.FrameGenerator(self.signal,
                                frameSize=self.frameSize,
                                hopSize=self.hopSize)
     polar_frames = (to_polar(fft(self.window(frame))) for frame in frames)
     values = [self.calcOnsetFunc(mag, phase) for mag, phase in polar_frames]

     self.onsetFunction = np.array(values, dtype=np.float32)

     hop_duration = self.hopSize / self.sampleRate
     self.onsetTime = np.arange(len(values)) * hop_duration
     # Shift the whole axis one hop earlier.
     self.onsetTime -= hop_duration
Ejemplo n.º 4
0
def getOnsetFunctions(fname):
    """Compute band-wise spectral-flux onset functions for an audio file.

    The file is loaded as mono audio and cut into frames of
    ``params.frmSize`` samples every ``params.hop`` samples.  Each frame is
    zero-padded to ``params.Nfft``, Hamming-windowed and transformed to a
    magnitude spectrum.  For every band in ``params.fBands`` the
    half-wave-rectified difference to the previous frame's magnitudes is
    summed over the bins whose ``params.fTicks`` value lies in the band.

    Args:
        fname: Path of the audio file to analyse.

    Returns:
        2-D array with one row per frame: column 0 holds the frame-centre
        time (``hop / Fs * index + frmSize / (2 * Fs)``), followed by one
        flux column per band.
    """
    logger = log.get_logger("rhythm")
    zeropadLen = params.Nfft - params.frmSize
    zz = np.zeros((zeropadLen, ), dtype='float32')
    # Floor division: NumPy rejects a float shape under Python 3.
    bufferFrame = np.zeros((params.Nfft // 2 + 1, ))

    logger.info('Reading audio file...')
    audio = ess.MonoLoader(filename=fname)()
    fft = ess.FFT(size=params.Nfft)  # complex FFT
    c2p = ess.CartesianToPolar()  # complex -> (magnitude, phase)
    pool = es.Pool()
    w = ess.Windowing(type="hamming")
    fTicks = params.fTicks
    poolName = 'features.flux'

    # The bin selection of each band does not depend on the frame, so build
    # the boolean masks once instead of once per frame.
    bandMasks = [(fTicks >= params.fBands[band, 0]) &
                 (fTicks <= params.fBands[band, 1])
                 for band in range(params.numBands)]

    # Force true division so integer hop/Fs do not truncate under Python 2.
    hopSeconds = float(params.hop) / params.Fs
    halfFrameSeconds = params.frmSize / (2.0 * params.Fs)

    logger.info('Extracting Onset functions...')
    frameCounter = 0
    for frame in ess.FrameGenerator(audio,
                                    frameSize=params.frmSize,
                                    hopSize=params.hop):
        frmTime = hopSeconds * frameCounter + halfFrameSeconds
        zpFrame = np.hstack((frame, zz))
        mag, phase = c2p(fft(w(zpFrame)))
        magFlux = mag - bufferFrame
        bufferFrame = np.copy(mag)  # reference for the next frame's flux
        for band, mask in enumerate(bandMasks):
            magFluxBand = magFlux[mask]
            # Half-wave rectification: keep only increases in magnitude.
            magFluxBand = (magFluxBand + abs(magFluxBand)) / 2
            oFn = magFluxBand.sum()
            if math.isnan(oFn):
                print("NaN found here")
            pool.add(poolName + str(band), oFn)
        pool.add('features.time', frmTime)
        frameCounter += 1
        if frameCounter % 10000 == 0:
            logger.info(
                str(frameCounter) + '/' + str(audio.size // params.hop) +
                '...')
    logger.info('Total frames processed = ' + str(frameCounter))

    # Stack the time row and one row per band, then return frames as rows.
    rows = [es.array([pool['features.time']])]
    rows.extend(
        es.array([pool[poolName + str(band)]])
        for band in range(params.numBands))
    return np.transpose(np.vstack(rows))
Ejemplo n.º 5
0
def OnsetsSegmentation(audio,
                       frame_size=1024,
                       frame_hop=512,
                       windowing_type='hann',
                       onsets_method='hfc'):
    """Split an audio signal into segments at its detected onsets.

    A per-frame onset detection function is computed with the chosen
    ``onsets_method`` and turned into onset positions by ``Onsets``; the
    audio is then sliced from 0 to the first onset, between consecutive
    onsets, and from the last onset to the end of the signal.

    Returns:
        ``(segments, onsets)``: the output of ``Slicer`` and the onset
        positions returned by ``Onsets``.
    """
    # Algorithms used for the per-frame analysis.
    spectrum_algo = es_mode.Spectrum()
    fft = es_mode.FFT()
    to_polar = es_mode.CartesianToPolar()
    onset_detector = es_mode.OnsetDetection(method=onsets_method)
    window = es_mode.Windowing(type=windowing_type)
    pool = es.Pool()

    # Callback executed on every frame.
    def analyse_frame(n):
        magnitude_spectrum = spectrum_algo(window(n))
        _mag, phase = to_polar(fft(window(n)))
        pool.add('features.spectrum', magnitude_spectrum)
        pool.add('features.', phase)
        pool.add('features.onsetdetection',
                 onset_detector(magnitude_spectrum, phase))

    # Build the onset detection function; the helper's return value is
    # used below as the number of frames.
    frame_count = inFrames(audio=audio,
                           algorithm=analyse_frame,
                           frameSize=frame_size,
                           hopSize=frame_hop)

    audio_duration = es_mode.Duration()(audio)
    onset_picker = es_mode.Onsets(frameRate=frame_count / audio_duration)

    # Matrix with one row per onset detection function that was run.
    detection_matrix = es.array([pool['features.onsetdetection']])

    # The second argument is the weight vector, one entry per row.
    onsets = onset_picker(detection_matrix, [1])

    start_times = es.array(np.append([0], onsets))
    end_times = es.array(np.append(onsets, audio_duration))
    slicer = es_mode.Slicer(endTimes=end_times,
                            startTimes=start_times,
                            timeUnits="seconds")
    return slicer(audio), onsets
Ejemplo n.º 6
0
    def __detect_onsets(self, file, frame_size, hop_size, windowfnc,
                        normalize) -> None:
        """Detect onsets in ``file.audio`` and store them in ``self.onsets``.

        Each frame is windowed (``windowfnc.value``, optionally normalized),
        transformed to magnitude/phase and passed to an ``OnsetDetection``
        using the method named by ``self.algo``.  The resulting detection
        function is then converted to onsets by ``Onsets``.
        """
        windowing = estd.Windowing(size=frame_size,
                                   type=windowfnc.value,
                                   normalized=normalize)
        spectrum_fft = estd.FFT(size=frame_size)
        features = es.Pool()
        to_polar = estd.CartesianToPolar()
        onset_function = estd.OnsetDetection(method=self.algo)
        feature_key = "features." + self.algo

        frames = estd.FrameGenerator(file.audio,
                                     frameSize=frame_size,
                                     hopSize=hop_size)
        for frame in frames:
            magnitude, phase = to_polar(spectrum_fft(windowing(frame)))
            features.add(feature_key, onset_function(magnitude, phase))

        # Onsets expects a matrix of detection functions (one per row) and
        # a weight for each of them.
        onset_picker = estd.Onsets()
        detection_matrix = es.array([features[feature_key]])
        self.onsets = onset_picker(detection_matrix, [1])
Ejemplo n.º 7
0
def detect_onset(audio, index):
    """Detect onsets in ``audio`` using the "complex" detection method.

    Prints a start and a finish message tagged with ``index`` (the finish
    message includes the elapsed wall-clock time).

    Args:
        audio: Signal to analyse; it is cut into 1024-sample frames with a
            hop of 512 samples.
        index: Identifier used only in the printed messages.

    Returns:
        The output of ``essentia.standard.Onsets`` for the single detection
        function with weight 1.
    """
    # Imported here rather than at module level; should be able to fetch
    # the module from cache.
    import essentia.standard as ess_std
    from essentia import array

    print("Subprocess {} starts".format(index))
    processing_start = time()

    onset_detector = ess_std.OnsetDetection(method="complex")
    window = ess_std.Windowing(type="hann")
    fft = ess_std.FFT()
    c2p = ess_std.CartesianToPolar()
    onsets = ess_std.Onsets()

    # One detection value per frame.
    detection_function = []
    for frame in ess_std.FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(window(frame)))
        detection_function.append(onset_detector(mag, phase))

    # Onsets takes a matrix of detection functions plus one weight each.
    onsets_array = onsets(array([detection_function]), [1])

    # ``.2f`` (fixed-point, two decimals): the former ``.2`` meant "two
    # significant digits" and rendered e.g. 12.3 s as "1.2e+01s".
    print("Subprocess {} finished. Elapsed time: {:.2f}s".format(
        index,
        time() - processing_start))
    return onsets_array
Ejemplo n.º 8
0
    def __init__(self, params, fsm=None):
        """Set up thresholds, Essentia algorithms and the state machine.

        Args:
            params: Mapping that must provide the detection thresholds and
                ratios copied below, plus ``'fs'`` and ``'hop_size'``.
            fsm: Optional external state machine to send events to.
        """
        # Settings copied verbatim from ``params`` onto same-named attributes.
        for key in ('onset_threshold', 'offset_threshold',
                    'max_attack_time', 'max_release_time',
                    'attack_slope_ratio', 'release_slope_ratio',
                    'flux_threshold', 'mel_threshold', 'rms_threshold',
                    'conf_threshold', 'ratio_mel', 'ratio_rms'):
            setattr(self, key, params[key])
        self.rms_threshold_value = 0
        # NOTE(review): "vale" looks like a typo for "value"; the name is
        # kept because code outside this method may read it.
        self.mel_threshold_vale = 0

        self.fs = params['fs']
        self.hop_size = params['hop_size']
        # Attack / release time limits expressed in frames.
        self.max_attack_frames = seconds2frames(self.max_attack_time,
                                                fs=self.fs,
                                                hop_size=self.hop_size)
        self.max_release_frames = seconds2frames(self.max_release_time,
                                                 fs=self.fs,
                                                 hop_size=self.hop_size)
        self.ext_fsm = fsm  # external state machine to send events to
        self.buffer = []

        self.was_onset = False
        self.was_offset = False
        self.onset_counter = None
        self.offset_counter = None
        self.onset_samples = 2  # consecutive samples required above threshold
        self.offset_samples = 3  # consecutive samples required below threshold
        self.peak_detect = GrowingSlopeEnd(max_frames=self.max_attack_frames,
                                           m=self.attack_slope_ratio)

        # Essentia algorithms: one onset detector per method, stored as
        # ``self.o_<method>``, followed by the spectral front end.
        for method in ('melflux', 'rms', 'hfc', 'flux', 'complex'):
            setattr(self, 'o_' + method, estd.OnsetDetection(method=method))
        self.fft = estd.FFT()
        self.c2p = estd.CartesianToPolar()
        self.w = estd.Windowing(type='hann')

        # State machine: detecting -> attack -> sustain -> detecting, with a
        # 'reset' event that returns to 'detecting' from any state.
        transitions = (
            ('onset', 'detecting', 'attack'),
            ('peak', 'attack', 'sustain'),
            ('offset', 'sustain', 'detecting'),
            ('reset', ['detecting', 'attack', 'sustain'], 'detecting'),
        )
        events = [{'name': name, 'src': src, 'dst': dst}
                  for name, src, dst in transitions]
        callbacks = {
            'ondetecting': self.on_detecting,
            'onattack': self.on_attack,
            'onsustain': self.on_sustain,
            'onbeforeonset': self.on_onset,
            'onbeforepeak': self.on_peak,
            'onbeforeoffset': self.on_offset,
        }
        self.fsm = Fysom({
            'initial': 'detecting',
            'events': events,
            'callbacks': callbacks,
        })