예제 #1
0
def get_sines_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """
    Run a frame-wise sinusoidal model analysis over an audio signal.

    The signal is cut into frames of 4096 samples with a hop of 2048 samples.
    Each frame goes through an FFT and ``SineModelAnal``; the resulting peaks
    are then sorted by ascending frequency.

    :param audio: Audio array. Arrays with more than one dimension are
        down-mixed to mono with ``MonoMixer`` first.
    :param sr: Sample rate of ``audio``; forwarded to ``SineModelAnal``.
    :param onlyfrecuencies: If True, return only the frequency matrix.
    :param nsines: Maximum number of sinusoids kept per frame.
    :return: ``freqs`` with shape (N, nsines) when ``onlyfrecuencies`` is
        True, otherwise the tuple ``(freqs, mags)``, both (N, nsines).
        N is the number of analysed frames (0 when no frame is produced).
    """
    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])

    fft_algo = std.FFT()
    # The sample rate must reach the analyser, otherwise any `sr` other than
    # the library default is silently ignored.
    sine_anal = std.SineModelAnal(sampleRate=sr,
                                  maxnSines=nsines,
                                  orderBy='frequency',
                                  minFrequency=1)

    # Single pass over the frames; results are stacked afterwards instead of
    # framing the whole signal twice just to learn the frame count.
    tracks = []
    for frame in std.FrameGenerator(audio, frameSize=4096, hopSize=2048):
        freqs, mags, _ = sine_anal(fft_algo(frame))
        order = np.argsort(freqs)
        tracks.append((freqs[order], mags[order]))

    # `+ eps` is kept so the output dtype stays what callers received before.
    sines = np.zeros([len(tracks), 2, nsines], dtype=np.float32) + eps
    for i, (freqs, mags) in enumerate(tracks):
        sines[i, 0, :] = freqs
        sines[i, 1, :] = mags

    if onlyfrecuencies:
        return sines[:, 0, :]
    return sines[:, 0, :], sines[:, 1, :]
예제 #2
0
def analysisSynthesis(params, signal):
    """
    Pass ``signal`` through an STFT analysis / overlap-add synthesis chain.

    The chain is: frame cutting -> Hann window -> FFT -> IFFT -> OverlapAdd.
    No spectral transformation is applied between FFT and IFFT.

    :param params: Mapping with at least ``'frameSize'`` and ``'hopSize'``;
        it is also handed to ``cutFrames``.
    :param signal: Input samples. Half a frame of zeros is appended before
        framing.
    :return: The resynthesised samples. Output frames whose index is below
        ``frameSize / (2 * hopSize)`` are discarded. The result starts with
        the single value that seeds ``outsignal`` (``array(0)``); if no frame
        is kept, that seed is returned unchanged.
    """
    outsignal = array(0)
    # The pad length is a sample count, so it must be an integer; plain `/`
    # yields a float on Python 3, which is rejected as an array size.
    signal = numpy.append(signal, zeros(int(params['frameSize'] // 2)))

    frames = cutFrames(params, signal)

    w = std.Windowing(type="hann")
    fft = std.FFT(size=params['frameSize'])
    ifft = std.IFFT(size=params['frameSize'])
    overl = std.OverlapAdd(frameSize=params['frameSize'],
                           hopSize=params['hopSize'],
                           gain=1. / params['frameSize'])

    kept = []
    for counter, f in enumerate(frames):
        # STFT analysis
        infft = fft(w(f))
        # here we could apply spectral transformations
        outfft = infft

        # STFT synthesis
        outframe = overl(ifft(outfft))

        if counter >= (params['frameSize'] / (2 * params['hopSize'])):
            kept.append(numpy.ravel(outframe))

    if not kept:
        return outsignal
    # One concatenation instead of re-copying the whole output on every frame.
    return numpy.concatenate([numpy.ravel(outsignal)] + kept)
예제 #3
0
def get_hpeaks_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """
    Extract the harmonic peaks of every frame of an audio signal.

    Frames of 4096 samples (hop 2048) go through ``PitchYin`` for a pitch
    estimate and through FFT + ``SineModelAnal`` for spectral peaks. Peaks
    are sorted by ascending frequency, zero-frequency entries are dropped,
    and the remainder is passed to ``HarmonicPeaks`` with the pitch.

    :param audio: Audio array. Arrays with more than one dimension are
        down-mixed to mono with ``MonoMixer`` first.
    :param sr: Sample rate of ``audio``; forwarded to ``PitchYin`` and
        ``SineModelAnal``.
    :param onlyfrecuencies: If True, return only the frequency matrix.
    :param nsines: Maximum number of sinusoids tracked per frame.
    :return: ``freqs`` when ``onlyfrecuencies`` is True, otherwise the tuple
        ``(freqs, mags)``. Both have one row per frame and one column per
        harmonic peak returned by ``HarmonicPeaks``.
    """
    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])

    fft_algo = std.FFT()
    # Both estimators need the real sample rate; without it any `sr` other
    # than the library default is silently ignored.
    pyin = std.PitchYin(sampleRate=sr)
    hpeaks = std.HarmonicPeaks()
    sine_anal = std.SineModelAnal(sampleRate=sr,
                                  maxnSines=nsines,
                                  orderBy='frequency',
                                  minFrequency=1)
    sines = []
    for frame in std.FrameGenerator(audio, frameSize=4096, hopSize=2048):
        pitch, _ = pyin(frame)
        freqs, mags, _ = sine_anal(fft_algo(frame))

        order = np.argsort(freqs)
        freqs = freqs[order]
        mags = mags[order]

        # Zero-frequency entries are removed before the harmonic peak search.
        non_zero_freqs = np.where(freqs != 0)
        freqs = freqs[non_zero_freqs]
        mags = mags[non_zero_freqs]

        freqs, mags = hpeaks(freqs, mags, pitch)
        sines.append([freqs, mags])

    # NOTE(review): stacking assumes HarmonicPeaks returns the same number of
    # peaks for every frame (including unvoiced ones) - confirm.
    sines = np.array(sines)
    if onlyfrecuencies:
        return sines[:, 0, :]
    return sines[:, 0, :], sines[:, 1, :]
예제 #4
0
    def get_onsets(self, _audio=None):
        """
        Detect onsets in an audio signal.

        Frames of 1024 samples (hop 512) are windowed with ``self.winType``,
        transformed to magnitude/phase, and fed to ``OnsetDetection`` using
        ``self.onsetMethod``. The per-frame detection values are then passed
        to ``Onsets`` with a single weight of 1.

        :param _audio: Audio to analyse. ``None`` or an empty sequence means
            "use ``self.audio``".
        :return: The output of the ``Onsets`` algorithm.
        """
        # Test identity/length rather than `_audio != []`: that comparison is
        # element-wise for numpy arrays and does not give a single truth
        # value. `None` also replaces the shared mutable `[]` default.
        if _audio is None or len(_audio) == 0:
            audio = self.audio
        else:
            audio = _audio

        W = es.Windowing(type=self.winType)
        c2p = es.CartesianToPolar()
        fft = es.FFT()
        onsetDetection = es.OnsetDetection(method=self.onsetMethod,
                                           sampleRate=44100)
        onsets = es.Onsets(alpha=.2)
        pool = Pool()

        # The detector already gets its method at construction, so it is not
        # re-configured per frame. NOTE(review): re-configuring may also
        # reset the detector's inter-frame state - confirm in Essentia docs.
        for frame in es.FrameGenerator(audio, frameSize=1024, hopSize=512):
            mag, phase, = c2p(fft(W(frame)))
            pool.add("onsetFunction", onsetDetection(mag, phase))

        DetectedOnsetsArray = onsets([pool["onsetFunction"]], [1])

        return DetectedOnsetsArray
def analysisSynthesis(params, signal):
    """
    Resynthesise ``signal`` through an unmodified STFT round trip.

    Pipeline: frame cutter -> Hann windowing -> FFT -> IFFT -> OverlapAdd.

    :param params: Mapping providing ``'frameSize'`` and ``'hopSize'``; it is
        also handed to ``cutFrames``.
    :param signal: Input samples.
    :return: The seed value ``array(0)`` followed by every overlap-added
        frame whose index is at least ``frameSize / (2 * hopSize)``.
    """
    result = array(0)
    chunks = cutFrames(params, signal)

    size = params['frameSize']
    hop = params['hopSize']
    window = std.Windowing(type="hann")
    forward = std.FFT(size=size)
    inverse = std.IFFT(size=size)
    overlap_add = std.OverlapAdd(frameSize=size, hopSize=hop)

    for index, chunk in enumerate(chunks):
        # Analysis; a spectral transformation could be inserted on `spectrum`.
        spectrum = forward(window(chunk))
        # Synthesis.
        resynth = overlap_add(inverse(spectrum))

        # The first frames only prime the overlap-add buffer and are dropped.
        if index < (size / (2 * hop)):
            continue
        result = numpy.append(result, resynth)

    return result
예제 #6
0
def segment(audio, hopSize, frameSize, rms_onset_threshold,
            mel_onset_threshold, flux_onset_threshold, onset_threshold):
    """
    Find the frame indices where a note onset and its sustain phase begin.

    Each frame is Hann-windowed and analysed with three onset detection
    functions (mel-flux, RMS, flux), a YinFFT pitch confidence and a
    loudness value.

    * Onset: the first frame where (flux or mel-flux exceeds its threshold)
      and RMS onset exceeds its threshold and loudness exceeds
      ``onset_threshold``.
    * Sustain: the first frame at or after the onset where pitch confidence
      exceeds 0.5, RMS onset is below 5% of its value at the onset, and
      mel-flux is below 30% of its value at the onset.

    :param audio: Mono audio samples.
    :param hopSize: Hop size in samples.
    :param frameSize: Frame size in samples.
    :param rms_onset_threshold: Lower bound for the RMS onset function.
    :param mel_onset_threshold: Lower bound for the mel-flux onset function.
    :param flux_onset_threshold: Lower bound for the flux onset function.
    :param onset_threshold: Lower bound for the frame loudness.
    :return: ``(onset, sustain)`` frame indices; each is ``None`` when the
        corresponding event was not found.
    """
    # init algorithms
    o_mel = estd.OnsetDetection(method='melflux')
    o_rms = estd.OnsetDetection(method='rms')
    o_flux = estd.OnsetDetection(method='flux')
    fft = estd.FFT()
    c2p = estd.CartesianToPolar()
    frame_generator = estd.FrameGenerator(audio,
                                          frameSize=frameSize,
                                          hopSize=hopSize)
    w = estd.Windowing(type='hann')
    yin = estd.PitchYinFFT(frameSize=frameSize,
                           minFrequency=40,
                           maxFrequency=2500,
                           interpolate=True)
    spectrum = estd.Spectrum()
    loudness = estd.Loudness()

    # control parameters
    attack = False
    detection = True
    mel_onset_value = 0
    rms_onset_value = 0

    # output variables
    onset = None
    sustain = None

    for index, frame in enumerate(frame_generator):
        # Window once and reuse the result for both spectral analyses.
        windowed = w(frame)
        mag, phase = c2p(fft(windowed))
        _, conf = yin(spectrum(windowed))
        loud = loudness(frame)
        mel_onset = o_mel(mag, phase)
        rms_onset = o_rms(mag, phase)
        flux_onset = o_flux(mag, phase)

        # condition for onset
        if detection and (flux_onset > flux_onset_threshold or mel_onset > mel_onset_threshold) \
                and rms_onset > rms_onset_threshold and loud > onset_threshold:
            onset = index
            attack = True
            detection = False
            mel_onset_value = mel_onset
            rms_onset_value = rms_onset
        # condition for beginning of sustain
        if attack and conf > 0.5 and rms_onset < rms_onset_value * .05 and mel_onset < mel_onset_value * .3:
            sustain = index
            # Both flags are now off for good, so the result cannot change.
            break
    return onset, sustain
예제 #7
0
def stft(audio, params):  # TODO: add fft size
    """
    Compute the magnitude of the frame-wise FFT of ``audio``.

    :param audio: Mono audio samples.
    :param params: Sequence ``(hopSize, frameSize, wtype)``. ``wtype`` is
        unpacked but not used: no window is applied to the frames.
    :return: Tuple ``(magnitudes, hopSize)``; ``magnitudes`` has one row per
        frame.
    """
    hopSize, frameSize, wtype = params

    # Build the FFT algorithm once and reuse it for every frame.
    fft = ess.FFT()
    result = [
        fft(frame) for frame in ess.FrameGenerator(
            audio, frameSize=frameSize, hopSize=hopSize)
    ]
    return np.abs(np.asarray(result)), hopSize
예제 #8
0
def nSinesRead(audio_vector):
    """
    Return per-frame sinusoid frequencies and magnitudes of a signal.

    Frames of 2048 samples (hop 1024) are transformed with an FFT and
    analysed with ``SineModelAnal`` limited to 20 sinusoids.

    :param audio_vector: Mono audio samples.
    :return: Tuple ``(freqs, mags)`` with one row per analysed frame. When no
        frame is produced, both arrays have shape (0, 20).
    """
    max_sines = 20
    sineanal = estd.SineModelAnal(maxnSines=max_sines)
    fft_calc = estd.FFT(size=2048)

    freqs = []
    mags = []
    for frame in estd.FrameGenerator(audio_vector, 2048, 1024):
        frame_freqs, frame_mags, _ = sineanal(fft_calc(frame))
        freqs.append(frame_freqs)
        mags.append(frame_mags)

    if not freqs:
        # Indexing an empty stack as 3-D would raise IndexError; return empty
        # matrices instead. The column count assumes the analyser always
        # returns `max_sines` values per frame - TODO confirm.
        empty = np.zeros((0, max_sines), dtype=np.float32)
        return empty, empty.copy()

    return np.array(freqs), np.array(mags)
예제 #9
0
 def calculate_function(self):
     """
     Compute the onset detection function of ``self.signal``.

     Every frame is windowed with ``self.window``, converted to magnitude and
     phase, and scored by ``self.calcOnsetFunc``. The scores are stored as a
     float32 array in ``self.onsetFunction``. ``self.onsetTime`` holds the
     frame index times ``hopSize / sampleRate``, shifted back by one hop.
     """
     spectrum_of = es.FFT()
     to_polar = es.CartesianToPolar()
     frames = es.FrameGenerator(self.signal,
                                frameSize=self.frameSize,
                                hopSize=self.hopSize)

     values = []
     for chunk in frames:
         magnitude, angle = to_polar(spectrum_of(self.window(chunk)))
         values.append(self.calcOnsetFunc(magnitude, angle))

     self.onsetFunction = np.array(values, dtype=np.float32)

     # Time axis: one hop per frame, then moved one hop earlier.
     hop_duration = self.hopSize / self.sampleRate
     self.onsetTime = np.arange(len(values)) * hop_duration
     self.onsetTime -= hop_duration
예제 #10
0
def getOnsetFunctions(fname):
    """
    Compute band-wise spectral-flux onset functions for an audio file.

    The file is loaded as mono and cut into frames of ``params.frmSize``
    samples with hop ``params.hop``. Each frame is zero-padded to
    ``params.Nfft`` and Hamming-windowed, and its magnitude spectrum is
    differenced against the previous frame. For each of the
    ``params.numBands`` bands, the positive part of that difference is summed
    over the bins whose ``params.fTicks`` value lies within
    ``params.fBands[band]``.

    :param fname: Path of the audio file to analyse.
    :return: Array with one row per frame: column 0 is the frame centre time,
        columns 1..numBands are the band-wise flux values.
    """
    logger = log.get_logger("rhythm")
    zeropadLen = params.Nfft - params.frmSize
    zz = np.zeros((zeropadLen, ), dtype='float32')
    frameCounter = 0
    # Integer division: `Nfft / 2` is a float on Python 3 and np.zeros does
    # not accept a float shape.
    bufferFrame = np.zeros((params.Nfft // 2 + 1, ))
    logger.info('Reading audio file...')
    audio = ess.MonoLoader(filename=fname)()
    fft = ess.FFT(size=params.Nfft)  # this gives us a complex FFT
    c2p = ess.CartesianToPolar()  # turns it into a (magnitude, phase) pair
    pool = es.Pool()
    w = ess.Windowing(type="hamming")
    fTicks = params.fTicks
    poolName = 'features.flux'

    # The bin mask and pool key of each band do not depend on the frame, so
    # they are computed once up front.
    bandKeys = [poolName + str(bands) for bands in range(params.numBands)]
    bandMasks = [(fTicks >= params.fBands[bands, 0]) &
                 (fTicks <= params.fBands[bands, 1])
                 for bands in range(params.numBands)]

    logger.info('Extracting Onset functions...')
    for frame in ess.FrameGenerator(audio,
                                    frameSize=params.frmSize,
                                    hopSize=params.hop):
        # Time of the frame centre.
        frmTime = params.hop / params.Fs * frameCounter + params.frmSize / (
            2.0 * params.Fs)
        # NOTE(review): the window is applied after zero-padding, i.e. over
        # the padded length rather than the original frame - confirm intent.
        zpFrame = np.hstack((frame, zz))
        mag, phase, = c2p(fft(w(zpFrame)))
        magFlux = mag - bufferFrame
        bufferFrame = np.copy(mag)  # kept for the next iteration's flux
        for key, chosenInd in zip(bandKeys, bandMasks):
            magFluxBand = magFlux[chosenInd]
            # Half-wave rectification: keep only increases in magnitude.
            magFluxBand = (magFluxBand + abs(magFluxBand)) / 2
            oFn = magFluxBand.sum()
            if (math.isnan(oFn)):
                print("NaN found here")
            pool.add(key, oFn)
        pool.add('features.time', frmTime)
        frameCounter += 1
        if not np.mod(frameCounter, 10000):
            logger.info(
                str(frameCounter) + '/' + str(audio.size / params.hop) + '...')
    logger.info('Total frames processed = ' + str(frameCounter))

    # Stack time stamps and all band features in one call, then transpose to
    # frames x features.
    rows = [es.array([pool['features.time']])]
    rows.extend(es.array([pool[key]]) for key in bandKeys)
    return np.transpose(np.vstack(rows))
예제 #11
0
def OnsetsSegmentation(audio,
                       frame_size=1024,
                       frame_hop=512,
                       windowing_type='hann',
                       onsets_method='hfc'):
    """
    Split ``audio`` into segments at its detected onsets.

    An onset detection function is computed frame by frame (through the
    external ``inFrames`` helper), turned into onset times with ``Onsets``,
    and used to slice the audio with ``Slicer``. The first segment starts at
    0 s and the last one ends at the audio duration.

    :param audio: Mono audio samples.
    :param frame_size: Frame size in samples.
    :param frame_hop: Hop size in samples.
    :param windowing_type: Window type passed to ``Windowing``.
    :param onsets_method: Method passed to ``OnsetDetection``.
    :return: Tuple ``(segments, onsets)``: the sliced audio segments and the
        onset times returned by ``Onsets``.
    """
    # Algorithms used below.
    spec = es_mode.Spectrum()
    fft = es_mode.FFT()
    c2p = es_mode.CartesianToPolar()
    od1 = es_mode.OnsetDetection(method=onsets_method)
    w = es_mode.Windowing(type=windowing_type)
    pool = es.Pool()

    # Callback executed for every frame.
    def F(n):
        # Window once and share the result between Spectrum and FFT.
        windowed = w(n)
        spectrum = spec(windowed)
        _, phase = c2p(fft(windowed))
        # Only the detection function is read later, so spectra and phases
        # are not stored in the pool.
        pool.add('features.onsetdetection', od1(spectrum, phase))

    # Run the callback over all frames; the return value is used as the
    # number of frames (presumably what `inFrames` reports - verify).
    qtdFrames = inFrames(audio=audio,
                         algorithm=F,
                         frameSize=frame_size,
                         hopSize=frame_hop)

    audio_duration = es_mode.Duration()(audio)
    frame_rate = qtdFrames / audio_duration
    # Deliberately not named `os`, to avoid shadowing the stdlib module.
    onset_picker = es_mode.Onsets(frameRate=frame_rate)

    # Matrix with one row per onset detection function that was run.
    onset_detection_matrix = es.array([pool['features.onsetdetection']])

    # The second argument is the weight vector, one weight per function.
    onsets = onset_picker(onset_detection_matrix, [1])

    end_times = es.array(np.append(onsets, audio_duration))
    start_times = es.array(np.append([0], onsets))
    segments = es_mode.Slicer(endTimes=end_times,
                              startTimes=start_times,
                              timeUnits="seconds")(audio)
    return segments, onsets
예제 #12
0
def analysisSynthesisStandard(params, signal):
    """
    Resynthesise ``signal`` through an unmodified STFT round trip.

    Pipeline: frame cutting -> Hann window -> FFT -> IFFT -> OverlapAdd with
    gain ``1 / frameSize``.

    :param params: Mapping providing ``'frameSize'`` and ``'hopSize'``; it is
        also handed to ``cutFrames``.
    :param signal: Input samples. Half a frame of zeros is appended so the
        output reaches the same length as the input.
    :return: The concatenated overlap-added frames with the first
        ``2 * hopSize`` samples removed. An empty list when no frame was
        produced.
    """
    w = std.Windowing(type="hann")
    fft = std.FFT(size=params['frameSize'])
    ifft = std.IFFT(size=params['frameSize'])
    overl = std.OverlapAdd(frameSize=params['frameSize'],
                           hopSize=params['hopSize'],
                           gain=1. / params['frameSize'])
    # The pad length is a sample count, so it must be an integer; plain `/`
    # yields a float on Python 3, which is rejected as an array size.
    signal = numpy.append(signal, zeros(int(params['frameSize'] // 2)))

    frames = cutFrames(params, signal)

    pieces = [numpy.ravel(overl(ifft(fft(w(f))))) for f in frames]

    if pieces:
        # One concatenation instead of re-copying the output on every frame.
        # The leading empty float64 array keeps the dtype promotion that
        # appending to `[]` produced.
        outsignal = numpy.concatenate([numpy.zeros(0)] + pieces)
    else:
        outsignal = []

    outsignal = outsignal[2 * params['hopSize']:]
    return outsignal
예제 #13
0
    def __detect_onsets(self, file, frame_size, hop_size, windowfnc,
                        normalize) -> None:
        """
        Detect onsets in ``file.audio`` and store them in ``self.onsets``.

        Each frame is windowed, transformed to magnitude/phase and scored by
        ``OnsetDetection`` using the method named by ``self.algo``. The
        scores are then handed to ``Onsets`` with a single weight of 1.

        :param file: Object exposing the samples as ``file.audio``.
        :param frame_size: Frame, window and FFT size in samples.
        :param hop_size: Hop size in samples.
        :param windowfnc: Object whose ``.value`` is the window type name.
        :param normalize: Passed to ``Windowing`` as ``normalized``.
        """
        taper = estd.Windowing(size=frame_size,
                               type=windowfnc.value,
                               normalized=normalize)
        spectrum_of = estd.FFT(size=frame_size)
        features = es.Pool()
        to_polar = estd.CartesianToPolar()
        onset_strength = estd.OnsetDetection(method=self.algo)
        feature_key = "features." + self.algo

        chunks = estd.FrameGenerator(file.audio,
                                     frameSize=frame_size,
                                     hopSize=hop_size)
        for chunk in chunks:
            magnitude, angle = to_polar(spectrum_of(taper(chunk)))
            features.add(feature_key, onset_strength(magnitude, angle))

        # Onsets takes a matrix (one row per detection function) together
        # with one weight per row.
        novelty = es.array([features[feature_key]])
        self.onsets = estd.Onsets()(novelty, [1])
예제 #14
0
def detect_onset(audio, index):
    """
    Detect onsets in ``audio`` with the "complex" onset detection method.

    Frames of 1024 samples (hop 512) are Hann-windowed, transformed to
    magnitude/phase and scored; the scores are passed to ``Onsets`` with a
    single weight of 1. Start and end of the work are reported on stdout.

    :param audio: Mono audio samples.
    :param index: Identifier of the calling subprocess, used only in the
        printed messages.
    :return: The output of the ``Onsets`` algorithm.
    """
    # should be able to fetch the module from cache
    import essentia.standard as ess_std
    from essentia import array

    print("Subprocess {} starts".format(index))
    processing_start = time()

    onset_detector = ess_std.OnsetDetection(method="complex")
    window = ess_std.Windowing(type="hann")
    fft = ess_std.FFT()
    c2p = ess_std.CartesianToPolar()
    onsets = ess_std.Onsets()

    frames = []
    for frame in ess_std.FrameGenerator(audio, frameSize=1024, hopSize=512):
        mag, phase = c2p(fft(window(frame)))
        frames.append(onset_detector(mag, phase))

    onsets_array = onsets(array([frames]), [1])
    # `.2f` prints two decimals; a bare `.2` means two significant digits
    # and switches to exponent notation for longer durations.
    print("Subprocess {} finished. Elapsed time: {:.2f}s".format(
        index,
        time() - processing_start))
    return onsets_array
    def analysis_synthesis_spr_model_standard(self, params, signal):
        """
        Split ``signal`` into a sinusoidal part and a residual part.

        Analysis: every frame is Hann-windowed and transformed with an FFT,
        and its sinusoidal tracks are found with ``SineModelAnal``. Tracks
        are then cleaned by ``self.cleaningSineTracks``, using a minimum
        duration of ``minSineDur * sampleRate / hopSize`` frames.

        Synthesis: the sinusoidal spectrum of each frame is rebuilt with
        ``SineModelSynth``; the residual is the original spectrum minus the
        sinusoidal one. Both are brought back to the time domain with IFFT
        and OverlapAdd.

        :param params: Mapping with ``fftSize``, ``frameSize``, ``hopSize``,
            ``sampleRate``, ``maxnSines``, ``magnitudeThreshold``,
            ``freqDevOffset``, ``freqDevSlope`` and ``minSineDur``.
        :param signal: Mono audio samples.
        :return: Tuple ``(signal, sines, residual)`` of essentia arrays; the
            two synthesised signals are trimmed to their last ``len(signal)``
            samples.
        """
        pool = essentia.Pool()
        # Standard-mode algorithms for the sine model analysis
        w = es.Windowing(type="hann")
        fft = es.FFT(size=params['fftSize'])
        smanal = es.SineModelAnal(
            sampleRate=params['sampleRate'],
            maxnSines=params['maxnSines'],
            magnitudeThreshold=params['magnitudeThreshold'],
            freqDevOffset=params['freqDevOffset'],
            freqDevSlope=params['freqDevSlope'])

        # Standard-mode algorithms for the sine model synthesis
        smsyn = es.SineModelSynth(sampleRate=params['sampleRate'],
                                  fftSize=params['frameSize'],
                                  hopSize=params['hopSize'])
        ifft = es.IFFT(size=params['frameSize'])
        overlSine = es.OverlapAdd(frameSize=params['frameSize'],
                                  hopSize=params['hopSize'],
                                  gain=1. / params['frameSize'])
        overlres = es.OverlapAdd(frameSize=params['frameSize'],
                                 hopSize=params['hopSize'],
                                 gain=1. / params['frameSize'])

        fft_original = []

        # analysis
        for frame in es.FrameGenerator(signal,
                                       frameSize=params["frameSize"],
                                       hopSize=params["hopSize"]):
            frame_fft = fft(w(frame))
            fft_original.append(frame_fft)
            freqs, mags, phases = smanal(frame_fft)
            pool.add("frequencies", freqs)
            pool.add("magnitudes", mags)
            pool.add("phases", phases)

        # remove short tracks
        minFrames = int(params['minSineDur'] * params['sampleRate'] /
                        params['hopSize'])
        pool = self.cleaningSineTracks(pool, minFrames)

        # Read each track matrix from the pool once instead of once per
        # frame.
        frequencies = pool["frequencies"]
        magnitudes = pool["magnitudes"]
        phases = pool["phases"]

        # synthesis
        sine_chunks = []
        res_chunks = []
        for frame_ix in range(len(frequencies)):
            sine_frame_fft = smsyn(magnitudes[frame_ix],
                                   frequencies[frame_ix],
                                   phases[frame_ix])
            res_frame_fft = fft_original[frame_ix] - sine_frame_fft
            sine_chunks.append(np.ravel(overlSine(ifft(sine_frame_fft))))
            res_chunks.append(np.ravel(overlres(ifft(res_frame_fft))))

        # One concatenation per signal instead of re-copying the whole output
        # on every frame.
        sineTracksAudio = (np.concatenate(sine_chunks)
                           if sine_chunks else np.array([]))
        resTracksAudio = (np.concatenate(res_chunks)
                          if res_chunks else np.array([]))

        sineTracksAudio = sineTracksAudio.flatten()[-len(signal):]
        resTracksAudio = resTracksAudio.flatten()[-len(signal):]

        return essentia.array(signal), essentia.array(
            sineTracksAudio), essentia.array(resTracksAudio)
예제 #16
0
    def __init__(self, params, fsm=None):
        """
        Set up thresholds, counters, Essentia algorithms and the note FSM.

        :param params: Mapping providing every threshold, ratio and timing
            value listed below, plus ``'fs'`` and ``'hop_size'``.
        :param fsm: Optional external state machine to send events to.
        """
        # Detection settings copied one-to-one from `params`.
        for key in ('onset_threshold', 'offset_threshold', 'max_attack_time',
                    'max_release_time', 'attack_slope_ratio',
                    'release_slope_ratio', 'flux_threshold', 'mel_threshold',
                    'rms_threshold', 'conf_threshold', 'ratio_mel',
                    'ratio_rms'):
            setattr(self, key, params[key])
        self.rms_threshold_value = 0
        # NOTE(review): attribute is spelled "vale" here; kept as-is because
        # other code may read it under this name.
        self.mel_threshold_vale = 0

        for key in ('fs', 'hop_size'):
            setattr(self, key, params[key])

        # Attack / release limits converted from seconds to frames.
        timing = dict(fs=self.fs, hop_size=self.hop_size)
        self.max_attack_frames = seconds2frames(self.max_attack_time,
                                                **timing)
        self.max_release_frames = seconds2frames(self.max_release_time,
                                                 **timing)

        self.ext_fsm = fsm  # external state machine to send events to
        self.buffer = []

        self.was_onset = False
        self.was_offset = False
        self.onset_counter = None
        self.offset_counter = None
        self.onset_samples = 2  # consecutive samples required above threshold
        self.offset_samples = 3  # consecutive samples required below threshold
        self.peak_detect = GrowingSlopeEnd(max_frames=self.max_attack_frames,
                                           m=self.attack_slope_ratio)

        # Essentia algorithms
        self.o_mel = estd.OnsetDetection(method='melflux')
        self.o_rms = estd.OnsetDetection(method='rms')
        self.o_hfc = estd.OnsetDetection(method='hfc')
        self.o_flux = estd.OnsetDetection(method='flux')
        self.o_complex = estd.OnsetDetection(method='complex')
        self.fft = estd.FFT()
        self.c2p = estd.CartesianToPolar()
        self.w = estd.Windowing(type='hann')

        # State machine: detecting -> attack -> sustain -> detecting, with a
        # `reset` event that returns to `detecting` from any state.
        transitions = [
            {'name': 'onset', 'src': 'detecting', 'dst': 'attack'},
            {'name': 'peak', 'src': 'attack', 'dst': 'sustain'},
            {'name': 'offset', 'src': 'sustain', 'dst': 'detecting'},
            {
                'name': 'reset',
                'src': ['detecting', 'attack', 'sustain'],
                'dst': 'detecting'
            },
        ]
        hooks = {
            'ondetecting': self.on_detecting,
            'onattack': self.on_attack,
            'onsustain': self.on_sustain,
            'onbeforeonset': self.on_onset,
            'onbeforepeak': self.on_peak,
            'onbeforeoffset': self.on_offset
        }
        self.fsm = Fysom({
            'initial': 'detecting',
            'events': transitions,
            'callbacks': hooks
        })
예제 #17
0
def detectBW(fpath: str, frame_size: float, hop_size: float, floor_db: float,
             oversample_f: int):
    """
    Estimate the cut-off frequency of a wav file, then print and plot it.

    Every Hann-windowed frame is transformed to a dB magnitude spectrum. Its
    spectral envelope is interpolated and floored at ``floor_db``, and a
    cut-off bin is derived from it with ``compute_fc``. Cut-off bins of
    frames that pass ``energy_verification`` are collected into a histogram,
    from which ``compute_mean_fc`` derives the final estimate.

    :param fpath: Path of the audio file; must end in ``.wav``.
    :param frame_size: Frame size in samples, before oversampling.
    :param hop_size: Hop size in samples.
    :param floor_db: Floor applied to the interpolated envelope.
    :param oversample_f: Factor the frame size is multiplied by; must satisfy
        ``is_power2``.
    :raises ValueError: If the file is not a wav file or the oversample
        factor is rejected by ``is_power2``.
    """
    # check if the file has a wav extension, else: raise error
    if os.path.splitext(fpath)[1] != ".wav":
        raise ValueError("file must be wav")
    # check if the oversample factor is a power of two
    if not is_power2(oversample_f):
        raise ValueError("oversample factor can only be 1, 2 or 4")

    # AudioLoader returns x, sample_rate, number_channels, md5, bit_rate,
    # codec; only the first two are needed here.
    audio, SR = estd.AudioLoader(filename=fpath)()[:2]

    # if stereo: downmix to mono
    if audio.shape[1] != 1:
        audio = (audio[:, 0] + audio[:, 1]) / 2

    frame_size *= oversample_f  # apply the requested oversample factor
    n_bins = int(frame_size / 2) + 1
    f = np.arange(n_bins) / frame_size * SR  # frequency of every FFT bin

    fc_index_arr = []
    interpolated_spectrum = np.zeros(n_bins)
    fft = estd.FFT(size=frame_size)
    window = estd.Windowing(size=frame_size, type="hann")

    n_frames = 0
    for frame in estd.FrameGenerator(audio,
                                     frameSize=frame_size,
                                     hopSize=hop_size,
                                     startFromZero=True):
        n_frames += 1

        frame_fft = abs(fft(window(frame)))
        frame_fft_db = 20 * np.log10(frame_fft + eps)  # magnitudes in dB
        # linear interpolation between the maxima of the spectrum
        interp_frame = compute_spectral_envelope(frame_fft_db, f, "linear")
        interp_frame = modify_floor(interp_frame, floor_db, log=True)

        fc_index = compute_fc(interp_frame)

        # Only frames that pass the energy check contribute a cut-off bin.
        # Appending unconditionally as well would count verified frames twice
        # and let rejected frames through, defeating the check.
        if energy_verification(frame_fft, fc_index):
            fc_index_arr.append(fc_index)

        interpolated_spectrum += interp_frame  # accumulate for the average

    # Average over the frames actually seen (safe when there are none).
    if n_frames:
        interpolated_spectrum /= n_frames

    if len(fc_index_arr) == 0: fc_index_arr = [frame_size]

    hist = compute_histogram(fc_index_arr, f)
    fc, conf, binary = compute_mean_fc(hist, fc_index_arr, f, SR)

    print("filename: ", fpath, "mean_fc: ", fc, " conf: ", conf,
          " binary_result: ", binary)

    fig, ax = plt.subplots(3, 1, figsize=(15, 9))
    ax[0].plot(fc_index_arr, "x")
    ax[1].stem(f, hist)
    ax[2].plot(f, interpolated_spectrum)
    ax[2].axvline(x=fc, color="r")
    plt.show()
예제 #18
0
import matplotlib.pyplot as plt
import os
import numpy as np

# Folder scanned for wav files; one spectrum plot is saved next to each file.
DIR = "../Dataset/BW detection/"

for file in os.listdir(DIR):
    fpath = os.path.join(DIR, file)
    name, extension = os.path.splitext(file)
    print(file)

    # Only wav files are analysed.
    if extension != ".wav":
        continue

    x, SR, channels, _, br, _ = estd.AudioLoader(filename=fpath)()

    # Down-mix to mono when the loaded buffer has more than one channel.
    channels = x.shape[1]
    if channels != 1:
        x = (x[:, 0] + x[:, 1]) / 2
    print(x.shape, SR, channels, br)

    # Hann-window the whole signal, then zero-pad to the next power of two.
    window = estd.Windowing(size=len(x), type="hann")
    x = window(x)
    N = int(2**(np.ceil(np.log2(len(x)))))
    x = esarr(np.append(x, np.zeros(N - len(x))))

    # Magnitude spectrum in dB against frequency.
    tfX = 20 * np.log10(abs(estd.FFT()(x)))
    f = np.arange(int(len(x) / 2) + 1) / len(x) * SR

    plt.plot(f, tfX[:int(len(x) / 2) + 1])
    plt.savefig(os.path.join(DIR, name + ".png"))
    plt.clf()
예제 #19
0
def analyze_hp(filename, segment_duration=20):
    """
    Compute log-scaled mel bands of the harmonic and percussive components.

    A segment of ``segment_duration`` seconds, centred in the file and loaded
    with the file's replay gain, is transformed to an STFT (frame 2048, hop
    1024, ``blackmanharris62`` window). The STFT is split with librosa's HPSS
    (margin 8), and for each component 96 mel bands are computed per frame
    and mapped through ``log10(1 + 10000 * x)``.

    :param filename: Path of the audio file.
    :param segment_duration: Length in seconds of the analysed segment.
    :return: Pool with the keys ``'melbands_harmonic'`` and
        ``'melbands_percussive'``.
    :raises ValueError: If the segment does not fit inside the audio.
    """
    sample_rate = 44100
    frame_size = 2048
    hop_size = 1024

    # Replay gain and duration come from the whole file; afterwards only the
    # centred segment is loaded, with the replay gain applied.
    audio = es.MonoLoader(filename=filename)()
    gain = es.ReplayGain()(audio)

    total_seconds = len(audio) / sample_rate
    start = (total_seconds - segment_duration) / 2
    end = start + segment_duration
    if start < 0 or end > total_seconds:
        raise ValueError(
            'Segment duration is larger than the input audio duration')

    load_segment = es.EasyLoader(filename=filename,
                                 replayGain=gain,
                                 startTime=start,
                                 endTime=end)
    taper = es.Windowing(type='blackmanharris62')
    transform = es.FFT()

    audio = load_segment()
    frames = es.FrameGenerator(audio, frameSize=frame_size, hopSize=hop_size)
    # librosa expects a bins x frames layout, hence the transpose.
    spectrogram = np.array([transform(taper(frame)) for frame in frames]).T

    harmonic, percussive = librosa.decompose.hpss(spectrogram, margin=8)
    percussive_mag, _ = librosa.magphase(percussive)
    harmonic_mag, _ = librosa.magphase(harmonic)

    # Mel bands, then the normalisation log10(1 + x * 10000).
    to_mel = es.MelBands(numberBands=96,
                         lowFrequencyBound=0,
                         highFrequencyBound=11025)
    scale_shift = es.UnaryOperator(type='identity', shift=1, scale=10000)
    to_log = es.UnaryOperator(type='log10')

    features = essentia.Pool()
    # Transposing back gives the frames x bins layout used by Essentia.
    components = (('melbands_harmonic', harmonic_mag.T),
                  ('melbands_percussive', percussive_mag.T))
    for key, magnitudes in components:
        for row in magnitudes:
            features.add(key, to_log(scale_shift(to_mel(row))))

    return features
def detectBW(fpath: str, frame_size: float, hop_size: float, floor_db: float,
             oversample_f: int):
    """
    Estimate the cut-off bin of a wav file from its high-energy frames.

    The file is framed twice. The first pass finds the largest frame energy.
    The second pass keeps the frames holding at least 10% of that energy and,
    for each of them, scans bins from the top down for a cut-off bin. The
    cut-off bins (rescaled to a 0..128 index) feed an energy-weighted
    histogram, which ``compute_mean_fc`` turns into the final estimate. The
    result is printed and plotted.

    :param fpath: Path of the audio file; must end in ``.wav``.
    :param frame_size: Frame size in samples, before oversampling.
    :param hop_size: Hop size in samples.
    :param floor_db: Not used by this implementation.
    :param oversample_f: Factor the frame size is multiplied by; must satisfy
        ``is_power2``.
    :raises ValueError: If the file is not a wav file or the oversample
        factor is rejected by ``is_power2``.
    """

    # check if the file has a wav extension, else: raise error
    if os.path.splitext(fpath)[1] != ".wav":
        raise ValueError("file must be wav")

    # check if the oversample factor is a power of two
    if not is_power2(oversample_f):
        raise ValueError("oversample factor can only be 1, 2 or 4")

    # audio loader returns x, sample_rate, number_channels, md5, bit_rate,
    # codec, of which only the first 2 are used here
    audio, SR = estd.AudioLoader(filename=fpath)()[:2]

    # if stereo: downmix to mono
    if audio.shape[1] != 1:
        audio = (audio[:, 0] + audio[:, 1]) / 2

    frame_size *= oversample_f  # if an oversample factor is desired, apply it

    fc_index_arr = []
    # 129 slots: cut-off bins are rescaled to 0..128 below, and slot 128
    # doubles as the "nothing detected" fallback.
    hist = np.zeros(129)
    fft = estd.FFT(size=frame_size)  # FFT algorithm, reused for every frame
    window = estd.Windowing(size=frame_size,
                            type="hann")  # Hann window, reused for every frame
    avg_frames = np.zeros(int(frame_size / 2) + 1)

    # First pass: largest frame energy, used as the reference for the 10%
    # gate in the second pass.
    max_nrg = max([
        sum(abs(fft(window(frame)))**2) for frame in estd.FrameGenerator(
            audio, frameSize=frame_size, hopSize=hop_size, startFromZero=True)
    ])

    for i, frame in enumerate(
            estd.FrameGenerator(audio,
                                frameSize=frame_size,
                                hopSize=hop_size,
                                startFromZero=True)):

        frame = window(frame)  # apply window to the frame
        frame_fft = abs(fft(frame))
        nrg = sum(frame_fft**2)

        # Only frames with at least 10% of the maximum energy are analysed.
        if nrg >= 0.1 * max_nrg:
            # Scan from the highest bin down; the first bin j whose tail sum,
            # divided by j, reaches 1e-5 is taken as the cut-off bin.
            # NOTE(review): if the scan reaches j == 0 this divides by zero -
            # confirm whether bin 0 should be excluded.
            for j in reversed(range(len(frame_fft))):
                if sum(frame_fft[j:] / j) >= 1e-5:
                    # Rescale the bin index to the histogram's resolution.
                    j = int(j / frame_size * 128)
                    fc_index_arr.append(j)
                    hist[j] += nrg  # histogram is weighted by frame energy
                    break
            avg_frames = avg_frames + frame_fft

    # Fallback when no frame produced a cut-off bin.
    if len(fc_index_arr) == 0:
        fc_index_arr.append(128)
        hist[128] += 1

    # NOTE(review): the divisor is the total number of frames, although only
    # the energy-gated frames were accumulated - confirm this is intended.
    avg_frames /= (i + 1)
    most_likely_bin, conf, binary = compute_mean_fc(avg_frames,
                                                    fc_index_arr, [],
                                                    SR,
                                                    hist=hist)

    # Map the histogram index back to an FFT bin index.
    most_likely_bin *= int(frame_size / 128)

    print("f={:0=2f}, conf={:0=2f}, problem={}".format(
        most_likely_bin * SR / frame_size, conf, str(binary)))
    fig, ax = plt.subplots(2, 1, figsize=(15, 9))
    ax[0].plot(20 * np.log10(avg_frames + eps))
    ax[0].axvline(x=most_likely_bin, color='r')
    ax[0].set_ylim(bottom=-120)
    ax[1].stem(hist)
    plt.show()
counter = 0


import matplotlib.pylab as plt

if mode == 'standard':

  # create an audio loader and import audio file
  loader = std.MonoLoader(filename = inputFilename, sampleRate = 44100)
  audio = loader()

  print("Duration of the audio sample [sec]:")
  print(len(audio)/44100.0)

  w = std.Windowing(type = "hann");
  fft = std.FFT(size = framesize);
  ifft = std.IFFT(size = framesize);
  overl = std.OverlapAdd (frameSize = framesize, hopSize = hopsize);
  awrite = std.MonoWriter (filename = outputFilename, sampleRate = 44100);


  for frame in std.FrameGenerator(audio, frameSize = framesize, hopSize = hopsize):
    # STFT analysis
    infft = fft(w(frame))
    
    # here we could apply spectral transformations
    outfft = infft

    # STFT synthesis
    ifftframe = ifft(outfft)
    out = overl(ifftframe)