# Example no. 1
# 0
def get_sines_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """
    Run a frame-wise sinusoidal-model analysis over an audio signal.

    The audio is cut into frames of 4096 samples with a hop of 2048 samples.
    Each frame goes through an FFT and ``SineModelAnal``; the detected peaks
    are stored per frame, sorted by ascending frequency.

    :param audio: Audio array. If it has more than one dimension it is mixed
        down to mono with ``MonoMixer`` (channel count taken from
        ``audio.shape[1]``).
    :param sr: Sample rate of ``audio`` in Hz. Forwarded to ``SineModelAnal``
        as ``sampleRate``.
    :param onlyfrecuencies: If True, return only the frequency matrix.
    :param nsines: Passed to ``SineModelAnal`` as ``maxnSines``; also the
        width of the returned matrices.
    :return: The frequency matrix of shape (N, nsines) when
        ``onlyfrecuencies`` is True, otherwise the tuple
        ``(frequencies, magnitudes)``, both of shape (N, nsines). N is the
        number of frames; if no frame is produced the result still has one
        row, filled with ``eps``.
    """
    frame_size = 4096
    hop_size = 2048

    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])

    # First pass: find the index of the last frame so that the output can be
    # preallocated. ``len_arrays`` stays 0 when there are no frames at all.
    len_arrays = 0
    for i, _ in enumerate(
            std.FrameGenerator(audio, frameSize=frame_size,
                               hopSize=hop_size)):
        len_arrays = i

    fft_algo = std.FFT()
    # The sample rate must reach the analysis algorithm; otherwise it falls
    # back to its own default regardless of the ``sr`` argument.
    sine_anal = std.SineModelAnal(sampleRate=sr,
                                  maxnSines=nsines,
                                  orderBy='frequency',
                                  minFrequency=1)
    sines = np.zeros([len_arrays + 1, 2, nsines], dtype=np.float32) + eps

    # Second pass: analyse every frame and store its peaks.
    for i, frame in enumerate(
            std.FrameGenerator(audio, frameSize=frame_size,
                               hopSize=hop_size)):
        fft = fft_algo(frame)
        freqs, mags, _ = sine_anal(fft)
        # Keep frequencies and magnitudes aligned while sorting by frequency.
        sorting_indexes = np.argsort(freqs)
        freqs = freqs[sorting_indexes]
        mags = mags[sorting_indexes]
        sines[i, :] = [freqs, mags]

    if onlyfrecuencies:
        return sines[:, 0, :]
    return sines[:, 0, :], sines[:, 1, :]
# Example no. 2
# 0
def get_hpeaks_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """
    Extract harmonic peaks frame by frame from an audio signal.

    The audio is cut into frames of 4096 samples with a hop of 2048 samples.
    For each frame a pitch is estimated with ``PitchYin``, sinusoidal peaks
    are found with ``FFT`` + ``SineModelAnal``, zero-frequency entries are
    dropped, and the remaining peaks are passed to ``HarmonicPeaks`` together
    with the estimated pitch.

    :param audio: Audio array. If it has more than one dimension it is mixed
        down to mono with ``MonoMixer`` (channel count taken from
        ``audio.shape[1]``).
    :param sr: Sample rate of ``audio`` in Hz. Forwarded to ``PitchYin`` and
        ``SineModelAnal`` as ``sampleRate``.
    :param onlyfrecuencies: If True, return only the frequency matrix.
    :param nsines: Passed to ``SineModelAnal`` as ``maxnSines``.
        NOTE(review): it is not forwarded to ``HarmonicPeaks``, so the width
        of the result is whatever ``HarmonicPeaks`` emits -- confirm.
    :return: The harmonic-frequency matrix (one row per frame) when
        ``onlyfrecuencies`` is True, otherwise the tuple
        ``(frequencies, magnitudes)``. If no frame is produced, the stacked
        array is empty and the final indexing raises ``IndexError``.
    """
    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])

    fft_algo = std.FFT()
    # Both the pitch tracker and the sine analysis need the real sample
    # rate; otherwise they fall back to their own defaults regardless of
    # the ``sr`` argument.
    pyin = std.PitchYin(sampleRate=sr)
    hpeaks = std.HarmonicPeaks()
    sine_anal = std.SineModelAnal(sampleRate=sr,
                                  maxnSines=nsines,
                                  orderBy='frequency',
                                  minFrequency=1)
    sines = []
    for frame in std.FrameGenerator(audio, frameSize=4096, hopSize=2048):
        pitch, _ = pyin(frame)
        fft = fft_algo(frame)
        freqs, mags, _ = sine_anal(fft)
        # Keep frequencies and magnitudes aligned while sorting by frequency.
        sorting_indexes = np.argsort(freqs)
        freqs = freqs[sorting_indexes]
        mags = mags[sorting_indexes]
        # Drop zero-frequency entries before the harmonic peak search.
        non_zero = freqs != 0
        freqs = freqs[non_zero]
        mags = mags[non_zero]
        freqs, mags = hpeaks(freqs, mags, pitch)
        sines.append([freqs, mags])
    sines = np.array(sines)

    if onlyfrecuencies:
        return sines[:, 0, :]
    return sines[:, 0, :], sines[:, 1, :]
# Example no. 3
# 0
def nSinesRead(audio_vector):
    """
    Run ``SineModelAnal`` (at most 20 sines) over every frame of a signal.

    Frames come from ``FrameGenerator`` called with the positional arguments
    2048 and 1024; each one is transformed with a 2048-point ``FFT`` before
    the sine analysis.

    :param audio_vector: Audio samples handed to ``FrameGenerator``.
    :return: Tuple ``(freqs, mags)``: the first and second outputs of
        ``SineModelAnal``, stacked with one row per frame.
    """
    spectrum_of = estd.FFT(size=2048)
    track_sines = estd.SineModelAnal(maxnSines=20)

    # Stack every per-frame output tuple into one (frames, outputs, sines)
    # array, then slice out the first two outputs.
    per_frame = np.array([
        track_sines(spectrum_of(chunk))
        for chunk in estd.FrameGenerator(audio_vector, 2048, 1024)
    ])
    return per_frame[:, 0, :], per_frame[:, 1, :]
    def analysis_synthesis_spr_model_standard(self, params, signal):
        """Split ``signal`` into a sinusoidal part and a residual part.

        Every frame is windowed (Hann), transformed with an FFT and analysed
        with ``SineModelAnal``. The resulting tracks are filtered through
        ``self.cleaningSineTracks``. The sinusoidal spectrum is then
        resynthesised per frame with ``SineModelSynth``, and the residual
        spectrum is the original frame spectrum minus the sinusoidal one.
        Both are brought back to the time domain with ``IFFT`` and
        ``OverlapAdd``.

        :param params: Mapping providing the keys ``fftSize``, ``frameSize``,
            ``hopSize``, ``sampleRate``, ``maxnSines``,
            ``magnitudeThreshold``, ``freqDevOffset``, ``freqDevSlope`` and
            ``minSineDur``.
        :param signal: Input audio samples.
        :return: Tuple of three ``essentia.array`` objects: the input signal,
            the sinusoidal audio and the residual audio. The two synthesised
            signals are cut to their last ``len(signal)`` samples.
        """
        pool = essentia.Pool()

        # Algorithms for the sine model analysis.
        w = es.Windowing(type="hann")
        fft = es.FFT(size=params['fftSize'])
        smanal = es.SineModelAnal(
            sampleRate=params['sampleRate'],
            maxnSines=params['maxnSines'],
            magnitudeThreshold=params['magnitudeThreshold'],
            freqDevOffset=params['freqDevOffset'],
            freqDevSlope=params['freqDevSlope'])

        # Algorithms for the sine model synthesis. The sinusoidal and the
        # residual signals each get their own OverlapAdd instance.
        smsyn = es.SineModelSynth(sampleRate=params['sampleRate'],
                                  fftSize=params['frameSize'],
                                  hopSize=params['hopSize'])
        ifft = es.IFFT(size=params['frameSize'])
        overlSine = es.OverlapAdd(frameSize=params['frameSize'],
                                  hopSize=params['hopSize'],
                                  gain=1. / params['frameSize'])
        overlres = es.OverlapAdd(frameSize=params['frameSize'],
                                 hopSize=params['hopSize'],
                                 gain=1. / params['frameSize'])

        # Spectrum of every analysed frame, kept to compute the residual.
        fft_original = []

        # analysis
        for frame in es.FrameGenerator(signal,
                                       frameSize=params["frameSize"],
                                       hopSize=params["hopSize"]):
            frame_fft = fft(w(frame))
            fft_original.append(frame_fft)
            freqs, mags, phases = smanal(frame_fft)
            pool.add("frequencies", freqs)
            pool.add("magnitudes", mags)
            pool.add("phases", phases)

        # remove short tracks: minSineDur is converted to a number of frames
        # (assumes minSineDur is expressed in seconds -- TODO confirm)
        minFrames = int(params['minSineDur'] * params['sampleRate'] /
                        params['hopSize'])
        pool = self.cleaningSineTracks(pool, minFrames)

        # synthesis
        # Look the descriptors up once instead of on every iteration.
        frequencies = pool["frequencies"]
        magnitudes = pool["magnitudes"]
        phases = pool["phases"]

        # Output frames are collected and joined once at the end; appending
        # to a growing array would re-copy the whole signal on every frame.
        # The empty float64 seed keeps the result dtype, and the empty-input
        # result, the same as repeated ``np.append`` onto ``np.array([])``.
        sine_frames = [np.array([])]
        res_frames = [np.array([])]
        for frame_ix, _ in enumerate(frequencies):
            sine_frame_fft = smsyn(magnitudes[frame_ix],
                                   frequencies[frame_ix],
                                   phases[frame_ix])
            res_frame_fft = fft_original[frame_ix] - sine_frame_fft
            sine_frames.append(np.ravel(overlSine(ifft(sine_frame_fft))))
            res_frames.append(np.ravel(overlres(ifft(res_frame_fft))))

        sineTracksAudio = np.concatenate(sine_frames)[-len(signal):]
        resTracksAudio = np.concatenate(res_frames)[-len(signal):]

        return essentia.array(signal), essentia.array(
            sineTracksAudio), essentia.array(resTracksAudio)