def get_sines_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """Perform a framewise sinusoidal model analysis of an audio signal.

    :param audio: Audio, either mono or stereo. Stereo input is downmixed to mono.
    :param sr: Sample rate of the audio.
    :param onlyfrecuencies: If True, return only the frequency tracks.
    :param nsines: Maximum number of sines extracted per frame.
    :return: Arrays of shape (N, nsines), where N is the number of frames:
        the per-frame frequencies and, unless onlyfrecuencies is True, the
        per-frame magnitudes.
    """
    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])

    # Count the frames first so the output array can be preallocated.
    len_arrays = 0
    for i, _ in enumerate(
            std.FrameGenerator(audio, frameSize=4096, hopSize=2048)):
        len_arrays = i

    fft_algo = std.FFT()
    sine_anal = std.SineModelAnal(maxnSines=nsines,
                                  orderBy='frequency',
                                  minFrequency=1)
    # eps is assumed to be a small module-level constant (e.g. machine epsilon)
    # used to avoid exact zeros in the output.
    sines = np.zeros([len_arrays + 1, 2, nsines], dtype=np.float32) + eps

    for i, frame in enumerate(
            std.FrameGenerator(audio, frameSize=4096, hopSize=2048)):
        fft = fft_algo(frame)
        freqs, mags, _ = sine_anal(fft)
        # Sort the sine tracks by ascending frequency.
        sorting_indexes = np.argsort(freqs)
        freqs = freqs[sorting_indexes]
        mags = mags[sorting_indexes]
        sines[i, :] = [freqs, mags]

    if onlyfrecuencies:
        return sines[:, 0, :]
    return sines[:, 0, :], sines[:, 1, :]
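
# Hedged usage sketch (not part of the original module): load a mono file with
# Essentia's MonoLoader and inspect the per-frame sine tracks. The path
# 'example.wav' is a hypothetical placeholder, and std / np are assumed to be
# the module-level essentia.standard / numpy imports used above.
def _demo_get_sines_per_frame(path='example.wav'):
    audio = std.MonoLoader(filename=path, sampleRate=44100)()
    freqs, mags = get_sines_per_frame(audio, sr=44100, nsines=20)
    # freqs and mags both have shape (n_frames, 20)
    print(freqs.shape, mags.shape)
    return freqs, mags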
def get_hpeaks_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """Get the harmonic peaks per frame of an audio signal.

    :param audio: Audio, either mono or stereo. Stereo input is downmixed to mono.
    :param sr: Sample rate of the audio.
    :param onlyfrecuencies: If True, return only the harmonic frequencies.
    :param nsines: Maximum number of sines extracted per frame before the
        harmonic-peak selection.
    :return: Per-frame harmonic frequencies and, unless onlyfrecuencies is
        True, the corresponding magnitudes.
    """
    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])

    fft_algo = std.FFT()
    pyin = std.PitchYin()
    hpeaks = std.HarmonicPeaks()
    sine_anal = std.SineModelAnal(maxnSines=nsines,
                                  orderBy='frequency',
                                  minFrequency=1)
    sines = []
    for frame in std.FrameGenerator(audio, frameSize=4096, hopSize=2048):
        pitch, _ = pyin(frame)
        fft = fft_algo(frame)
        freqs, mags, _ = sine_anal(fft)
        # Sort by frequency and discard empty (zero-frequency) tracks before
        # selecting the harmonic peaks around the estimated pitch.
        sorting_indexes = np.argsort(freqs)
        freqs = freqs[sorting_indexes]
        mags = mags[sorting_indexes]
        non_zero_freqs = np.where(freqs != 0)
        freqs = freqs[non_zero_freqs]
        mags = mags[non_zero_freqs]
        freqs, mags = hpeaks(freqs, mags, pitch)
        sines.append([freqs, mags])
    sines = np.array(sines)

    if onlyfrecuencies:
        return sines[:, 0, :]
    return sines[:, 0, :], sines[:, 1, :]
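
# Hedged usage sketch (not part of the original code): run the harmonic-peak
# analysis on a short synthetic harmonic tone so the example needs no audio
# file. np and std are assumed to be the module-level numpy / essentia.standard
# imports used above.
def _demo_get_hpeaks_per_frame():
    sr = 44100
    t = np.arange(2 * sr, dtype=np.float32) / sr
    # Fundamental at 220 Hz plus two weaker harmonics.
    tone = (0.5 * np.sin(2 * np.pi * 220.0 * t)
            + 0.25 * np.sin(2 * np.pi * 440.0 * t)
            + 0.125 * np.sin(2 * np.pi * 660.0 * t)).astype(np.float32)
    hfreqs, hmags = get_hpeaks_per_frame(tone, sr=sr)
    # hfreqs[i] holds the harmonic frequencies estimated for frame i.
    return hfreqs, hmags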
def nSinesRead(audio_vector):
    sineanal = estd.SineModelAnal(maxnSines=20)
    fft_calc = estd.FFT(size=2048)
    results = []
    for frame in estd.FrameGenerator(audio_vector,
                                     frameSize=2048, hopSize=1024):
        spec = fft_calc(frame)
        # SineModelAnal returns (frequencies, magnitudes, phases) per frame.
        results.append(sineanal(spec))
    results = np.array(results)
    freqs = results[:, 0, :]
    mags = results[:, 1, :]
    return freqs, mags
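
# Hedged usage sketch (not from the original file): run nSinesRead on a short
# synthetic signal so the example does not depend on any audio file. estd and
# np are assumed to be the module-level essentia.standard / numpy imports.
def _demo_nSinesRead():
    sr = 44100
    t = np.arange(sr, dtype=np.float32) / sr
    tone = (0.5 * np.sin(2 * np.pi * 440.0 * t)).astype(np.float32)  # 1 s, 440 Hz
    freqs, mags = nSinesRead(tone)
    # freqs[i] holds the (up to 20) sine frequencies detected in frame i.
    print(freqs.shape, mags.shape)
    return freqs, mags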
def analysis_synthesis_spr_model_standard(self, params, signal):
    pool = essentia.Pool()

    # Standard algorithms for sine model analysis
    w = es.Windowing(type="hann")
    fft = es.FFT(size=params['fftSize'])
    smanal = es.SineModelAnal(
        sampleRate=params['sampleRate'],
        maxnSines=params['maxnSines'],
        magnitudeThreshold=params['magnitudeThreshold'],
        freqDevOffset=params['freqDevOffset'],
        freqDevSlope=params['freqDevSlope'])

    # Standard algorithms for sine model synthesis
    smsyn = es.SineModelSynth(sampleRate=params['sampleRate'],
                              fftSize=params['frameSize'],
                              hopSize=params['hopSize'])
    ifft = es.IFFT(size=params['frameSize'])
    overlSine = es.OverlapAdd(frameSize=params['frameSize'],
                              hopSize=params['hopSize'],
                              gain=1. / params['frameSize'])
    overlres = es.OverlapAdd(frameSize=params['frameSize'],
                             hopSize=params['hopSize'],
                             gain=1. / params['frameSize'])

    fft_original = []

    # analysis
    for frame in es.FrameGenerator(signal,
                                   frameSize=params["frameSize"],
                                   hopSize=params["hopSize"]):
        frame_fft = fft(w(frame))
        fft_original.append(frame_fft)
        freqs, mags, phases = smanal(frame_fft)
        pool.add("frequencies", freqs)
        pool.add("magnitudes", mags)
        pool.add("phases", phases)

    # remove short sine tracks
    minFrames = int(params['minSineDur'] * params['sampleRate'] /
                    params['hopSize'])
    pool = self.cleaningSineTracks(pool, minFrames)

    # synthesis: resynthesize the sine tracks and subtract them from the
    # original spectrum to obtain the residual
    sineTracksAudio = np.array([])
    resTracksAudio = np.array([])
    for frame_ix, _ in enumerate(pool["frequencies"]):
        sine_frame_fft = smsyn(pool["magnitudes"][frame_ix],
                               pool["frequencies"][frame_ix],
                               pool["phases"][frame_ix])
        res_frame_fft = fft_original[frame_ix] - sine_frame_fft
        sine_outframe = overlSine(ifft(sine_frame_fft))
        sineTracksAudio = np.append(sineTracksAudio, sine_outframe)
        res_outframe = overlres(ifft(res_frame_fft))
        resTracksAudio = np.append(resTracksAudio, res_outframe)

    sineTracksAudio = sineTracksAudio.flatten()[-len(signal):]
    resTracksAudio = resTracksAudio.flatten()[-len(signal):]
    # print("len signal", len(signal), "len res", len(resTracksAudio))

    return (essentia.array(signal), essentia.array(sineTracksAudio),
            essentia.array(resTracksAudio))
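
# Hedged usage sketch (not from the original code): call the method on an
# instance of the class that defines it and cleaningSineTracks. The parameter
# values and 'example.wav' are illustrative placeholders; the only constraint
# taken from the method body is that 'fftSize' must equal 'frameSize' so the
# residual spectrum subtraction lines up. es is assumed to be the
# essentia.standard alias used above.
def _demo_spr_model(model, path='example.wav'):
    params = {
        'sampleRate': 44100,
        'frameSize': 2048,
        'hopSize': 512,
        'fftSize': 2048,
        'maxnSines': 100,
        'magnitudeThreshold': -74,
        'freqDevOffset': 10,
        'freqDevSlope': 0.001,
        'minSineDur': 0.02,
    }
    signal = es.MonoLoader(filename=path, sampleRate=params['sampleRate'])()
    original, sines, residual = model.analysis_synthesis_spr_model_standard(
        params, signal)
    return original, sines, residual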