def get_sines_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """Framewise sinusoidal-model analysis of an audio signal.

    :param audio: audio samples, mono or multi-channel; multi-channel input
        is downmixed to mono first.
    :param sr: sample rate of ``audio`` (kept for interface compatibility;
        not used by the analysis itself).
    :param onlyfrecuencies: when True, return only the frequency tracks.
    :param nsines: maximum number of sinusoids extracted per frame.
    :return: ``(freqs, mags)``, two arrays of shape ``(n_frames, nsines)``
        sorted by ascending frequency per frame, or just ``freqs`` when
        ``onlyfrecuencies`` is True.
    """
    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])

    fft_algo = std.FFT()
    sine_anal = std.SineModelAnal(maxnSines=nsines, orderBy='frequency',
                                  minFrequency=1)

    # Single pass over the frames; rows are stacked at the end instead of
    # running a throwaway FrameGenerator pass just to count the frames.
    rows = []
    for frame in std.FrameGenerator(audio, frameSize=4096, hopSize=2048):
        freqs, mags, _ = sine_anal(fft_algo(frame))
        order = np.argsort(freqs)
        rows.append([freqs[order], mags[order]])

    if rows:
        sines = np.asarray(rows, dtype=np.float32)
    else:
        # No frames produced: match the historical behaviour of returning a
        # single eps-filled frame rather than an empty array.
        sines = np.zeros([1, 2, nsines], dtype=np.float32) + eps

    if onlyfrecuencies:
        return sines[:, 0, :]
    return sines[:, 0, :], sines[:, 1, :]
def get_hpeaks_per_frame(audio, sr=44100, onlyfrecuencies=False, nsines=20):
    """Extract per-frame harmonic peaks from an audio signal.

    For every 4096-sample frame (hop 2048) the pitch is estimated with
    PitchYin and the sinusoidal peaks are filtered down to harmonic peaks
    of that pitch.

    :param audio: audio samples, mono or multi-channel; multi-channel input
        is downmixed to mono first.
    :param sr: sample rate of ``audio`` (not used by the analysis itself).
    :param onlyfrecuencies: when True, return only the frequency tracks.
    :param nsines: maximum number of sinusoids extracted per frame before
        harmonic filtering.
    :return: frequency (and magnitude) arrays indexed [frame, peak], or
        only frequencies when ``onlyfrecuencies`` is True.
    """
    if audio.ndim > 1:
        audio = std.MonoMixer()(audio, audio.shape[1])
    fft_algo = std.FFT()
    pyin = std.PitchYin()
    hpeaks = std.HarmonicPeaks()
    sine_anal = std.SineModelAnal(maxnSines=nsines, orderBy='frequency',
                                  minFrequency=1)
    sines = []
    for i, frame in enumerate(
            std.FrameGenerator(audio, frameSize=4096, hopSize=2048)):
        pitch, _ = pyin(frame)
        fft = fft_algo(frame)
        freqs, mags, _ = sine_anal(fft)
        # Sort peaks by ascending frequency so downstream indexing is stable.
        sorting_indexes = np.argsort(freqs)
        freqs = freqs[sorting_indexes]
        mags = mags[sorting_indexes]
        # Drop the zero-frequency padding emitted for unused sine slots.
        non_zero_freqs = np.where(freqs != 0)
        freqs = freqs[non_zero_freqs]
        mags = mags[non_zero_freqs]
        freqs, mags = hpeaks(freqs, mags, pitch)
        sines.append([freqs, mags])
    # NOTE(review): if HarmonicPeaks returns a different number of peaks per
    # frame, np.array(sines) becomes a ragged object array and the slicing
    # below will fail — confirm the per-frame peak count is constant here.
    sines = np.array(sines)
    if onlyfrecuencies:
        return sines[:, 0, :]
    else:
        return sines[:, 0, :], sines[:, 1, :]
def mix_to_mono(audio):
    """Downmix a multi-channel audio buffer to a single mono channel.

    :param audio: (np.ndarray) multi-channel audio samples
    :return: (np.ndarray) mono audio samples
    """
    n_channels = audio.shape[1]
    return es.MonoMixer()(audio, n_channels)
def mix(audio1, audio2, sr):
    """Mix two audio signals with normalised loudness.

    Each input is downmixed to mono if needed, loudness-normalised with
    ffmpeg's ``loudnorm`` filter, and the two streams are blended with
    ``amix``.

    :param audio1: first audio vector (mono or multi-channel)
    :param audio2: second audio vector (mono or multi-channel)
    :param sr: sample rate of the inputs and of the final mix
    :return: audio vector of the normalised mix
    """
    # Local imports keep this self-contained; the module import header is
    # outside this block.
    import os
    import tempfile

    if audio1.ndim > 1:
        audio1 = std.MonoMixer()(audio1, audio1.shape[1])
    if audio2.ndim > 1:
        audio2 = std.MonoMixer()(audio2, audio2.shape[1])

    # Use a private temp directory instead of hard-coded filenames in the
    # CWD: concurrent calls can no longer clobber each other's intermediate
    # files, and everything is removed automatically on exit.
    with tempfile.TemporaryDirectory() as tmpdir:
        in1 = os.path.join(tmpdir, 'temporal1.wav')
        in2 = os.path.join(tmpdir, 'temporal2.wav')
        out = os.path.join(tmpdir, 'temporal_o.wav')

        std.MonoWriter(filename=in1, sampleRate=sr)(audio1)
        std.MonoWriter(filename=in2, sampleRate=sr)(audio2)

        stream1 = ffmpeg.input(in1).filter('loudnorm')
        stream2 = ffmpeg.input(in2).filter('loudnorm')
        merged_audio = ffmpeg.filter([stream1, stream2], 'amix')
        ffmpeg.output(merged_audio, out).overwrite_output().run()

        # Load the result back before the temp directory is deleted.
        return std.MonoLoader(filename=out)()
# Compute beat positions and BPM
rhythm_extractor = esst.RhythmExtractor2013(method="multifeature")
bpm, beats, beats_confidence, _, beats_intervals = rhythm_extractor(audio)
# print("BPM:", bpm)
# print("Beat positions (sec.):", beats)
print("Beat estimation confidence:", beats_confidence)

# Hoisted out of the loop: the original rebuilt both mapped lists on every
# iteration, which is O(n^2) in the number of beats.
start_times = [beat - 0.01 for beat in beats]
end_times = [beat + 0.15 for beat in beats]

for i in range(len(beats)):
    # Grab a short window around each beat (10 ms before, 150 ms after).
    trim = esst.Trimmer(startTime=start_times[i], endTime=end_times[i])(audio)
    if len(mixed_sample):
        # Pad/crop the trimmed beat so it lines up with the running sample.
        trim = np.resize(trim, mixed_sample.size)
        stereo_mix = esst.StereoMuxer()(mixed_sample, trim)
        # NOTE(review): the downmix result below is discarded — it may have
        # been meant to be assigned back to mixed_sample; confirm intent.
        esst.MonoMixer()(stereo_mix, 2)
    else:
        mixed_sample = trim
    output_array = np.concatenate([output_array, mixed_sample])
# NOTE(review): `i` here is the index of the last beat, so only one file is
# written despite the per-beat filename — confirm whether the write belongs
# inside the loop.
esst.MonoWriter(filename=f"../samples/mix_beat{str(i)}.mp3")(output_array)

# Mark beat positions on the audio and write it to a file
# Let's use beeps instead of white noise to mark them, as it's more distinctive
# marker = AudioOnsetsMarker(onsets=beats, type='beep')
# marked_audio = marker(audio)
# MonoWriter(filename='../samples/dubstep_beats.flac')(marked_audio)