class ShortTimeFourierTransform(BaseModel):
    """Processing graph: raw bytes -> PCM -> resampled audio -> windowed
    frames -> FFT.

    NOTE(review): this class closes over names from an enclosing factory
    scope (chunksize_bytes, resample_to, store_resampled, wscheme,
    store_windowed, fft_padding_samples, store_fft) — confirm against the
    surrounding function.
    """

    # Parsed audio metadata, persisted as JSON.
    meta = JSONFeature(MetaData, store=True, encoder=AudioMetaDataEncoder)

    # Chunked raw byte stream; recomputed on demand rather than stored.
    raw = ByteStreamFeature(
        ByteStream,
        chunksize=chunksize_bytes,
        needs=meta,
        store=False)

    # Ogg Vorbis encoding of the raw stream, persisted.
    ogg = OggVorbisFeature(OggVorbis, needs=raw, store=True)

    # Decoded PCM samples (transient).
    pcm = AudioSamplesFeature(AudioStream, needs=raw, store=False)

    # PCM resampled to the target rate supplied by the enclosing scope.
    resampled = AudioSamplesFeature(
        Resampler,
        needs=pcm,
        samplerate=resample_to,
        store=store_resampled)

    # Overlapping frames with an Ogg Vorbis window applied.
    windowed = ArrayWithUnitsFeature(
        SlidingWindow,
        needs=resampled,
        wscheme=wscheme,
        wfunc=OggVorbisWindowingFunc(),
        store=store_windowed)

    # FFT of each frame, optionally zero-padded.
    fft = ArrayWithUnitsFeature(
        FFT,
        padding_samples=fft_padding_samples,
        needs=windowed,
        store=store_fft)
def test_square_form_no_overlap_add(self):
    """mdct.square(30) should produce a 3-d array shaped
    (frames, 30, n_bands) for a 25-band geometric scale."""
    sample_rate = SR11025()
    BaseModel = stft(resample_to=sample_rate)
    window_func = OggVorbisWindowingFunc()
    scale = GeometricScale(20, 5000, 0.1, 25)

    @simple_in_memory_settings
    class Document(BaseModel):
        # Long windows: 1 s duration with a 500 ms hop.
        long_windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=SampleRate(
                frequency=Milliseconds(500), duration=Seconds(1)),
            wfunc=window_func,
            needs=BaseModel.resampled,
            store=True)

        dct = ArrayWithUnitsFeature(
            DCT,
            scale_always_even=True,
            needs=long_windowed,
            store=True)

        # Frequency-adaptive inverse DCT over the geometric scale.
        mdct = FrequencyAdaptiveFeature(
            FrequencyAdaptiveTransform,
            transform=scipy.fftpack.idct,
            scale=scale,
            needs=dct,
            store=True)

    ticks = TickSynthesizer(SR22050()).synthesize(
        Seconds(5), Milliseconds(200))
    _id = Document.process(meta=ticks.encode())
    doc = Document(_id)

    square = doc.mdct.square(30)
    self.assertEqual(3, square.ndim)
    self.assertEqual(30, square.shape[1])
    self.assertEqual(25, square.shape[2])
class Document(rs):
    """Extends the `rs` graph with a long-windowed view (1 s frames,
    500 ms hop) and its FFT, both persisted."""

    long_windowed = ArrayWithUnitsFeature(
        SlidingWindow,
        wscheme=SampleRate(Milliseconds(500), Seconds(1)),
        wfunc=OggVorbisWindowingFunc(),
        needs=rs.resampled,
        store=True)

    long_fft = ArrayWithUnitsFeature(FFT, needs=long_windowed, store=True)
class Document(rs):
    """Extends the `rs` graph with windowing, FFT and a spectral-centroid
    feature; only the centroid is persisted."""

    windowed = ArrayWithUnitsFeature(
        SlidingWindow,
        wscheme=wscheme,
        wfunc=OggVorbisWindowingFunc(),
        needs=rs.resampled,
        store=False)

    fft = ArrayWithUnitsFeature(FFT, needs=windowed, store=False)

    centroid = ArrayWithUnitsFeature(
        SpectralCentroid, needs=fft, store=True)
class FrequencyAdaptive(BaseModel):
    """Graph producing a frequency-adaptive representation: long windows
    -> FFT -> per-band inverse real FFT over `scale`.

    NOTE(review): closes over names from an enclosing scope
    (long_window_sample_rate, scale, check_scale_overlap_ratio,
    store_freq_adaptive) — confirm against the surrounding function.
    """

    long_windowed = ArrayWithUnitsFeature(
        SlidingWindow,
        wscheme=long_window_sample_rate,
        wfunc=OggVorbisWindowingFunc(),
        needs=BaseModel.resampled,
        store=False)

    long_fft = ArrayWithUnitsFeature(FFT, needs=long_windowed, store=False)

    # Inverse real FFT applied per frequency band, with a Hann window.
    freq_adaptive = FrequencyAdaptiveFeature(
        FrequencyAdaptiveTransform,
        transform=np.fft.irfft,
        scale=scale,
        check_scale_overlap_ratio=check_scale_overlap_ratio,
        window_func=np.hanning,
        needs=long_fft,
        store=store_freq_adaptive)
def test_perfect_reconstruction_using_overlap_add(self):
    """MDCT analysis followed by overlap-add synthesis should round-trip
    a windowed sine mixture (away from the signal edges)."""
    signal = SineSynthesizer(SR22050()).synthesize(
        Seconds(10), [440., 660., 880.])
    rate = SampleRate(duration=Seconds(1), frequency=Milliseconds(500))
    frames = signal.sliding_window(rate)
    coeffs = list(MDCT()._process(frames * OggVorbisWindowingFunc()))[0]
    reconstructed = MDCTSynthesizer().synthesize(coeffs)
    # Compare only the interior so boundary conditions can be ignored.
    interior = TimeSlice(start=Seconds(1), duration=Seconds(8))
    np.testing.assert_allclose(reconstructed[interior], signal[interior])
class Document(BaseModel):
    """Graph computing chroma, Bark bands, BFCC and pooled BFCC features
    from a raw byte stream.

    NOTE(review): closes over `samplerate`, `windowing_scheme` and `band`
    from an enclosing scope — confirm against the surrounding function.
    """

    # Chunk size presumably sized to ~30 s of 16-bit stereo at 44.1 kHz
    # (2 bytes * 44100 Hz * 30 s * 2 channels) — TODO confirm.
    raw = ByteStreamFeature(
        ByteStream, chunksize=2 * 44100 * 30 * 2, store=True)

    ogg = OggVorbisFeature(OggVorbis, needs=raw, store=True)

    pcm = AudioSamplesFeature(AudioStream, needs=raw, store=True)

    resampled = AudioSamplesFeature(
        Resampler, needs=pcm, samplerate=samplerate, store=True)

    # Windowed frames are transient; downstream features are stored.
    windowed = ArrayWithUnitsFeature(
        SlidingWindow,
        needs=resampled,
        wscheme=windowing_scheme,
        wfunc=OggVorbisWindowingFunc(),
        store=False)

    fft = ArrayWithUnitsFeature(FFT, needs=windowed, store=True)

    chroma = ArrayWithUnitsFeature(
        Chroma, needs=fft, frequency_band=band, store=True)

    bark = ArrayWithUnitsFeature(
        BarkBands, needs=fft, frequency_band=band, store=True)

    bfcc = ArrayWithUnitsFeature(BFCC, needs=bark, store=True)

    # Re-window the BFCC sequence with a strided scheme, then max-pool
    # each window along axis 1.
    bfcc_sliding_window = ArrayWithUnitsFeature(
        SlidingWindow,
        needs=bfcc,
        wscheme=windowing_scheme * Stride(frequency=2, duration=4),
        store=True)

    bfcc_pooled = ArrayWithUnitsFeature(
        Max, needs=bfcc_sliding_window, axis=1, store=True)
class AudioGraph(BaseModel):
    """Full audio analysis graph: metadata, decoding, resampling,
    half-lapped windowing, and spectral features (DCT, FFT, Bark bands,
    centroid, chroma, BFCC).

    NOTE(review): closes over `chunksize_bytes`, `resample_to`,
    `store_fft` and `band` from an enclosing scope — confirm against the
    surrounding function.
    """

    meta = JSONFeature(MetaData, store=True, encoder=AudioMetaDataEncoder)

    raw = ByteStreamFeature(
        ByteStream,
        chunksize=chunksize_bytes,
        needs=meta,
        store=False)

    ogg = OggVorbisFeature(OggVorbis, needs=raw, store=True)

    pcm = AudioSamplesFeature(AudioStream, needs=raw, store=False)

    resampled = AudioSamplesFeature(
        Resampler,
        needs=pcm,
        samplerate=resample_to,
        store=False)

    # Half-lapped frames with an Ogg Vorbis window applied.
    windowed = ArrayWithUnitsFeature(
        SlidingWindow,
        needs=resampled,
        wscheme=HalfLapped(),
        wfunc=OggVorbisWindowingFunc(),
        store=False)

    dct = ArrayWithUnitsFeature(DCT, needs=windowed, store=True)

    fft = ArrayWithUnitsFeature(FFT, needs=windowed, store=store_fft)

    bark = ArrayWithUnitsFeature(
        BarkBands, needs=fft, frequency_band=band, store=True)

    centroid = ArrayWithUnitsFeature(
        SpectralCentroid, needs=bark, store=True)

    chroma = ArrayWithUnitsFeature(
        Chroma, needs=fft, frequency_band=band, store=True)

    bfcc = ArrayWithUnitsFeature(BFCC, needs=fft, store=True)