class Sound(BaseModel):
    """
    Audio feature graph layered on ``BaseModel`` whose final stages yield a
    frequency-domain view of the sound arranged along the module-level
    ``scale`` (described by the original author as a geometric scale).

    Features, in dependency order (each one names its input via ``needs``):

    * ``bark`` - ``zounds.BarkBands`` computed from ``BaseModel.fft`` with
      ``stop_freq_hz`` set to ``samplerate.nyquist``; stored.
    * ``long_windowed`` - ``zounds.SlidingWindow`` over
      ``BaseModel.resampled`` using a ``zounds.SampleRate`` with a frequency
      of 358 ms and a duration of 716 ms, and the Ogg Vorbis windowing
      function; stored.
    * ``long_fft`` - ``zounds.FFT`` of ``long_windowed``; stored.
    * ``freq_adaptive`` - ``zounds.FrequencyAdaptiveTransform`` of
      ``long_fft`` configured with ``np.fft.irfft``, ``scale`` and
      ``np.hanning``; not stored.
    * ``rasterized`` - the result of calling ``rasterize(64)`` on
      ``freq_adaptive``; not stored.
    """

    bark = zounds.ArrayWithUnitsFeature(
        zounds.BarkBands,
        needs=BaseModel.fft,
        samplerate=samplerate,
        stop_freq_hz=samplerate.nyquist,
        store=True)

    long_windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=BaseModel.resampled,
        wscheme=zounds.SampleRate(
            frequency=zounds.Milliseconds(358),
            duration=zounds.Milliseconds(716)),
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=True)

    long_fft = zounds.ArrayWithUnitsFeature(
        zounds.FFT,
        needs=long_windowed,
        store=True)

    # Neither of the two features below is persisted (store=False).
    freq_adaptive = zounds.FrequencyAdaptiveFeature(
        zounds.FrequencyAdaptiveTransform,
        needs=long_fft,
        transform=np.fft.irfft,
        scale=scale,
        window_func=np.hanning,
        store=False)

    rasterized = zounds.ArrayWithUnitsFeature(
        lambda fa: fa.rasterize(64),
        needs=freq_adaptive,
        store=False)
class SoundWithNoSettings(BaseModel):
    """
    Feature graph that derives a spectrogram from ``BaseModel.resampled`` and
    then exposes two sliding-window views of that spectrogram.

    Features, in dependency order:

    * ``short_windowed`` - ``zounds.SlidingWindow`` over
      ``BaseModel.resampled`` using the module-level ``windowing_scheme`` and
      the Ogg Vorbis windowing function.
    * ``fft`` - ``zounds.FFT`` of ``short_windowed``.
    * ``geom`` - the module-level ``spectrogram`` function applied to
      ``fft``; the only feature here declared with ``store=True``.
    * ``log_spectrogram`` - ``zounds.SlidingWindow`` over ``geom`` whose
      frequency and duration are multiples of ``windowing_scheme.frequency``
      derived from ``spectrogram_duration``.
    * ``ls`` - ``zounds.SlidingWindow`` over ``geom`` using
      ``spectrogram_sample_rate``.

    NOTE(review): the class name suggests that no persistence settings are
    attached to this model - confirm against the surrounding module.
    """

    short_windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=BaseModel.resampled,
        wscheme=windowing_scheme,
        wfunc=zounds.OggVorbisWindowingFunc())

    fft = zounds.ArrayWithUnitsFeature(zounds.FFT, needs=short_windowed)

    geom = zounds.ArrayWithUnitsFeature(spectrogram, needs=fft, store=True)

    log_spectrogram = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=geom,
        wscheme=zounds.SampleRate(
            frequency=(
                windowing_scheme.frequency * (spectrogram_duration // 2)),
            duration=(
                windowing_scheme.frequency * spectrogram_duration * 3)))

    ls = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=geom,
        wscheme=spectrogram_sample_rate)
class Sound(Resampled):
    """
    Small pipeline that produces an A-weighted modified discrete cosine
    transform (MDCT) of the resampled audio.

    Features, in dependency order:

    * ``windowed`` - ``zounds.SlidingWindow`` over ``Resampled.resampled``
      with a ``zounds.HalfLapped()`` windowing scheme and the Ogg Vorbis
      windowing function; the only feature declared with ``store=True``.
    * ``mdct`` - ``zounds.MDCT`` of ``windowed``.
    * ``weighted`` - ``mdct`` multiplied by ``zounds.AWeighting()``.

    The original description states that feature data is "persisted" in an
    in-memory store; that storage configuration is not part of this class
    body and must be supplied by the surrounding code.
    """

    windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        wscheme=zounds.HalfLapped(),
        wfunc=zounds.OggVorbisWindowingFunc(),
        needs=Resampled.resampled,
        store=True)

    mdct = zounds.ArrayWithUnitsFeature(
        zounds.MDCT,
        needs=windowed)

    weighted = zounds.ArrayWithUnitsFeature(
        lambda x: x * zounds.AWeighting(),
        needs=mdct)
def spectrogram(x):
    """
    Convert frequency-domain coefficients into a weighted, log-compressed
    spectrogram.

    The absolute value of ``x.real`` is mapped onto the module-level
    ``scale`` via ``apply_scale`` (using the Ogg Vorbis windowing function),
    multiplied by 100 and passed through ``zounds.log_modulus``, and finally
    multiplied by ``zounds.AWeighting()``.

    ``x`` is presumably the output of an FFT feature (it must expose a
    ``.real`` attribute) - confirm against the caller.
    """
    magnitudes = np.abs(x.real)
    on_scale = apply_scale(
        magnitudes, scale, window=zounds.OggVorbisWindowingFunc())
    compressed = zounds.log_modulus(on_scale * 100)
    return compressed * zounds.AWeighting()
MDCT invertibility requirements, is tricky, and requires some low-level knowledge that zounds' Scale attempts to abstract away. See section 3.3 Setting MDCT Sizes for information about what we're fudging/glossing over in this implementation. We instead use the DCT2 transform, which makes inversion easier, at the cost of more redundancy. """ from __future__ import division import zounds import scipy samplerate = zounds.SR11025() BaseModel = zounds.stft(resample_to=samplerate) windowing_func = zounds.OggVorbisWindowingFunc() scale = zounds.GeometricScale(300, 3030, 0.05, 100) @zounds.simple_in_memory_settings class Document(BaseModel): bark = zounds.ArrayWithUnitsFeature(zounds.BarkBands, samplerate=samplerate, stop_freq_hz=samplerate.nyquist, needs=BaseModel.fft, store=True) long_windowed = zounds.ArrayWithUnitsFeature( zounds.SlidingWindow, wscheme=zounds.SampleRate(frequency=zounds.Milliseconds(500),