class ShortTimeFourierTransform(BaseModel):
    """Declarative feature graph ending in an FFT of windowed, resampled audio.

    Each attribute declares one feature.  The first positional argument is
    the extractor class, ``needs`` names the upstream feature it consumes,
    and the remaining keyword arguments are passed to the feature
    constructor.  The dependency chain declared here is::

        meta -> raw -> ogg
                raw -> pcm -> resampled -> windowed -> fft

    ``chunksize_bytes``, ``resample_to``, ``wscheme``,
    ``fft_padding_samples`` and the ``store_*`` flags are not defined in
    this class; they are looked up in the enclosing scope when the class
    body executes.
    """

    # Root of the graph: nothing upstream, always stored.
    meta = JSONFeature(
        MetaData,
        store=True,
        encoder=AudioMetaDataEncoder,
    )

    # Byte stream derived from ``meta``; not stored.
    raw = ByteStreamFeature(
        ByteStream,
        needs=meta,
        chunksize=chunksize_bytes,
        store=False,
    )

    # Two independent consumers of ``raw``: a stored ogg feature and an
    # unstored pcm feature.
    ogg = OggVorbisFeature(
        OggVorbis,
        needs=raw,
        store=True,
    )
    pcm = AudioSamplesFeature(
        AudioStream,
        needs=raw,
        store=False,
    )

    # ``pcm`` resampled to ``resample_to``; storage is caller-controlled.
    resampled = AudioSamplesFeature(
        Resampler,
        needs=pcm,
        samplerate=resample_to,
        store=store_resampled,
    )

    # Sliding window over ``resampled`` using the caller-supplied scheme.
    windowed = ArrayWithUnitsFeature(
        SlidingWindow,
        needs=resampled,
        wscheme=wscheme,
        wfunc=OggVorbisWindowingFunc(),
        store=store_windowed,
    )

    # FFT of each window; ``padding_samples`` is forwarded unchanged.
    fft = ArrayWithUnitsFeature(
        FFT,
        needs=windowed,
        padding_samples=fft_padding_samples,
        store=store_fft,
    )
class Resampled(BaseModel):
    """Declarative feature graph ending in resampled audio samples.

    Each attribute declares one feature: the first positional argument is
    the extractor class and ``needs`` names the upstream feature.  The
    dependency chain declared here is::

        meta -> raw -> ogg
                raw -> pcm -> resampled

    ``chunksize_bytes``, ``resample_to`` and ``store_resampled`` are not
    defined in this class; they are looked up in the enclosing scope when
    the class body executes.
    """

    # Root of the graph: nothing upstream, always stored.
    meta = JSONFeature(
        MetaData,
        store=True,
        encoder=AudioMetaDataEncoder,
    )

    # Byte stream derived from ``meta``; not stored.
    raw = ByteStreamFeature(
        ByteStream,
        needs=meta,
        chunksize=chunksize_bytes,
        store=False,
    )

    # Stored ogg feature built from ``raw``.
    ogg = OggVorbisFeature(
        OggVorbis,
        needs=raw,
        store=True,
    )

    # Unstored pcm feature built from ``raw``.
    pcm = AudioSamplesFeature(
        AudioStream,
        needs=raw,
        store=False,
    )

    # ``pcm`` resampled to ``resample_to``; storage is caller-controlled.
    resampled = AudioSamplesFeature(
        Resampler,
        needs=pcm,
        samplerate=resample_to,
        store=store_resampled,
    )
class Document(BaseModel):
    """Declarative feature graph from raw bytes to pooled BFCC frames.

    Each attribute declares one feature: the first positional argument is
    the extractor class and ``needs`` names the upstream feature.  The
    dependency chain declared here is::

        raw -> ogg
        raw -> pcm -> resampled -> windowed -> fft -> chroma
                                                fft -> bark -> bfcc
        bfcc -> bfcc_sliding_window -> bfcc_pooled

    Every feature except ``windowed`` is declared with ``store=True``.
    ``samplerate``, ``windowing_scheme`` and ``band`` are not defined in
    this class; they are looked up in the enclosing scope when the class
    body executes.
    """

    # Root of the graph.  The chunk size evaluates to 5,292,000;
    # presumably 30 s of 2-channel, 2-byte samples at 44.1 kHz -- confirm.
    raw = ByteStreamFeature(
        ByteStream,
        chunksize=2 * 44100 * 30 * 2,
        store=True,
    )

    # Two independent consumers of ``raw``.
    ogg = OggVorbisFeature(
        OggVorbis,
        needs=raw,
        store=True,
    )
    pcm = AudioSamplesFeature(
        AudioStream,
        needs=raw,
        store=True,
    )

    # ``pcm`` resampled to ``samplerate``.
    resampled = AudioSamplesFeature(
        Resampler,
        needs=pcm,
        samplerate=samplerate,
        store=True,
    )

    # The only unstored feature in this graph.
    windowed = ArrayWithUnitsFeature(
        SlidingWindow,
        needs=resampled,
        wscheme=windowing_scheme,
        wfunc=OggVorbisWindowingFunc(),
        store=False,
    )

    fft = ArrayWithUnitsFeature(
        FFT,
        needs=windowed,
        store=True,
    )

    # ``chroma`` and ``bark`` both consume ``fft`` and share ``band``.
    chroma = ArrayWithUnitsFeature(
        Chroma,
        needs=fft,
        frequency_band=band,
        store=True,
    )
    bark = ArrayWithUnitsFeature(
        BarkBands,
        needs=fft,
        frequency_band=band,
        store=True,
    )

    # BFCC is computed from the bark feature.
    bfcc = ArrayWithUnitsFeature(
        BFCC,
        needs=bark,
        store=True,
    )

    # Second-level sliding window over the BFCC frames; its scheme is the
    # base windowing scheme multiplied by a ``Stride``.
    bfcc_sliding_window = ArrayWithUnitsFeature(
        SlidingWindow,
        needs=bfcc,
        wscheme=windowing_scheme * Stride(frequency=2, duration=4),
        store=True,
    )

    # ``Max`` applied along axis 1 of each BFCC window.
    bfcc_pooled = ArrayWithUnitsFeature(
        Max,
        needs=bfcc_sliding_window,
        axis=1,
        store=True,
    )
class AudioGraph(BaseModel):
    """Declarative feature graph with several spectral features.

    Each attribute declares one feature: the first positional argument is
    the extractor class and ``needs`` names the upstream feature.  The
    dependency chain declared here is::

        meta -> raw -> ogg
                raw -> pcm -> resampled -> windowed -> dct
                                           windowed -> fft
        fft -> bark -> centroid
        fft -> chroma
        fft -> bfcc

    ``chunksize_bytes``, ``resample_to``, ``store_fft`` and ``band`` are
    not defined in this class; they are looked up in the enclosing scope
    when the class body executes.
    """

    # Root of the graph: nothing upstream, always stored.
    meta = JSONFeature(
        MetaData,
        store=True,
        encoder=AudioMetaDataEncoder,
    )

    # Byte stream derived from ``meta``; not stored.
    raw = ByteStreamFeature(
        ByteStream,
        needs=meta,
        chunksize=chunksize_bytes,
        store=False,
    )

    # Two independent consumers of ``raw``: a stored ogg feature and an
    # unstored pcm feature.
    ogg = OggVorbisFeature(
        OggVorbis,
        needs=raw,
        store=True,
    )
    pcm = AudioSamplesFeature(
        AudioStream,
        needs=raw,
        store=False,
    )

    # Intermediate features: neither is stored.
    resampled = AudioSamplesFeature(
        Resampler,
        needs=pcm,
        samplerate=resample_to,
        store=False,
    )
    windowed = ArrayWithUnitsFeature(
        SlidingWindow,
        needs=resampled,
        wscheme=HalfLapped(),
        wfunc=OggVorbisWindowingFunc(),
        store=False,
    )

    # Two transforms of the same windows; only the FFT's storage is
    # caller-controlled.
    dct = ArrayWithUnitsFeature(
        DCT,
        needs=windowed,
        store=True,
    )
    fft = ArrayWithUnitsFeature(
        FFT,
        needs=windowed,
        store=store_fft,
    )

    # Bark bands over ``band``, and a spectral centroid computed from them.
    bark = ArrayWithUnitsFeature(
        BarkBands,
        needs=fft,
        frequency_band=band,
        store=True,
    )
    centroid = ArrayWithUnitsFeature(
        SpectralCentroid,
        needs=bark,
        store=True,
    )

    chroma = ArrayWithUnitsFeature(
        Chroma,
        needs=fft,
        frequency_band=band,
        store=True,
    )

    # NOTE(review): BFCC is fed the ``fft`` feature directly rather than
    # ``bark``.  Confirm this is intentional before relying on these
    # coefficients being Bark-scaled.
    bfcc = ArrayWithUnitsFeature(
        BFCC,
        needs=fft,
        store=True,
    )