def resampled(chunksize_bytes=DEFAULT_CHUNK_SIZE,
              resample_to=SR44100(),
              store_resampled=False):
    """
    Create a basic processing pipeline that can resample all incoming audio
    to a normalized sampling rate for downstream processing, and store a
    convenient, compressed version for playback

    :param chunksize_bytes: The number of bytes from the raw stream to
        process at once
    :param resample_to: The new, normalized sampling rate
    :param store_resampled: If True, persist the resampled audio samples
    :return: A simple processing pipeline
    """

    class Resampled(BaseModel):
        meta = JSONFeature(
            MetaData,
            store=True,
            encoder=AudioMetaDataEncoder)

        raw = ByteStreamFeature(
            ByteStream,
            chunksize=chunksize_bytes,
            needs=meta,
            store=False)

        ogg = OggVorbisFeature(
            OggVorbis,
            needs=raw,
            store=True)

        pcm = AudioSamplesFeature(
            AudioStream,
            needs=raw,
            store=False)

        resampled = AudioSamplesFeature(
            Resampler,
            needs=pcm,
            samplerate=resample_to,
            store=store_resampled)

    return Resampled
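# Usage sketch (an assumption, not part of the original module): wire the
# resampled() factory to in-memory storage, feed it a synthesized tone, and
# read back the resampled audio. SR11025 is chosen only for illustration.
from zounds.synthesize import SineSynthesizer
from zounds.timeseries import SR11025
from zounds.util import simple_in_memory_settings


@simple_in_memory_settings
class Sound(resampled(resample_to=SR11025(), store_resampled=True)):
    pass


samples = SineSynthesizer(SR44100()).synthesize(Seconds(2), [440.])
_id = Sound.process(meta=samples.encode())
snd = Sound(_id)
print(snd.resampled.samplerate)  # expected: SR11025()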
def setUp(self):
    self.samplerate = SR44100()
    rs = resampled(resample_to=self.samplerate)

    wscheme = HalfLapped()

    @simple_in_memory_settings
    class Document(rs):
        windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=wscheme,
            wfunc=OggVorbisWindowingFunc(),
            needs=rs.resampled,
            store=False)

        fft = ArrayWithUnitsFeature(
            FFT,
            needs=windowed,
            store=False)

        centroid = ArrayWithUnitsFeature(
            SpectralCentroid,
            needs=fft,
            store=True)

    ss = SineSynthesizer(self.samplerate)
    chunks = [ss.synthesize(Seconds(1), [440 * i]) for i in range(1, 6)]
    self.audio = AudioSamples(ArrayWithUnits.concat(chunks), self.samplerate)

    _id = Document.process(meta=self.audio.encode())
    self.doc = Document(_id)
def frequency_adaptive(long_window_sample_rate,
                       scale,
                       store_freq_adaptive=False,
                       check_scale_overlap_ratio=False,
                       chunksize_bytes=DEFAULT_CHUNK_SIZE,
                       resample_to=SR44100(),
                       store_resampled=False):
    BaseModel = resampled(chunksize_bytes, resample_to, store_resampled)

    class FrequencyAdaptive(BaseModel):
        long_windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=long_window_sample_rate,
            wfunc=OggVorbisWindowingFunc(),
            needs=BaseModel.resampled,
            store=False)

        long_fft = ArrayWithUnitsFeature(
            FFT,
            needs=long_windowed,
            store=False)

        freq_adaptive = FrequencyAdaptiveFeature(
            FrequencyAdaptiveTransform,
            transform=np.fft.irfft,
            scale=scale,
            check_scale_overlap_ratio=check_scale_overlap_ratio,
            window_func=np.hanning,
            needs=long_fft,
            store=store_freq_adaptive)

    return FrequencyAdaptive
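# Hypothetical invocation sketch for frequency_adaptive(). The geometric
# scale and the one-second long-window rate are illustrative values, not
# ones prescribed by the library.
from zounds.spectral import GeometricScale
from zounds.timeseries import SampleRate

scale = GeometricScale(20, 5000, 0.05, 120)
long_window_rate = SampleRate(Seconds(1), Seconds(1))
FreqAdaptive = frequency_adaptive(long_window_rate, scale)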
def test_can_repr(self):
    cs = ChunkSizeBytes(SR44100(), Seconds(30), channels=2, bit_depth=16)
    s = repr(cs)
    self.assertEqual(
        'ChunkSizeBytes(samplerate=SR44100(f=2.2675736e-05, '
        'd=2.2675736e-05), duration=30 seconds, channels=2, bit_depth=16)',
        s)
def test_matches_fftfreq(self):
    samplerate = SR44100()
    n_bands = 2048
    fft_freqs = np.fft.rfftfreq(n_bands, 1 / int(samplerate))
    bands = LinearScale.from_sample_rate(samplerate, n_bands // 2)
    linear_freqs = np.array([b.start_hz for b in bands])
    np.testing.assert_allclose(linear_freqs, fft_freqs[:-1])
def audio_graph(chunksize_bytes=DEFAULT_CHUNK_SIZE,
                resample_to=SR44100(),
                store_fft=False):
    """
    Produce a base class suitable as a starting point for many audio
    processing pipelines. This class resamples all audio to a common
    sampling rate and produces a bark band spectrogram from overlapping
    short-time Fourier transform frames. It also compresses the audio into
    ogg vorbis format for compact storage.
    """
    band = FrequencyBand(20, resample_to.nyquist)

    class AudioGraph(BaseModel):
        meta = JSONFeature(
            MetaData,
            store=True,
            encoder=AudioMetaDataEncoder)

        raw = ByteStreamFeature(
            ByteStream,
            chunksize=chunksize_bytes,
            needs=meta,
            store=False)

        ogg = OggVorbisFeature(
            OggVorbis,
            needs=raw,
            store=True)

        pcm = AudioSamplesFeature(
            AudioStream,
            needs=raw,
            store=False)

        resampled = AudioSamplesFeature(
            Resampler,
            needs=pcm,
            samplerate=resample_to,
            store=False)

        windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            needs=resampled,
            wscheme=HalfLapped(),
            wfunc=OggVorbisWindowingFunc(),
            store=False)

        dct = ArrayWithUnitsFeature(
            DCT,
            needs=windowed,
            store=True)

        fft = ArrayWithUnitsFeature(
            FFT,
            needs=windowed,
            store=store_fft)

        bark = ArrayWithUnitsFeature(
            BarkBands,
            needs=fft,
            frequency_band=band,
            store=True)

        centroid = ArrayWithUnitsFeature(
            SpectralCentroid,
            needs=bark,
            store=True)

        chroma = ArrayWithUnitsFeature(
            Chroma,
            needs=fft,
            frequency_band=band,
            store=True)

        bfcc = ArrayWithUnitsFeature(
            BFCC,
            needs=fft,
            store=True)

    return AudioGraph
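# Usage sketch (assumed, for illustration): derive a concrete document class
# from audio_graph() and push two seconds of noise through the whole graph.
from zounds.synthesize import NoiseSynthesizer
from zounds.util import simple_in_memory_settings


@simple_in_memory_settings
class Sound(audio_graph(store_fft=True)):
    pass


audio = NoiseSynthesizer(SR44100()).synthesize(Seconds(2))
_id = Sound.process(meta=audio.encode())
snd = Sound(_id)
print(snd.bark.shape)  # one bark-band vector per half-lapped frame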
def test_has_correct_sample_rate(self):
    half_lapped = HalfLapped()
    synth = DCTSynthesizer()
    raw = np.zeros((100, 2048))
    band = FrequencyBand(0, SR44100().nyquist)
    scale = LinearScale(band, raw.shape[1])
    timeseries = ArrayWithUnits(
        raw, [TimeDimension(*half_lapped), FrequencyDimension(scale)])
    output = synth.synthesize(timeseries)
    self.assertIsInstance(output.samplerate, SR44100)
    self.assertIsInstance(output, AudioSamples)
def test_can_do_multithreaded_resampling(self):
    synth = SilenceSynthesizer(SR44100())
    audio = [synth.synthesize(Seconds(5)) for _ in range(10)]
    pool = ThreadPool(4)

    def x(samples):
        rs = Resample(int(SR44100()), int(SR11025()))
        return rs(samples, end_of_input=True)

    resampled = pool.map(x, audio)
    self.assertEqual(10, len(resampled))
def test_can_get_all_even_sized_bands(self):
    samplerate = SR44100()
    scale = LinearScale.from_sample_rate(samplerate, 44100, always_even=True)
    log_scale = GeometricScale(20, 20000, 0.01, 64)
    slices = [scale.get_slice(band) for band in log_scale]
    sizes = [s.stop - s.start for s in slices]
    self.assertTrue(
        not any(s % 2 for s in sizes),
        'All slice sizes should be even but were {sizes}'.format(**locals()))
def setUp(self):
    @simple_in_memory_settings
    class Document(stft(store_fft=True)):
        pass

    synth = NoiseSynthesizer(SR44100())
    audio = synth.synthesize(Seconds(2))

    _id = Document.process(meta=audio.encode())
    doc = Document(_id)

    non_doc = SomethingElse(11)

    parser = FeatureParser(Document, locals())

    self.document = Document
    self.doc = doc
    self.parser = parser
def windowed(wscheme,
             chunksize_bytes=DEFAULT_CHUNK_SIZE,
             resample_to=SR44100(),
             store_resampled=True,
             store_windowed=False,
             wfunc=None):
    rs = resampled(
        chunksize_bytes=chunksize_bytes,
        resample_to=resample_to,
        store_resampled=store_resampled)

    class Sound(rs):
        windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=wscheme,
            wfunc=wfunc,
            needs=rs.resampled,
            store=store_windowed)

    return Sound
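# Sketch (assumed usage): the factory above returns a class ready for
# further subclassing, with the windowing scheme and window function fixed
# at class-creation time.
Sound = windowed(
    wscheme=HalfLapped(),
    wfunc=OggVorbisWindowingFunc())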
def test_can_encode_and_decode_variable_rate_time_series(self):
    class TimestampEmitter(ff.Node):
        def __init__(self, needs=None):
            super(TimestampEmitter, self).__init__(needs=needs)
            self.pos = Picoseconds(0)

        def _process(self, data):
            td = data.dimensions[0]
            frequency = td.frequency
            timestamps = [
                self.pos + (i * frequency)
                for i, d in enumerate(data)
                if random() > 0.9]
            slices = TimeSlice.slices(timestamps)
            yield VariableRateTimeSeries(
                (ts, np.zeros(0)) for ts in slices)
            self.pos += frequency * len(data)

    graph = stft(store_fft=True)

    @simple_in_memory_settings
    class Document(graph):
        slices = TimeSliceFeature(
            TimestampEmitter,
            needs=graph.fft,
            store=True)

        pooled = VariableRateTimeSeriesFeature(
            Pooled,
            op=np.max,
            axis=0,
            needs=(slices, graph.fft),
            store=False)

    signal = NoiseSynthesizer(SR44100()) \
        .synthesize(Seconds(10)) \
        .encode()
    _id = Document.process(meta=signal)
    doc = Document(_id)
    self.assertIsInstance(doc.pooled, VariableRateTimeSeries)
    self.assertEqual(doc.fft.shape[1], doc.pooled.slicedata.shape[1])
def stft(chunksize_bytes=DEFAULT_CHUNK_SIZE,
         resample_to=SR44100(),
         wscheme=HalfLapped(),
         store_fft=False,
         fft_padding_samples=None,
         store_windowed=False,
         store_resampled=False):
    class ShortTimeFourierTransform(BaseModel):
        meta = JSONFeature(
            MetaData,
            store=True,
            encoder=AudioMetaDataEncoder)

        raw = ByteStreamFeature(
            ByteStream,
            chunksize=chunksize_bytes,
            needs=meta,
            store=False)

        ogg = OggVorbisFeature(
            OggVorbis,
            needs=raw,
            store=True)

        pcm = AudioSamplesFeature(
            AudioStream,
            needs=raw,
            store=False)

        resampled = AudioSamplesFeature(
            Resampler,
            needs=pcm,
            samplerate=resample_to,
            store=store_resampled)

        windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            needs=resampled,
            wscheme=wscheme,
            wfunc=OggVorbisWindowingFunc(),
            store=store_windowed)

        fft = ArrayWithUnitsFeature(
            FFT,
            padding_samples=fft_padding_samples,
            needs=windowed,
            store=store_fft)

    return ShortTimeFourierTransform
def __init__(self, path):
    super(MusicNet, self).__init__()
    self.path = path
    self._metadata = \
        'https://homes.cs.washington.edu/~thickstn/media/musicnet_metadata.csv'
    self._samplerate = SR44100()
import numpy as np

from featureflow import BaseModel, JSONFeature, ByteStream, ByteStreamFeature

from zounds.soundfile import \
    MetaData, AudioMetaDataEncoder, OggVorbis, OggVorbisFeature, AudioStream, \
    Resampler, ChunkSizeBytes
from zounds.segment import \
    ComplexDomain, MovingAveragePeakPicker, TimeSliceFeature
from zounds.persistence import ArrayWithUnitsFeature, AudioSamplesFeature, \
    FrequencyAdaptiveFeature
from zounds.timeseries import SR44100, HalfLapped, Stride, Seconds
from zounds.spectral import \
    SlidingWindow, OggVorbisWindowingFunc, FFT, BarkBands, SpectralCentroid, \
    Chroma, BFCC, DCT, FrequencyAdaptiveTransform, FrequencyBand

DEFAULT_CHUNK_SIZE = ChunkSizeBytes(
    samplerate=SR44100(),
    duration=Seconds(30),
    bit_depth=16,
    channels=2)
def __init__(self, samplerate=None, needs=None):
    super(Resampler, self).__init__(needs=needs)
    self._samplerate = samplerate or SR44100()
    self._resample = None
def test_can_invert_fft_44100(self):
    self.can_invert_fft(SR44100())
def test_correct_output_with_stereo(self):
    synth = SilenceSynthesizer(SR44100())
    samples = synth.synthesize(Seconds(1)).stereo
    rs = Resample(int(samples.samplerate), int(SR11025()), nchannels=2)
    resampled = rs(samples, end_of_input=True)
    self.assertEqual((11025, 2), resampled.shape)
def test_audible_range_lower_bound(self):
    band = FrequencyBand.audible_range(SR44100())
    self.assertEqual(20, band.start_hz)
def test_can_convert_to_integer_number_of_bytes(self):
    cs = ChunkSizeBytes(SR44100(), Seconds(30), channels=2, bit_depth=16)
    self.assertEqual(5292000, int(cs))
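# Worked arithmetic behind the expected constant above (a sketch, assuming
# 16-bit samples occupy two bytes each):
# 44100 frames/sec * 30 sec * 2 channels * 2 bytes/sample = 5,292,000 bytes
assert 44100 * 30 * 2 * 2 == 5292000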
def test_correct_window_and_step_size_at_44100(self):
    self._check(SR44100(), 2048, 1024)
def test_audible_range_upper_bound(self):
    sr = SR44100()
    band = FrequencyBand.audible_range(sr)
    self.assertEqual(int(sr) // 2, band.stop_hz)
def soundfile(flo=None):
    synth = NoiseSynthesizer(SR44100())
    samples = synth.synthesize(Seconds(5)).stereo
    flo = samples.encode(flo=flo)
    return samples, flo
from soundfile import SoundFile

from featureflow import *

from zounds.timeseries import TimeSlice, AudioSamples, SR44100, HalfLapped, \
    Seconds, Milliseconds, Stride
from zounds.persistence import ArrayWithUnitsFeature, AudioSamplesFeature
from zounds.soundfile import \
    AudioStream, OggVorbis, OggVorbisFeature, Resampler
from zounds.spectral import \
    SlidingWindow, OggVorbisWindowingFunc, FFT, Chroma, BarkBands, BFCC, \
    FrequencyBand
from zounds.basic import Max
from zounds.util import simple_in_memory_settings

windowing_scheme = HalfLapped()
samplerate = SR44100()
band = FrequencyBand(20, samplerate.nyquist)


@simple_in_memory_settings
class Document(BaseModel):
    raw = ByteStreamFeature(
        ByteStream,
        chunksize=2 * 44100 * 30 * 2,
        store=True)

    ogg = OggVorbisFeature(
        OggVorbis,
        needs=raw,
        store=True)
def test_generates_correct_samplerate(self):
    ss = SineSynthesizer(SR44100())
    audio = ss.synthesize(Seconds(4), freqs_in_hz=[440.])
    self.assertEqual(SR44100(), audio.samplerate)
def setUp(self):
    self.samplerate = SR44100()
    self.wscheme = HalfLapped()
    self.STFT = stft(
        store_fft=True,
        resample_to=self.samplerate,
        wscheme=self.wscheme)