def setUp(self):
    """Build an in-memory processing graph and run one multi-tone sound through it."""
    self.samplerate = SR44100()
    base = resampled(resample_to=self.samplerate)
    window_scheme = HalfLapped()

    @simple_in_memory_settings
    class Document(base):
        windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=window_scheme,
            wfunc=OggVorbisWindowingFunc(),
            needs=base.resampled,
            store=False)

        fft = ArrayWithUnitsFeature(
            FFT,
            needs=windowed,
            store=False)

        centroid = ArrayWithUnitsFeature(
            SpectralCentroid,
            needs=fft,
            store=True)

    # five one-second tones at 440, 880, ..., 2200 hz
    synth = SineSynthesizer(self.samplerate)
    tones = [synth.synthesize(Seconds(1), [440 * i]) for i in range(1, 6)]
    self.audio = AudioSamples(
        ArrayWithUnits.concat(tones), self.samplerate)
    doc_id = Document.process(meta=self.audio.encode())
    self.doc = Document(doc_id)
def _process(self, data):
    """
    Resample an incoming chunk of audio samples to this node's target
    sample rate, yielding the resampled chunk.

    On the first chunk, lazily constructs the underlying ``Resample``
    instance (channel count is only known once data arrives) and decides
    whether any resampling is needed at all.

    Args:
        data: an AudioSamples-like chunk exposing ``samples_per_second``,
            ``shape`` and ``channels``
    """
    sr = data.samples_per_second

    if self._resample is None:
        target_sr = self._samplerate.samples_per_second
        # channel count: 1 for mono (1-d input), otherwise the trailing dim
        self._resample = Resample(
            sr,
            target_sr,
            1 if len(data.shape) == 1 else data.shape[1])

        if target_sr != sr:
            self._rs = self._resample
            # KLUDGE: The following line seems to solve a bug whereby
            # libsamplerate doesn't generate enough samples the first time
            # src_process is called. We're calling it once here, so the "real"
            # output will come out click-free
            silence = AudioSamples.silence(
                self._samplerate, Seconds(1), channels=data.channels)
            self._resample(silence)
        else:
            # already at the target rate; pass chunks through untouched
            self._rs = self._noop

    resampled = self._rs(data, self._finalized)
    if not isinstance(resampled, ArrayWithUnits):
        # re-wrap raw output from libsamplerate with time-unit metadata
        resampled = AudioSamples(resampled, self._samplerate)
    yield resampled
def _check(self, samplerate, expected_window_size, expected_step_size):
    """Assert half-lapped windowing yields the expected integer window/step sizes."""
    total_samples = 5 * samplerate.samples_per_second
    samples = AudioSamples(np.zeros(total_samples), samplerate)
    scheme = samplerate.half_lapped()
    window_slice, step_slice = samples._sliding_window_integer_slices(
        TimeSlice(scheme.duration), TimeSlice(scheme.frequency))
    self.assertEqual(expected_window_size, window_slice[0])
    self.assertEqual(expected_step_size, step_slice[0])
def _check(self, samplerate, expected_window_size, expected_step_size):
    """Verify integer slice sizes computed for a half-lapped window scheme."""
    silence = np.zeros(5 * samplerate.samples_per_second)
    samples = AudioSamples(silence, samplerate)
    wscheme = samplerate.half_lapped()
    window, step = samples._sliding_window_integer_slices(
        TimeSlice(wscheme.duration),
        TimeSlice(wscheme.frequency))
    self.assertEqual(expected_window_size, window[0])
    self.assertEqual(expected_step_size, step[0])
def __getitem__(self, timeslice):
    """
    Read the samples covered by *timeslice* from the underlying sound file.

    A bare ``slice(None)`` means "everything": rewind and read the whole file.
    """
    rate = audio_sample_rate(self.samplerate)

    if timeslice == slice(None):
        self._sf.seek(0)
        return AudioSamples(self._sf.read(len(self._sf)), rate)

    first_sample = int(timeslice.start / self._freq)
    total = self._n_samples(timeslice.duration)
    self._sf.seek(first_sample)
    return AudioSamples(self._sf.read(total), rate)
def test_can_apply_sliding_windows_in_succession(self):
    """A second sliding window applied atop a first should stack dimensions."""
    samplerate = SR11025()
    short_window = samplerate * (16, 512)
    long_window = SampleRate(
        frequency=short_window.frequency * 1,
        duration=short_window.frequency * 64)
    settings = resampled(resample_to=samplerate, store_resampled=True)
    silence = AudioSamples.silence(samplerate, Seconds(10))

    @simple_in_memory_settings
    class Sound(settings):
        short_windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=short_window,
            needs=settings.resampled)

        long_windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=long_window,
            needs=short_windowed)

    sound_id = Sound.process(meta=silence.encode())
    sound = Sound(sound_id)
    self.assertEqual((512,), sound.short_windowed.shape[1:])
    self.assertEqual((64, 512), sound.long_windowed.shape[1:])
def _get_samples(self):
    """Read the next chunk from the sound file, optionally summed to mono."""
    rate = audio_sample_rate(self._sf.samplerate)
    chunk = AudioSamples(self._sf.read(self._chunk_size_samples), rate)
    return chunk.mono if self._sum_to_mono else chunk
def real_stft(self):
    """Compute an STFT over one randomly-chosen window of a real sound."""
    sound = self.sound_cls.random()
    frame = choice(sound.windowed)
    rate = audio_sample_rate(frame.dimensions[0].samples_per_second)
    return self._stft(AudioSamples(frame, rate))
def synthesize(self, duration):
    """
    Produce a stretch of pure silence at this synthesizer's sample rate.

    Args:
        duration (numpy.timedelta64): how long the silence should last
    """
    silent = AudioSamples.silence(self.samplerate, duration)
    return silent
def synthesize(self, duration):
    """
    Generate silent audio of the requested length.

    Args:
        duration (numpy.timedelta64): total length of the silent output
    """
    return AudioSamples.silence(
        self.samplerate, duration)
def test_can_round_trip_audio_samples(self):
    """Encoding then decoding AudioSamples should preserve values and dims."""
    source = np.random.random_sample(11025 * 10)
    samples = AudioSamples(source, SR11025())
    decoded = self._roundtrip(samples)
    self.assertIsInstance(decoded, ArrayWithUnits)
    self.assertEqual(1, len(decoded.dimensions))
    time_dim = decoded.dimensions[0]
    self.assertIsInstance(time_dim, TimeDimension)
    np.testing.assert_allclose(decoded, source)
def synthesize(self, duration):
    """
    Synthesize uniform white noise in the range [-1, 1).

    Args:
        duration (numpy.timedelta64): The duration of the synthesized sound
    """
    n_seconds = duration / Seconds(1)
    n_samples = int(self.samplerate.samples_per_second * n_seconds)
    noise = np.random.uniform(low=-1., high=1., size=n_samples)
    return AudioSamples(noise, self.samplerate)
def test_smoke(self):
    """Auto-correlogram of one second of silence should come out 3-dimensional."""
    rate = SR22050()
    silence = AudioSamples.silence(rate, Seconds(1))
    scale = GeometricScale(
        start_center_hz=20,
        stop_center_hz=5000,
        bandwidth_ratio=1.2,
        n_bands=8)
    scale.ensure_overlap_ratio(0.5)
    bank = fir_filter_bank(scale, 16, rate, np.hanning(3))
    correlogram = auto_correlogram(silence, bank)
    self.assertEqual(3, correlogram.ndim)
class SpectralCentroidTests(unittest2.TestCase):
    """End-to-end checks of the SpectralCentroid feature on a rising tone sequence."""

    def setUp(self):
        self.samplerate = SR44100()
        base = resampled(resample_to=self.samplerate)
        window_scheme = HalfLapped()

        @simple_in_memory_settings
        class Document(base):
            windowed = ArrayWithUnitsFeature(
                SlidingWindow,
                wscheme=window_scheme,
                wfunc=OggVorbisWindowingFunc(),
                needs=base.resampled,
                store=False)

            fft = ArrayWithUnitsFeature(
                FFT,
                needs=windowed,
                store=False)

            centroid = ArrayWithUnitsFeature(
                SpectralCentroid,
                needs=fft,
                store=True)

        # five consecutive one-second tones with ascending pitch, so the
        # spectral centroid should rise over the course of the sound
        synth = SineSynthesizer(self.samplerate)
        tones = [synth.synthesize(Seconds(1), [440 * i]) for i in range(1, 6)]
        self.audio = AudioSamples(
            ArrayWithUnits.concat(tones), self.samplerate)
        doc_id = Document.process(meta=self.audio.encode())
        self.doc = Document(doc_id)

    def test_has_correct_type(self):
        self.assertIsInstance(self.doc.centroid, ArrayWithUnits)

    def test_has_correct_dimensions(self):
        self.assertEqual(1, len(self.doc.centroid.dimensions))

    def test_has_correct_duration(self):
        self.assertAlmostEqual(
            self.audio.dimensions[0].end_seconds,
            self.doc.centroid.dimensions[0].end_seconds,
            delta=0.02)

    def test_centroid_is_monotonically_increasing(self):
        averaged = self.doc.centroid \
            .sliding_window((TimeSlice(Seconds(1)),)) \
            .mean(axis=1)
        self.assertTrue(np.all(np.diff(averaged) >= 0))
def test_smoke(self):
    """frequency_decomposition smoke test: result of auto_correlogram is rank 3."""
    samplerate = SR22050()
    samples = AudioSamples.silence(samplerate, Seconds(1))
    scale = GeometricScale(
        start_center_hz=20, stop_center_hz=5000,
        bandwidth_ratio=1.2, n_bands=8)
    scale.ensure_overlap_ratio(0.5)
    n_taps = 16
    filters = fir_filter_bank(scale, n_taps, samplerate, np.hanning(3))
    result = auto_correlogram(samples, filters)
    self.assertEqual(3, result.ndim)
def synthesize(self, duration, freqs_in_hz=(440.,)):
    """
    Synthesize one or more sine waves, mixed at equal amplitude.

    Args:
        duration (numpy.timedelta64): The duration of the sound to be
            synthesized
        freqs_in_hz (sequence of float): Numbers representing the
            frequencies in hz that should be synthesized

    Returns:
        AudioSamples containing the summed, amplitude-scaled sine waves

    Note:
        The default was changed from the mutable ``[440.]`` to the
        immutable ``(440.,)`` — mutable default arguments are shared
        across calls and are a well-known Python pitfall. Behavior is
        unchanged since the value is only read, never mutated.
    """
    freqs = np.array(freqs_in_hz)
    # scale amplitude so the mixed result stays within [-1, 1]
    scaling = 1 / len(freqs)
    sr = int(self.samplerate)
    # cycles-per-sample for each requested frequency
    cps = freqs / sr
    # total number of samples to generate
    ts = (duration / Seconds(1)) * sr
    ranges = np.array([np.arange(0, ts * c, c) for c in cps])
    raw = (np.sin(ranges * (2 * np.pi)) * scaling).sum(axis=0)
    return AudioSamples(raw, self.samplerate)
def _overlap_add(self, frames):
    """
    Reconstruct a 1-d audio signal from (possibly overlapping) windowed
    frames by windowing each frame and summing it into an output buffer
    at its hop-aligned position.
    """
    time_dim = frames.dimensions[0]
    per_sample = time_dim.duration / frames.shape[-1]
    window_size = int(np.round(time_dim.duration / per_sample))
    hop_size = int(np.round(time_dim.frequency / per_sample))

    # an empty buffer long enough to hold the reconstructed signal
    output = np.zeros(int(time_dim.end / per_sample))
    tapered = self._windowing_function() * frames

    for index, frame in enumerate(tapered):
        begin = index * hop_size
        end = begin + window_size
        # the final frame may extend past the buffer; clip it
        available = len(output[begin:end])
        output[begin:end] += frame[:available]

    rate = nearest_audio_sample_rate(Seconds(1) / per_sample)
    return AudioSamples(output, rate)
def synthesize(self, duration, tick_frequency):
    """
    Synthesize periodic "ticks", generated from white noise and an
    envelope.

    Args:
        duration (numpy.timedelta64): The total duration of the sound to
            be synthesized
        tick_frequency (numpy.timedelta64): The frequency of the ticking
            sound
    """
    sps = self.samplerate.samples_per_second

    # one short tick: a tenth of a second of noise with a linear fade-out
    tick = np.random.uniform(low=-1., high=1., size=int(sps * .1))
    tick *= np.linspace(1, 0, len(tick))

    # start from silence of the requested total length
    output = np.zeros(int(sps * (duration / Seconds(1))))

    # drop a tick every `hop` samples
    ticks_per_second = Seconds(1) / tick_frequency
    hop = int(sps // ticks_per_second)
    for start in range(0, len(output), hop):
        room = len(output[start:start + len(tick)])
        output[start:start + len(tick)] += tick[:room]

    return AudioSamples(output, self.samplerate)
def test_can_apply_sliding_windows_in_succession(self):
    """Chained SlidingWindow features should produce stacked window dims."""
    rate = SR11025()
    short_window = rate * (16, 512)
    long_window = SampleRate(
        frequency=short_window.frequency * 1,
        duration=short_window.frequency * 64)
    base = resampled(resample_to=rate, store_resampled=True)
    silence = AudioSamples.silence(rate, Seconds(10))

    @simple_in_memory_settings
    class Sound(base):
        short_windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=short_window,
            needs=base.resampled)

        long_windowed = ArrayWithUnitsFeature(
            SlidingWindow,
            wscheme=long_window,
            needs=short_windowed)

    key = Sound.process(meta=silence.encode())
    result = Sound(key)
    self.assertEqual((512,), result.short_windowed.shape[1:])
    self.assertEqual((64, 512), result.long_windowed.shape[1:])
def __iter__(self):
    """
    Yield AudioMetaData for every file in the dataset's train_data
    directory, joined against rows from the CSV metadata file.

    Fix: the metadata file was previously opened in binary mode
    (``'rb'``), but ``csv.DictReader`` requires a text-mode file object
    on Python 3 (iterating a binary file yields ``bytes``, which the csv
    module rejects). Opened as text with ``newline=''`` per the csv
    module documentation.
    """
    local_metadata = ensure_local_file(self._metadata, self.path)

    # map id -> full metadata row
    metadata = dict()
    with open(local_metadata, 'r', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            metadata[row['id']] = row

    train_audio_path = os.path.join(self.path, 'train_data')
    for filename in os.listdir(train_audio_path):
        full_path = os.path.join(train_audio_path, filename)
        _id, ext = os.path.splitext(filename)
        url = \
            'https://homes.cs.washington.edu/~thickstn/media/{_id}' \
            .format(_id=_id)
        meta = metadata[_id]
        samples = AudioSamples.from_file(full_path)
        # pre-download wrapper: carries the encoded bytes plus source url
        uri = PreDownload(samples.encode().read(), url)
        yield AudioMetaData(
            uri=uri,
            samplerate=int(self._samplerate),
            **meta)
def __iter__(self):
    """
    Iterate over the training audio files, yielding one AudioMetaData per
    file, enriched with the matching row from the CSV metadata file.

    Fix: ``csv.DictReader`` was being fed a file opened in ``'rb'``
    mode, which raises on Python 3 (csv needs str lines, not bytes).
    The csv docs prescribe text mode with ``newline=''``.
    """
    local_metadata = ensure_local_file(self._metadata, self.path)

    # index the metadata rows by their 'id' column
    metadata = dict()
    with open(local_metadata, 'r', newline='') as f:
        reader = csv.DictReader(f)
        for row in reader:
            metadata[row['id']] = row

    train_audio_path = os.path.join(self.path, 'train_data')
    for filename in os.listdir(train_audio_path):
        full_path = os.path.join(train_audio_path, filename)
        _id, ext = os.path.splitext(filename)
        url = \
            'https://homes.cs.washington.edu/~thickstn/media/{_id}' \
            .format(_id=_id)
        meta = metadata[_id]
        samples = AudioSamples.from_file(full_path)
        uri = PreDownload(samples.encode().read(), url)
        yield AudioMetaData(
            uri=uri,
            samplerate=int(self._samplerate),
            **meta)
def __call__(self, flo):
    """Decode a file-like object, re-wrapping the result as AudioSamples."""
    decoded = super(AudioSamplesDecoder, self).__call__(flo)
    rate = audio_sample_rate(decoded.dimensions[0].samples_per_second)
    return AudioSamples(decoded, rate)
def test_can_decompose_audio_samples(self):
    """Decomposition should keep the time dimension and add a frequency one."""
    silence = AudioSamples.silence(SR22050(), Seconds(1))
    bands = frequency_decomposition(
        silence, [64, 128, 256, 512, 1024])
    self.assertEqual(
        TimeDimension(silence.end, silence.end), bands.dimensions[0])
    self.assertIsInstance(
        bands.dimensions[1], ExplicitFrequencyDimension)
def fake_audio(self):
    """Pick a random fake sample, transform it, and pad with a second of silence."""
    raw = self.real_sample_transformer(choice(self.fake_samples))
    padded = AudioSamples(raw, self.samplerate).pad_with_silence(Seconds(1))
    return padded
def test_can_decompose_audio_samples(self):
    """frequency_decomposition yields (time, explicit-frequency) dimensions."""
    samples = AudioSamples.silence(SR22050(), Seconds(1))
    sizes = [64, 128, 256, 512, 1024]
    bands = frequency_decomposition(samples, sizes)
    expected_td = TimeDimension(samples.end, samples.end)
    self.assertEqual(expected_td, bands.dimensions[0])
    self.assertIsInstance(bands.dimensions[1], ExplicitFrequencyDimension)
def resample(samples, new_sample_rate):
    """
    Return *samples* converted to *new_sample_rate*.

    If the samples are already at the requested rate they are returned
    unchanged; otherwise a Resampler processes them chunk-by-chunk and
    the pieces are concatenated back into a single AudioSamples instance.
    """
    if samples.samplerate == new_sample_rate:
        return samples
    resampler = Resampler(new_sample_rate)
    chunks = list(resampler._process(samples))
    return AudioSamples(np.concatenate(chunks), new_sample_rate)