def test_sliding_window_has_correct_dimensions(self):
    """An 8192-sample window with a 4096-sample step over two seconds of
    audio should yield windows that are 8192 samples wide."""
    sr = SR11025()
    raw = np.random.randint(0, 255, (11025 * 2)).astype(np.int64)
    samples = ArrayWithUnits(raw, [TimeDimension(*sr)])
    window = TimeSlice(duration=sr.frequency * 8192)
    step = TimeSlice(duration=sr.frequency * 4096)
    _, windowed = samples.sliding_window_with_leftovers(window, step)
    self.assertEqual(8192, windowed.shape[1])
def _check(self, samplerate, expected_window_size, expected_step_size):
    """Assert that the half-lapped windowing scheme for *samplerate*
    translates into the expected integer window and step sizes."""
    zeros = np.zeros(5 * samplerate.samples_per_second)
    samples = AudioSamples(zeros, samplerate)
    scheme = samplerate.half_lapped()
    window, step = samples._sliding_window_integer_slices(
        TimeSlice(scheme.duration), TimeSlice(scheme.frequency))
    self.assertEqual(expected_window_size, window[0])
    self.assertEqual(expected_step_size, step[0])
def test_can_convert_to_categorical_distribution(self):
    """categorical() should append a trailing axis of mu + 1 bins whose
    values form a valid probability distribution."""
    samplerate = SR11025()
    signal = SineSynthesizer(samplerate).synthesize(
        Seconds(4), [220, 440, 880])
    _, windowed = signal.sliding_window_with_leftovers(
        TimeSlice(duration=samplerate.frequency * 512),
        TimeSlice(duration=samplerate.frequency * 256))
    dist = categorical(windowed, mu=255)
    self.assertEqual(windowed.shape + (255 + 1,), dist.shape)
    np.testing.assert_allclose(dist.sum(axis=-1), 1)
def test_can_take_fft_of_2d_stacked_signal(self):
    """Taking the FFT of a windowed 2d signal should preserve the time
    dimension and introduce a frequency dimension."""
    samples = SilenceSynthesizer(SR22050()).synthesize(Milliseconds(2500))
    _, windowed = samples.sliding_window_with_leftovers(
        windowsize=TimeSlice(duration=Milliseconds(200)),
        stepsize=TimeSlice(duration=Milliseconds(100)),
        dopad=True)
    coeffs = fft(windowed)
    self.assertIsInstance(coeffs, ArrayWithUnits)
    self.assertEqual(2, len(coeffs.dimensions))
    self.assertEqual(windowed.dimensions[0], coeffs.dimensions[0])
    self.assertIsInstance(coeffs.dimensions[1], FrequencyDimension)
def test_can_invert_categorical_distribution(self):
    """inverse_categorical() should round-trip categorical(), restoring
    both shape and dimension metadata."""
    samplerate = SR11025()
    signal = SineSynthesizer(samplerate).synthesize(
        Seconds(4), [220, 440, 880])
    _, windowed = signal.sliding_window_with_leftovers(
        TimeSlice(duration=samplerate.frequency * 512),
        TimeSlice(duration=samplerate.frequency * 256))
    dist = categorical(windowed, mu=255)
    recovered = inverse_categorical(dist, mu=255)
    self.assertEqual(windowed.shape, recovered.shape)
    self.assertIsInstance(recovered, ArrayWithUnits)
    self.assertSequenceEqual(windowed.dimensions, recovered.dimensions)
def test_can_phase_shift_2d_signal(self):
    """Phase-shifting FFT coefficients and resynthesizing should produce
    audio that measurably differs from the original."""
    samplerate = SR22050()
    samples = SineSynthesizer(samplerate).synthesize(
        Milliseconds(2500), [220, 440, 880])
    _, windowed = samples.sliding_window_with_leftovers(
        windowsize=TimeSlice(duration=Milliseconds(200)),
        stepsize=TimeSlice(duration=Milliseconds(100)),
        dopad=True)
    coeffs = fft(windowed)
    shifted = phase_shift(coeffs, samplerate, Milliseconds(40))
    resynthesized = FFTSynthesizer().synthesize(shifted).squeeze()
    self.assertNotEqual(
        0, self._mean_squared_error(samples, resynthesized))
def test_can_pad_for_better_frequency_resolution(self):
    """Zero-padding each window before the FFT should grow the number of
    frequency bins to ((window + padding) // 2) + 1."""
    samples = SilenceSynthesizer(SR22050()).synthesize(Milliseconds(2500))
    _, windowed = samples.sliding_window_with_leftovers(
        windowsize=TimeSlice(duration=Milliseconds(200)),
        stepsize=TimeSlice(duration=Milliseconds(100)),
        dopad=True)
    coeffs = fft(windowed, padding_samples=1024)
    self.assertIsInstance(coeffs, ArrayWithUnits)
    self.assertEqual(2, len(coeffs.dimensions))
    self.assertEqual(windowed.dimensions[0], coeffs.dimensions[0])
    self.assertIsInstance(coeffs.dimensions[1], FrequencyDimension)
    expected_bins = ((windowed.shape[-1] + 1024) // 2) + 1
    self.assertEqual(expected_bins, coeffs.shape[-1])
def test_can_decompose(self):
    """frequency_decomposition() should preserve the time dimension and
    produce an explicit frequency dimension."""
    sr = SR22050()
    samples = SilenceSynthesizer(sr).synthesize(Milliseconds(9999))
    scheme = sr.windowing_scheme(8192, 4096)
    _, windowed = samples.sliding_window_with_leftovers(
        TimeSlice(scheme.duration),
        TimeSlice(scheme.frequency),
        dopad=True)
    decomposed = frequency_decomposition(
        windowed, [32, 64, 128, 256, 512, 1024, 2048, 4096])
    self.assertEqual(windowed.dimensions[0], decomposed.dimensions[0])
    self.assertIsInstance(
        decomposed.dimensions[1], ExplicitFrequencyDimension)
def test_sliding_window(self):
    """A windowed signal can itself be re-windowed, producing a
    three-dimensional array of stacked windows."""
    samples = AudioSamples.silence(SR11025(), Seconds(30))
    sr = samples.samplerate * Stride(frequency=16, duration=512)
    windowed = samples.sliding_window(sr)
    self.assertEqual((512,), windowed.shape[1:])
    long_sr = SampleRate(
        frequency=sr.frequency * 2, duration=sr.frequency * 32)
    _, stacked = windowed.sliding_window_with_leftovers(
        windowsize=TimeSlice(duration=long_sr.duration),
        stepsize=TimeSlice(duration=long_sr.frequency),
        dopad=True)
    self.assertEqual((32, 512), stacked.shape[1:])
    self.assertEqual(3, stacked.ndim)
def test_2d_phase_shift_returns_correct_shape(self):
    """phase_shift() restricted to a frequency band should leave the
    coefficient array's shape unchanged."""
    samplerate = SR22050()
    samples = SineSynthesizer(samplerate).synthesize(
        Milliseconds(2500), [220, 440, 880])
    _, windowed = samples.sliding_window_with_leftovers(
        windowsize=TimeSlice(duration=Milliseconds(200)),
        stepsize=TimeSlice(duration=Milliseconds(100)),
        dopad=True)
    coeffs = fft(windowed)
    shifted = phase_shift(
        coeffs=coeffs,
        samplerate=samplerate,
        time_shift=Milliseconds(40),
        frequency_band=FrequencyBand(50, 5000))
    self.assertEqual(coeffs.shape, shifted.shape)
def test_time_slice_should_return_audio_samples(self):
    """Indexing with a one-second TimeSlice should return AudioSamples
    containing exactly one second at the original sample rate."""
    silence = AudioSamples.silence(SR11025(), Seconds(10))
    sliced = silence[TimeSlice(duration=Seconds(1))]
    self.assertIsInstance(sliced, AudioSamples)
    self.assertEqual(int(SR11025()), len(sliced))
    self.assertEqual(SR11025(), sliced.samplerate)
def _dequeue(self):
    """Pop as many complete windows as possible from the cache, keeping
    the un-windowed remainder for the next call.

    Raises NotEnoughData when no complete window is available yet.
    """
    window = TimeSlice(duration=self._scheme.duration)
    step = TimeSlice(duration=self._scheme.frequency)
    leftover, windowed = self._cache.sliding_window_with_leftovers(
        window, step, dopad=self._finalized)
    if not windowed.size:
        raise NotEnoughData()
    self._cache = leftover
    if self._func:
        # BUG: Operand order matters here; `windowed * self._func`
        # results in __rmul__ being called for each scalar value in the
        # windowed array, so keep the function on the left-hand side.
        return self._func * windowed
    return windowed
def test_from_timeslice_closed(self):
    """A closed TimeSlice should format as an explicit start-end range
    over the total duration."""
    ts = TimeSlice(
        start=Picoseconds(int(1e12)) * 2.5,
        duration=Milliseconds(2000))
    content_range = ContentRange.from_timeslice(ts, Seconds(100))
    self.assertEqual('seconds 2.5-4.5/100.0', str(content_range))
def test_can_get_closed_time_slice(self):
    """A bounded 'seconds=start-stop' header should parse into a closed
    TimeSlice with the matching start and duration."""
    rr = RangeRequest('seconds=10.5-100.5')
    parsed = rr.range()
    self.assertIsInstance(parsed, TimeSlice)
    expected = TimeSlice(
        start=Picoseconds(int(10.5 * 1e12)),
        duration=Picoseconds(int(90 * 1e12)))
    self.assertEqual(expected, parsed)
def _parse_result(self, result):
    """Decode a JSON search result into an (_id, TimeSlice) pair, or an
    (_id, TimeSlice, extra_data) triple when extra data was requested."""
    record = json.loads(result)
    ts = TimeSlice(**self.decoder.kwargs(record))
    if self.extra_data:
        return record['_id'], ts, record['extra_data']
    return record['_id'], ts
def test_can_apply_empty_time_slice_to_wrapper(self):
    """An empty TimeSlice applied to an encoded wrapper should select
    the entire ten-second stream."""
    synth = SineSynthesizer(SR11025())
    encoded = synth.synthesize(Seconds(10)).encode(
        fmt='OGG', subtype='VORBIS')
    wrapper = OggVorbisWrapper(encoded)
    samples = wrapper[TimeSlice()]
    expected_seconds = Seconds(10) / Seconds(1)
    actual_seconds = samples.end / Seconds(1)
    self.assertAlmostEqual(expected_seconds, actual_seconds, places=6)
def iter_chunks(self):
    """Yield successive one-second slices of this signal, stopping after
    the first chunk shorter than a full second."""
    chunk_duration = Seconds(1)
    current = TimeSlice(chunk_duration)
    chunk = self[current]
    yield chunk
    while len(chunk) >= self._n_samples(chunk_duration):
        current += chunk_duration
        chunk = self[current]
        yield chunk
def test_can_access_time_slice_and_int_index(self):
    """Mixed TimeSlice/integer indexing should slice the time axis and
    collapse the frequency axis."""
    tf = ArrayWithUnits(
        np.ones((10, 10)),
        dimensions=[
            TimeDimension(Seconds(1), Seconds(1)),
            FrequencyDimension(LinearScale(FrequencyBand(0, 1000), 10)),
        ])
    sliced = tf[TimeSlice(start=Seconds(1), duration=Seconds(2)), 0]
    self.assertEqual((2,), sliced.shape)
    self.assertIsInstance(sliced.dimensions[0], TimeDimension)
def time_slice(self, start, stop):
    """Build a TimeSlice from *start* and *stop* offsets in seconds.

    A *stop* value that cannot be parsed as a float yields an open-ended
    slice (duration of None).
    """
    begin = float(start)
    try:
        end = float(stop)
    except ValueError:
        end = None
    if end is None:
        duration = None
    else:
        duration = Picoseconds(int(1e12 * (end - begin)))
    return TimeSlice(duration, start=Picoseconds(int(1e12 * begin)))
def test_can_resynthesize_frequency_decomposition(self):
    """Resynthesizing a frequency decomposition should reproduce the
    windowed signal's time and sample dimensions."""
    sr = SR22050()
    source = SilenceSynthesizer(sr).synthesize(Milliseconds(9999))
    window_size = 8192
    scheme = sr.windowing_scheme(window_size, window_size // 2)
    _, windowed = source.sliding_window_with_leftovers(
        TimeSlice(scheme.duration),
        TimeSlice(scheme.frequency),
        dopad=True)
    decomposed = frequency_decomposition(
        windowed, [32, 64, 128, 256, 512, 1024, 2048, 4096])
    synth = FrequencyDecompositionSynthesizer(sr, window_size)
    resynthesized = synth.synthesize(decomposed)
    self.assertEqual(2, resynthesized.ndim)
    self.assertEqual(windowed.dimensions[1], resynthesized.dimensions[1])
    self.assertEqual(windowed.dimensions[0], resynthesized.dimensions[0])
def test_iter_slices_yields_evenly_spaced_time_slices(self):
    """iter_slices() should pair each row with a TimeSlice whose start
    advances by the series frequency."""
    raw = np.random.random_sample((10, 3))
    arr = ArrayWithUnits(
        raw,
        dimensions=[
            TimeDimension(frequency=Milliseconds(500), duration=Seconds(1)),
            IdentityDimension(),
        ])
    slices = list(ConstantRateTimeSeries(arr).iter_slices())
    self.assertEqual(10, len(slices))
    first_slice, first_data = slices[0]
    self.assertEqual(
        TimeSlice(start=Seconds(0), duration=Seconds(1)), first_slice)
    np.testing.assert_allclose(raw[0], first_data)
    second_slice, second_data = slices[1]
    self.assertEqual(
        TimeSlice(start=Milliseconds(500), duration=Seconds(1)),
        second_slice)
    np.testing.assert_allclose(raw[1], second_data)
def stft(x, window_sample_rate=HalfLapped(), window=HanningWindowingFunc()):
    """Compute a short-time Fourier transform of ``x``.

    ``x`` may have a single time dimension, or a leading
    ``IdentityDimension`` followed by a time dimension (a batch of
    signals); any other layout raises ``ValueError``. Each window is
    multiplied by *window* (falling back to ``IdentityWindowingFunc``
    when *window* is falsy) before the FFT is applied.
    """
    # NOTE(review): both default arguments are instances created once at
    # definition time and shared across all calls — acceptable only if
    # these objects are immutable; confirm.
    duration = TimeSlice(window_sample_rate.duration)
    frequency = TimeSlice(window_sample_rate.frequency)
    if x.ndim == 1:
        # Single signal: window it directly, padding the final partial
        # window with zeros.
        _, arr = x.sliding_window_with_leftovers(
            duration, frequency, dopad=True)
    elif x.ndim == 2 and isinstance(x.dimensions[0], IdentityDimension):
        # Batch of signals: window each one, then restore the batch axis
        # and rebuild the dimension metadata by hand.
        arr = x.sliding_window((1, duration), (1, frequency))
        td = x.dimensions[-1]
        dims = [IdentityDimension(), TimeDimension(*window_sample_rate), td]
        arr = ArrayWithUnits(arr.reshape((len(x), -1, arr.shape[-1])), dims)
    else:
        raise ValueError('x must either have a single TimeDimension, or '
                         '(IdentityDimension, TimeDimension)')
    # A falsy *window* argument means "no windowing function".
    window = window or IdentityWindowingFunc()
    windowed = arr * window._wdata(arr.shape[-1])
    return fft(windowed)
def _process(self, data):
    """Detect onsets in *data* and yield them as a VariableRateTimeSeries
    of (slice, empty-array) pairs.

    Maintains streaming state across calls: ``self._pos`` tracks the
    absolute time of the start of the next chunk, and
    ``self._leftover_timestamp`` carries the last onset (or chunk
    boundary) into the next call so slices span chunk boundaries.
    """
    td = data.dimensions[0]
    frequency = td.frequency
    indices = self._onset_indices(data)
    # Convert frame indices within this chunk to absolute timestamps.
    timestamps = self._pos + (indices * frequency)
    # Advance the stream position past this chunk before building slices.
    self._pos += len(data) * frequency
    # Prepend the carried-over timestamp so the first slice of this chunk
    # starts where the previous chunk left off; carry the last timestamp
    # forward for the next call.
    timestamps = [self._leftover_timestamp] + list(timestamps)
    self._leftover_timestamp = timestamps[-1]
    time_slices = TimeSlice.slices(timestamps)
    # Each onset slice carries no payload, only its time extent.
    vrts = VariableRateTimeSeries([(ts, np.zeros(0)) for ts in time_slices])
    yield vrts
def test_can_apply_sliding_window(self):
    """Sliding a (TimeSlice, FrequencyBand) window over a 99x64
    time-frequency array should yield a single 64x64 patch."""
    sr = SR11025()
    hop = sr.half_lapped()
    scale = GeometricScale(20, sr.nyquist, 0.175, 64)
    arr = ArrayWithUnits(
        np.zeros((99, 64)),
        [TimeDimension(frequency=hop.frequency, duration=hop.duration),
         FrequencyDimension(scale)])
    time_window = TimeSlice(duration=hop.frequency * 64)
    freq_window = FrequencyBand(0, sr.nyquist)
    windowed = arr.sliding_window((time_window, freq_window))
    self.assertEqual((1, 64, 64), windowed.shape)
def test_perfect_reconstruction_using_overlap_add(self):
    """MDCT analysis followed by overlap-add synthesis should perfectly
    reconstruct the interior of the signal."""
    audio = SineSynthesizer(SR22050()).synthesize(
        Seconds(10), [440., 660., 880.])
    windowed = audio.sliding_window(
        SampleRate(duration=Seconds(1), frequency=Milliseconds(500)))
    coeffs = list(MDCT()._process(windowed * OggVorbisWindowingFunc()))[0]
    recon = MDCTSynthesizer().synthesize(coeffs)
    # Compare only an interior slice so boundary conditions are ignored.
    interior = TimeSlice(start=Seconds(1), duration=Seconds(8))
    np.testing.assert_allclose(recon[interior], audio[interior])
def serialize(self, context):
    """Render the requested slice of an audio feature as an OGG/Vorbis
    byte stream, wrapped in a TempResult with range metadata."""
    feature = context.feature
    document = context.document
    slce = context.slce
    wrapper = feature(_id=document._id, persistence=document)
    samples = wrapper[slce]
    buffer = BytesIO()
    with SoundFile(
            buffer,
            mode='w',
            samplerate=wrapper.samplerate,
            channels=wrapper.channels,
            format='OGG',
            subtype='VORBIS') as sound_file:
        sound_file.write(samples)
    buffer.seek(0)
    total_duration = Picoseconds(int(1e12 * wrapper.duration_seconds))
    content_range = ContentRange.from_timeslice(slce, total_duration)
    return TempResult(
        buffer.read(),
        'audio/ogg',
        is_partial=slce != TimeSlice(),
        content_range=content_range)
def test_can_get_open_ended_time_slice(self):
    """A 'seconds=0-' header should parse into an open-ended TimeSlice
    starting at zero."""
    rr = RangeRequest('seconds=0-')
    parsed = rr.range()
    self.assertIsInstance(parsed, TimeSlice)
    self.assertEqual(TimeSlice(start=Seconds(0)), parsed)
def test_from_timeslice_open_ended(self):
    """An open-ended TimeSlice should format as running from its start
    to the total duration."""
    ts = TimeSlice(start=Picoseconds(int(1e12)) * 2.5)
    content_range = ContentRange.from_timeslice(ts, Seconds(100))
    self.assertEqual('seconds 2.5-100.0/100.0', str(content_range))
def test_from_timeslce_full_slice(self):
    """An empty TimeSlice should format as covering the entire
    duration."""
    content_range = ContentRange.from_timeslice(TimeSlice(), Seconds(100))
    self.assertEqual('seconds 0.0-100.0/100.0', str(content_range))
def test_can_get_end_of_ogg_vorbis_feature_with_slice(self):
    """Slicing one second of the ogg feature should yield 22050
    samples."""
    ogg = self.doc.ogg
    sliced = ogg[TimeSlice(Seconds(1), Milliseconds(9500))]
    self.assertEqual(22050, len(sliced))