def real_stft(self):
    snd = self.sound_cls.random()
    windowed = choice(snd.windowed)
    windowed = AudioSamples(
        windowed,
        audio_sample_rate(windowed.dimensions[0].samples_per_second))
    return self._stft(windowed)
def fft(x, axis=-1, padding_samples=0):
    """
    Apply an FFT along the given dimension, and with the specified amount of
    zero-padding

    Args:
        x (ArrayWithUnits): an :class:`~zounds.core.ArrayWithUnits` instance
            which has one or more :class:`~zounds.timeseries.TimeDimension`
            axes
        axis (int): The axis along which the fft should be applied
        padding_samples (int): The number of padding zeros to apply along
            axis before performing the FFT
    """
    if padding_samples > 0:
        padded = np.concatenate(
            [x, np.zeros((len(x), padding_samples), dtype=x.dtype)],
            axis=axis)
    else:
        padded = x

    transformed = np.fft.rfft(padded, axis=axis, norm='ortho')

    sr = audio_sample_rate(int(Seconds(1) / x.dimensions[axis].frequency))
    scale = LinearScale.from_sample_rate(sr, transformed.shape[-1])
    new_dimensions = list(x.dimensions)
    new_dimensions[axis] = FrequencyDimension(scale)
    return ArrayWithUnits(transformed, new_dimensions)
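# A minimal, hypothetical usage sketch (the `samples` variable and the padding
# amount are assumptions, not part of the library): zero-padding before the
# transform yields more, finer-spaced frequency bins in the resulting
# FrequencyDimension.
#
#   spectrum = fft(samples, axis=-1, padding_samples=512)
#   spectrum.dimensions[-1]  # a FrequencyDimension backed by a LinearScale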
def frequency_decomposition(x, sizes):
    # Decompose the signal into contiguous frequency bands: each pass
    # downsamples to capture a band, then subtracts that band (resampled back
    # to the original size) from the residual signal.
    sizes = sorted(sizes)

    # Ensure a leading time (frame) dimension for one-dimensional input
    if x.ndim == 1:
        end = x.dimensions[0].end
        x = ArrayWithUnits(
            x[None, ...], [TimeDimension(end, end), x.dimensions[0]])

    original_size = x.shape[-1]
    time_dimension = x.dimensions[-1]
    samplerate = audio_sample_rate(time_dimension.samples_per_second)

    data = x.copy()
    bands = []
    frequency_bands = []
    start_hz = 0

    for size in sizes:
        if size != original_size:
            s = resample(data, size, axis=-1)
        else:
            s = data.copy()

        bands.append(s)
        data -= resample(s, original_size, axis=-1)

        stop_hz = samplerate.nyquist * (size / original_size)
        frequency_bands.append(FrequencyBand(start_hz, stop_hz))
        start_hz = stop_hz

    scale = ExplicitScale(frequency_bands)
    return FrequencyAdaptive(
        bands, scale=scale, time_dimension=x.dimensions[0])
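# A hypothetical usage sketch (the `windowed_audio` variable and the chosen
# sizes are assumptions): the sizes are per-frame sample counts for each band,
# so smaller sizes cover narrower, lower-frequency bands at coarser time
# resolution.
#
#   bands = frequency_decomposition(windowed_audio, [32, 64, 128, 256, 512])
#   bands.scale  # an ExplicitScale built from contiguous FrequencyBands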
def _get_samples(self):
    raw_samples = self._sf.read(self._chunk_size_samples)
    sr = audio_sample_rate(self._sf.samplerate)
    samples = AudioSamples(raw_samples, sr)
    if self._sum_to_mono:
        return samples.mono
    return samples
def _process(self, data):
    transformed = self._process_raw(data)
    sr = audio_sample_rate(data.dimensions[1].samples_per_second)
    scale = LinearScale.from_sample_rate(sr, transformed.shape[1])
    yield ArrayWithUnits(
        transformed, [data.dimensions[0], FrequencyDimension(scale)])
def _process(self, data):
    raw = self._process_raw(data)
    # Infer the sample rate from the number of samples per frame and the
    # frame duration, then attach a linear frequency scale to the output.
    sr = audio_sample_rate(
        int(data.shape[1] / data.dimensions[0].duration_in_seconds))
    scale = LinearScale.from_sample_rate(
        sr, data.shape[1], always_even=self.scale_always_even)
    yield ArrayWithUnits(
        raw, [data.dimensions[0], FrequencyDimension(scale)])
def _process(self, data):
    transformed = dct(data, norm='ortho', axis=self._axis)
    sr = audio_sample_rate(
        int(data.shape[1] / data.dimensions[0].duration_in_seconds))
    scale = LinearScale.from_sample_rate(
        sr, transformed.shape[-1], always_even=self.scale_always_even)
    yield ArrayWithUnits(
        transformed, [data.dimensions[0], FrequencyDimension(scale)])
def __getitem__(self, timeslice):
    sr = audio_sample_rate(self.samplerate)

    # A bare slice(None) means "everything": rewind and read the whole file.
    if timeslice == slice(None):
        self._sf.seek(0)
        return AudioSamples(self._sf.read(len(self._sf)), sr)

    # Otherwise, seek to the requested start time and read just enough
    # samples to cover the requested duration.
    start_sample = int(timeslice.start / self._freq)
    n_samples = self._n_samples(timeslice.duration)
    self._sf.seek(start_sample)
    return AudioSamples(self._sf.read(n_samples), sr)
def __call__(self, flo):
    raw = super(AudioSamplesDecoder, self).__call__(flo)
    samplerate = audio_sample_rate(raw.dimensions[0].samples_per_second)
    return AudioSamples(raw, samplerate)