def perceptual(x):
    """
    Weight ``x`` with ``zounds.AWeighting()`` in the frequency domain and
    return the re-synthesized signal.

    The last axis of ``x`` is transformed with an orthonormal real FFT, the
    coefficients are wrapped in a ``zounds.ArrayWithUnits`` whose last
    dimension is a linear frequency dimension derived from ``samplerate`` (a
    name defined outside this function), multiplied in place by
    ``zounds.AWeighting()``, and transformed back with the inverse real FFT.

    Args:
        x: array exposing ``shape`` and ``dimensions``. Only
            ``x.dimensions[0]`` is reused for the coefficient array, so ``x``
            is presumably two-dimensional -- TODO confirm against callers.

    Returns:
        A ``zounds.ArrayWithUnits`` holding the weighted samples, tagged with
        ``x.dimensions``.
    """
    n_samples = x.shape[-1]

    coeffs = np.fft.rfft(x, norm='ortho', axis=-1)
    scale = zounds.LinearScale.from_sample_rate(samplerate, coeffs.shape[-1])
    arr = zounds.ArrayWithUnits(
        coeffs, [x.dimensions[0], zounds.FrequencyDimension(scale)])
    arr *= zounds.AWeighting()

    # irfft defaults to an even output length of 2 * (bins - 1). Passing the
    # original length keeps odd-length input the same size as x, so the result
    # still matches x.dimensions.
    samples = np.fft.irfft(arr, n=n_samples, norm='ortho', axis=-1)
    return zounds.ArrayWithUnits(samples, x.dimensions)
class Sound(Resampled):
    """
    A simple pipeline that computes a perceptually weighted modified discrete
    cosine transform, and "persists" feature data in an in-memory store.

    The features declared here form a chain:
    ``Resampled.resampled`` -> ``windowed`` -> ``mdct`` -> ``weighted``.

    NOTE(review): the store itself is not configured in this class; it is
    presumably set up on ``Resampled`` -- confirm.
    """

    # Sliding windows over the resampled audio, using the half-lapped
    # windowing scheme and the Ogg Vorbis windowing function. This is the only
    # feature in this class declared with store=True.
    windowed = zounds.ArrayWithUnitsFeature(
        zounds.SlidingWindow,
        needs=Resampled.resampled,
        wscheme=zounds.HalfLapped(),
        wfunc=zounds.OggVorbisWindowingFunc(),
        store=True)

    # MDCT computed from the windowed feature.
    mdct = zounds.ArrayWithUnitsFeature(zounds.MDCT, needs=windowed)

    # MDCT output multiplied by zounds.AWeighting().
    weighted = zounds.ArrayWithUnitsFeature(
        lambda x: x * zounds.AWeighting(),
        needs=mdct)
def spectrogram(x):
    """
    Turn ``x`` into a scaled, log-compressed, A-weighted spectrogram.

    Steps, in order:
      1. take the absolute value of the real part of ``x``;
      2. map it onto ``scale`` (defined outside this function) through
         ``apply_scale``, using the Ogg Vorbis windowing function;
      3. multiply by 100 and pass the result through ``zounds.log_modulus``;
      4. multiply by ``zounds.AWeighting()``.

    Args:
        x: input coefficients exposing a ``real`` attribute.

    Returns:
        The weighted result of step 4.
    """
    magnitudes = np.abs(x.real)
    banded = apply_scale(
        magnitudes, scale, window=zounds.OggVorbisWindowingFunc())
    compressed = zounds.log_modulus(banded * 100)
    return compressed * zounds.AWeighting()
from log import module_logger

logger = module_logger(__file__)

# Number of bands in the mel scale below.
N_FREQUENCY_BANDS = 512
# Sample rate that incoming audio is resampled to before analysis.
SAMPLE_RATE = zounds.SR11025()
# Frequency band from 20 up to the nyquist value of SAMPLE_RATE.
frequency_band = zounds.FrequencyBand(20, SAMPLE_RATE.nyquist)
scale = zounds.MelScale(frequency_band, N_FREQUENCY_BANDS)
# Kernel size passed to the filter bank; also used to size the analysis
# windows in SpectrogramListener._process_samples.
FILTER_BANK_KERNEL_SIZE = 512
# Morlet filter bank over `scale`, with one scaling factor per band, spaced
# linearly from 0.1 to 1.0.
FILTER_BANK = zounds.spectral.morlet_filter_bank(
    SAMPLE_RATE,
    FILTER_BANK_KERNEL_SIZE,
    scale,
    scaling_factor=np.linspace(
        0.1, 1.0, len(scale)),
    normalize=True)
# Weight the bank in place, then rebuild it with np.array(). With numpy's
# default subok=False this yields a base ndarray, dropping any ndarray
# subclass type the filter bank had.
FILTER_BANK *= zounds.AWeighting()
FILTER_BANK = np.array(FILTER_BANK)


class SpectrogramListener(SoundListener):
    """Sound listener whose sample processing resamples and windows audio."""

    def __init__(self, client, s3_client, page_size=3, logger=None):
        """Forward all arguments unchanged to ``SoundListener.__init__``."""
        super().__init__(client, s3_client, page_size, logger)

    def _process_samples(self, samples):
        """
        Reduce ``samples`` to mono, resample to ``SAMPLE_RATE`` and slice the
        result into sliding windows.

        NOTE(review): no use of ``windowed`` and no ``return`` is visible
        after the final assignment; the method may continue beyond the part
        of the file shown here -- confirm before relying on its return value.
        """
        samples = samples.mono
        samples = zounds.soundfile.resample(samples, SAMPLE_RATE)
        # The `frequency` argument is half the `duration` argument, both
        # expressed as multiples of SAMPLE_RATE.frequency -- presumably
        # kernel-sized windows advancing by half a kernel; TODO confirm
        # against zounds.SampleRate.
        windowing_sample_rate = zounds.SampleRate(
            frequency=(FILTER_BANK_KERNEL_SIZE // 2) * SAMPLE_RATE.frequency,
            duration=FILTER_BANK_KERNEL_SIZE * SAMPLE_RATE.frequency)
        windowed = samples.sliding_window(windowing_sample_rate)
        windowed = np.asarray(windowed)
# NOTE(review): this assignment is referenced below as `Sound.chroma`, so it
# presumably belongs to the body of a class whose header lies outside this
# view -- confirm its indentation against the full file. `band` and `window`
# are likewise defined outside this view.
chroma = zounds.ArrayWithUnitsFeature(
    zounds.Chroma,
    frequency_band=band,
    window=window,
    needs=BaseModel.fft)


if __name__ == '__main__':
    # The module's global and local namespaces are handed to the app.
    app = zounds.ZoundsApp(
        model=Sound,
        visualization_feature=Sound.chroma,
        audio_feature=Sound.ogg,
        globals=globals(),
        locals=locals())

    port = 9999

    # NOTE(review): the nesting of the statements under `with`, and the
    # placement of app.start(port) after it, are reconstructed from flattened
    # source -- confirm.
    with app.start_in_thread(port):
        url = 'https://ia802606.us.archive.org/9/items/AOC11B/onclassical_luisi_bach_partita_B-flat-major_bwv-825_6.ogg'
        _id = Sound.process(meta=url)
        snd = Sound(_id)
        chroma_scale = zounds.ChromaScale(band)
        # `chroma` and `basis` are not used again in this block; presumably
        # they are kept so they can be inspected through the namespaces
        # passed to ZoundsApp -- confirm before removing.
        chroma = chroma_scale.apply(
            np.abs(snd.fft) * zounds.AWeighting(), window)
        basis = chroma_scale._basis(snd.fft.dimensions[-1].scale, window)

    app.start(port)