Code Example #1
def listen():
    padding = zounds.Milliseconds(250)
    z = np.concatenate(list(walk2(1000)))
    result = p.pipeline.transform(z).data.squeeze()
    x = np.concatenate([
        zounds.AudioSamples(j, samplerate).pad_with_silence(padding)
        for j in result
    ])
    return zounds.AudioSamples(x, zounds.SR11025())
Code Example #2
def view_band(index):
    from scipy.signal import resample
    band = bands[index].squeeze()
    band = resample(band, total_samples)
    samples = zounds.AudioSamples(band, sr)
    coeffs = np.abs(zounds.spectral.stft(samples))
    return coeffs
Code Example #3
def preview(fake_batch):

    # fake_batch = fake_batch * spec_std
    # fake_batch = fake_batch + spec_mean

    fake_batch = torch.from_numpy(fake_batch)
    inp = fake_batch[0]  # (256, 512)

    window = audio_generator_input_size
    inp = inp.unfold(1, window, window)  # (256, 8, 64)

    inp = inp.permute(1, 0, 2)  # (8, 256, 64)
    bands = audio_generator(inp)
    samples = frequency_recomposition([
        np.concatenate(b.data.cpu().numpy().reshape(1, 1, -1), axis=-1)
        for b in bands.values()
    ], total_feature_samples)

    # synth = zounds.MDCTSynthesizer()
    # coeffs = zounds.ArrayWithUnits(fake_batch[0].T, [
    #     zounds.TimeDimension(frequency=sr.frequency * 256, duration=sr.frequency * 512),
    #     zounds.IdentityDimension()
    # ])
    # samples = synth.synthesize(coeffs)

    return zounds.AudioSamples(samples.squeeze(), sr).pad_with_silence()
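As an aside, the unfold/permute step in preview can be mirrored in plain NumPy; the sketch below is illustrative only and assumes the (256, 512) feature shape and window of 64 noted in the inline comments.

import numpy as np

# illustrative NumPy equivalent of inp.unfold(1, window, window) followed
# by permute(1, 0, 2): split 512 time steps into 8 chunks of 64 and make
# the chunk axis the leading (batch) axis for the audio generator
feat = np.zeros((256, 512))
window = 64
chunks = feat.reshape(256, 512 // window, window)  # (256, 8, 64)
chunks = chunks.transpose(1, 0, 2)                 # (8, 256, 64)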
Code Example #4
def g_sample():
    recomposed = frequency_recomposition(bands, total_samples)
    index = np.random.randint(0, len(recomposed))
    fake_sample = zounds.AudioSamples(recomposed[index], sr)
    fake_sample /= fake_sample.max()
    coeffs = np.abs(zounds.spectral.stft(fake_sample))
    return fake_sample, coeffs
Code Example #5
def hear_real_band(samples, index):
    from scipy.signal import resample
    band = samples[index][0].data.cpu().numpy().squeeze()
    if len(band) != total_samples:
        band = resample(band, total_samples)
    samples = zounds.AudioSamples(band, sr)
    samples /= (samples.max() + 1e-12)
    return samples
Code Example #6
def hear_band(index):
    from scipy.signal import resample
    band = bands[index][0].squeeze()
    if len(band) != total_samples:
        band = resample(band, total_samples)
    samples = zounds.AudioSamples(band, sr)
    samples /= (samples.max() + 1e-12)
    return samples
Code Example #7
def to_audio(self):
    log_mag, phase = self.data[..., 0], self.data[..., 1]
    mag = np.exp(log_mag)
    phase = np.cumsum(phase, axis=0)
    phase = (phase + np.pi) % (2 * np.pi) - np.pi
    coeffs = mag * np.exp(1j * phase)
    samples = self.proc.istft(coeffs)
    return zounds.AudioSamples(samples, self.samplerate)[None, :]
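This method only performs synthesis; the forward analysis it presupposes (log-magnitude plus per-frame phase deltas) is not shown. A hypothetical sketch of that direction, with assumed names and layout, might look like:

import numpy as np

def to_features(coeffs):
    # coeffs: complex STFT frames, assumed shape (frames, bins)
    log_mag = np.log(np.abs(coeffs) + 1e-12)
    phase = np.angle(coeffs)
    # frame-to-frame phase deltas; to_audio() re-integrates them with
    # np.cumsum and wraps the result back into [-pi, pi)
    phase = np.diff(phase, axis=0, prepend=0.0)
    return np.stack([log_mag, phase], axis=-1)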
Code Example #8
def view_real_band(samples, index):
    from scipy.signal import resample
    band = samples[index][0].data.cpu().numpy().squeeze()
    if len(band) != total_samples:
        band = resample(band, total_samples)
    samples = zounds.AudioSamples(band, sr)
    coeffs = np.abs(zounds.spectral.stft(samples))
    return coeffs
Code Example #9
def test_synthetic(batch_size):
    synth = zounds.SineSynthesizer(sr)
    samples = synth.synthesize(sr.frequency * total_samples,
                               [55, 110, 220, 440, 880, 1660, 1660 * 2])
    batch = np.repeat(samples[None, :], batch_size, axis=0)
    bands = frequency_decomposition(batch, band_sizes)
    recomposed = frequency_recomposition(bands, total_samples)
    recomposed = zounds.AudioSamples(recomposed[0], sr).pad_with_silence()
    return samples, recomposed
Code Example #10
def test_filter_bank_recon(samples, return_spectral=False):
    samples = samples[:1, ...]

    samples /= samples.max()

    bands = frequency_decomposition(samples, band_sizes)
    new_bands = []
    spectral = []

    for band, fb in zip(bands, filter_banks):
        band = torch.from_numpy(band).float().to(device)
        sp = fb.convolve(band)
        spectral.append(sp.data.cpu().numpy())
        band = fb.transposed_convolve(sp)
        new_bands.append(band.data.cpu().numpy())

    final = frequency_recomposition(new_bands, total_samples)
    orig = zounds.AudioSamples(samples.squeeze(), sr)
    final = zounds.AudioSamples(final.squeeze(), sr)
    final /= final.max()
    if return_spectral:
        return orig, final, spectral
    else:
        return orig, final
Code Example #11
def load_and_play():
    files = sorted(
        glob.glob('*.npy'),
        key=lambda x: os.stat(x).st_ctime)
    most_recent = files[-1]
    print('loading generated examples from', most_recent)
    results = np.load(most_recent)

    # synthesized = FrequencyDecomposition.synthesize_block(results)
    synthesized = results

    for raw, result in zip(results, synthesized):
        windowed = zounds.sliding_window(result, 512, 256)
        spec = np.abs(np.fft.rfft(windowed))
        audio_samples = zounds.AudioSamples(result, samplerate) \
            .pad_with_silence(zounds.Seconds(1))
        yield raw, result, audio_samples / audio_samples.max(), spec
Code Example #12
    def check_recon():
        spec, = next(stream(batch_size=1))
        batch, channels, time = spec.shape
        spec = spec.transpose((0, 2, 1)).reshape((batch * time, channels))

        norms = np.linalg.norm(spec, axis=-1, keepdims=True)
        spec /= norms + 1e-12

        indices = kmeans.predict(spec)
        centers = kmeans.cluster_centers_[indices]

        centers *= norms

        recon = centers.reshape((batch, time, channels)).transpose((0, 2, 1))

        bands = generator.forward(torch.from_numpy(recon))
        audio = fft_frequency_recompose(bands, 256 * 256).data.cpu().numpy()
        audio = zounds.AudioSamples(audio.squeeze(), samplerate)
        return spec, recon.squeeze().T, audio
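check_recon relies on an already-fitted kmeans model over unit-norm spectral frames. A minimal sketch of that setup, where the MiniBatchKMeans choice and the n_clusters value are assumptions rather than details from the source, could be:

import numpy as np
from sklearn.cluster import MiniBatchKMeans

def fit_kmeans(stream, n_clusters=512, n_batches=100):
    # fit a quantizer on L2-normalized spectral frames, mirroring the
    # normalization performed inside check_recon()
    kmeans = MiniBatchKMeans(n_clusters=n_clusters)
    for _ in range(n_batches):
        spec, = next(stream(batch_size=1))
        batch, channels, time = spec.shape
        frames = spec.transpose((0, 2, 1)).reshape((batch * time, channels))
        frames /= np.linalg.norm(frames, axis=-1, keepdims=True) + 1e-12
        kmeans.partial_fit(frames)
    return kmeans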
Code Example #13
def test_spectral_filtering():
    total_samples = 16384
    window_size = 32
    hop_size = 16

    # one set of filter coefficients per hop (project-local helper)
    coeffs = get_filter_coeffs(window_size, total_samples // hop_size)

    noise = np.random.uniform(-1, 1, total_samples)
    noise = np.pad(noise, ((0, hop_size), ), mode='constant')
    # overlapping frames of window_size samples at hop_size intervals
    windowed = zounds.sliding_window(noise, window_size, hop_size)
    # real FFT of each frame: window_size // 2 + 1 bins
    noise_coeffs = np.fft.rfft(windowed, axis=-1, norm='ortho')

    filtered = coeffs.transpose((0, 2, 1)) * noise_coeffs
    recovered = np.fft.irfft(filtered, axis=-1, norm='ortho')
    samples = np_overlap_add(recovered[:, None, :, :], apply_window=True)
    samples = samples.squeeze()[:total_samples]
    return zounds.AudioSamples(samples, zounds.SR11025()).pad_with_silence()
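np_overlap_add is a project-local helper. Assuming it windows each frame and sums frames at hop_size = window_size // 2 offsets (consistent with the 32/16 values above), a minimal reference version might be:

import numpy as np

def overlap_add(frames, apply_window=True):
    # frames: (batch, channels, n_frames, window_size); hop is half a window
    batch, channels, n_frames, window_size = frames.shape
    hop = window_size // 2
    if apply_window:
        frames = frames * np.hanning(window_size)[None, None, None, :]
    out = np.zeros((batch, channels, n_frames * hop + hop))
    for i in range(n_frames):
        out[..., i * hop:i * hop + window_size] += frames[..., i, :]
    return out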
Code Example #14
def display(self):
    raw = self.to_audio()[0]
    audio = zounds.AudioSamples(raw, self.samplerate)
    return spectrogram(audio)
Code Example #15
        resampled = fft_resample(band, desired_size, size == first_band)
        # if size != desired_size:
        #     resampled = torch.zeros_like(resampled)
        bands.append(resampled)
    return sum(bands)


if __name__ == '__main__':
    app = zounds.ZoundsApp(globals=globals(), locals=locals())
    app.start_in_thread(9999)

    sr = zounds.SR11025()
    synth = zounds.NoiseSynthesizer(sr)
    noise = synth.synthesize(sr.frequency * 16385)
    signal = torch.from_numpy(noise).view(1, 1, 16384).float()

    rs = fft_resample(signal, 16384, is_lowest_band=False)
    rs = zounds.AudioSamples(rs.data.cpu().numpy().squeeze(), sr)

    bands = fft_frequency_decompose(signal, 512)
    recon = {}

    for k, v in bands.items():
        print(k, v.shape)
        recon[k] = zounds.AudioSamples(
            fft_resample(v, 16384, k == 512).data.cpu().numpy().squeeze(), sr)

    r = fft_frequency_recompose(bands, 16384)
    r = zounds.AudioSamples(r.data.cpu().numpy().squeeze(), sr)

    input('waiting...')
Code Example #16
def listen(self):
    return zounds.AudioSamples(self.to_audio()[0], self.samplerate)\
        .pad_with_silence(zounds.Seconds(1))
Code Example #17
def synthesize_iter(self):
    fa = self.as_frequency_adaptive()
    samples = self.__class__.synthesize_block(fa)
    for sample in samples:
        yield sample, zounds.AudioSamples(sample, samplerate) \
            .pad_with_silence(zounds.Seconds(1))
Code Example #18
#     noise = torch.FloatTensor(16384).uniform_(-1, 1)
#     windowed = noise.unfold(-1, 256, 256)
#     noise_coeffs = torch.rfft(windowed, 1, normalized=True)
#     noise_coeffs = noise_coeffs.view(1, 64, 129, 2)
#
#     coeffs = coeffs.permute(0, 2, 1)[..., None]
#
#     filtered = coeffs * noise_coeffs
#     recovered = torch.irfft(filtered, 1, normalized=True, signal_sizes=(256,))
#     recovered = recovered.view(-1)
#     return zounds.AudioSamples(
#         recovered.data.cpu().numpy().squeeze(),
#         zounds.SR11025()
#     ).pad_with_silence()

real_noise = zounds.AudioSamples(np.random.uniform(-1, 1, 16384),
                                 zounds.SR11025()).pad_with_silence()
spec_test = test_spectral_filtering()
# spec_test /= (spec_test.max() + 1e-12)
# torch_spec_test = test_spectral_filtering_torch()
# torch_spec_test /= (torch_spec_test.max() + 1e-12)

if __name__ == '__main__':
    app = zounds.ZoundsApp(globals=globals(), locals=locals())
    app.start_in_thread(8888)

    feature_size = 64

    g = DDSPGenerator(feature_size, feature_channels, 128, None, None, None,
                      None) \
        .to(device) \
        .initialize_weights()
Code Example #19
File: effects.py  Project: maozhiqiang/zounds

if __name__ == '__main__':
    parser = argparse.ArgumentParser(parents=[AppSettings()])
    parser.add_argument(
        '--sound-uri',
        default=
        'https://archive.org/download/LucaBrasi2/06-Kevin_Gates-Out_The_Mud_Prod_By_The_Runners_The_Monarch.ogg'
    )
    args = parser.parse_args()

    _id = Sound.process(meta=args.sound_uri)
    snd = Sound(_id)

    original = snd.resampled
    slow = zounds.AudioSamples(time_stretch(original, 0.75).squeeze(), sr)
    fast = zounds.AudioSamples(time_stretch(original, 1.25).squeeze(), sr)

    higher = zounds.AudioSamples(pitch_shift(original, 1.0).squeeze(), sr)
    lower = zounds.AudioSamples(pitch_shift(original, -1.0).squeeze(), sr)

    # apply a sliding window to demonstrate time stretch and pitch shift in
    # batch mode
    windowing_sr = zounds.SampleRate(frequency=zounds.Seconds(5),
                                     duration=zounds.Seconds(10))

    windowed = snd.resampled.sliding_window(windowing_sr)
    windowed = zounds.ArrayWithUnits(
        windowed, [zounds.IdentityDimension(), windowed.dimensions[1]])

    def samples(x):
Code Example #20
def fake_audio():
    samples = zounds.AudioSamples(generated[0].squeeze(), sr)
    return samples.pad_with_silence()
Code Example #21
File: effects.py  Project: maozhiqiang/zounds
def samples(x):
    return zounds.AudioSamples(x, sr)
Code Example #22
        samples = zounds.AudioSamples(generated[0].squeeze(), sr)
        return samples.pad_with_silence()

    def fake_spec():
        return zounds.log_modulus(spectrogram(fake_audio()) * 100)

    def r_spec():
        return zounds.log_modulus(spectrogram(real_audio) * 100)

    for samples, features in batch_stream:

        samples /= np.abs(samples).max(axis=-1, keepdims=True) + 1e-12
        features /= features.max(axis=(1, 2), keepdims=True) + 1e-12

        real_spec = features[0].T
        real_audio = zounds.AudioSamples(samples[0].squeeze(),
                                         sr).pad_with_silence()

        samples = torch.from_numpy(samples).to(device)
        # samples = normalize(samples)

        features = torch.from_numpy(features).to(device)
        # features = normalize(features)

        step = next(steps)
        data = step(samples, features)
        print({k: v for k, v in data.items() if 'loss' in k})
        try:
            generated = data['fake']
        except KeyError:
            pass
        batch_count += 1