Example 1
def compute_features_batched(samples, samples_per_example, batch_size,
                             feature_func, frame_hop=256, min_frames=64):
    """Compute features over `samples` in fixed-size batches.

    Parameters
    ----------
    samples : 1-D array of audio samples
    samples_per_example : window length fed to `feature_func`
    batch_size : number of windows per `feature_func` call
    feature_func : callable mapping (batch, samples_per_example) ->
        (batch, channels, time)
    frame_hop : hop size used to derive the expected frame count
        (default 256, matching the previous hard-coded value)
    min_frames : minimum number of frames to keep (default 64)

    Returns
    -------
    (time, channels) array of features, trimmed of padding frames.
    """
    # Number of valid output frames before any padding is added
    expected_frames = max(min_frames, len(samples) // frame_hop)

    # Pad up to a whole number of examples.  `-n % m` is 0 when the
    # length already divides evenly (the old `m - n % m` padded a full
    # extra all-zero window in that case).
    padding_amt = -len(samples) % samples_per_example
    samples = np.pad(samples, ((0, padding_amt),), mode='constant')

    # Compute features in batches of windows
    x = zounds.sliding_window(samples, samples_per_example)
    results = [feature_func(x[i:i + batch_size])
               for i in range(0, len(x), batch_size)]
    results = np.concatenate(results, axis=0)

    # (batches, channels, time) -> (batches, time, channels) -> (time, channels)
    channels = results.shape[1]
    results = results.transpose((0, 2, 1)).reshape(-1, channels)

    # Lop off frames that came from the zero padding
    return results[:expected_frames, :]
Example 2
def pooled(result):
    """Max-pool the last axis in half-overlapping windows (size 512, hop 256).

    Input is (examples, channels, time); output is
    (examples, channels, n_windows).
    """
    n_examples, n_channels = result.shape[0], result.shape[1]
    # Zero-pad the time axis so the final 512-sample window is complete
    tail = np.zeros((n_examples, n_channels, 256))
    padded = np.concatenate([result, tail], axis=-1)
    # (n_windows, examples, channels, 512)
    windows = zounds.sliding_window(padded,
                                    (n_examples, n_channels, 512),
                                    (n_examples, n_channels, 256))
    # Collapse each window to its max, then move the window axis last
    maxima = windows.max(axis=-1)
    return maxima.transpose((1, 2, 0))
Example 3
def load_and_play():
    """Yield (raw, result, normalized audio, spectrogram) tuples from the
    most recently created ``.npy`` file in the current directory.

    Raises IndexError if no ``.npy`` files are present.
    """
    # Sort by creation time.  The original used Python 2's `cmp=` (removed
    # in Python 3) with int-truncated ctime deltas; a `key=` sort is both
    # valid Python 3 and keeps sub-second precision.
    files = sorted(glob.glob('*.npy'), key=lambda f: os.stat(f).st_ctime)
    most_recent = files[-1]
    print('loading generated examples from', most_recent)
    results = np.load(most_recent)

    # synthesized = FrequencyDecomposition.synthesize_block(results)
    synthesized = results

    for raw, result in zip(results, synthesized):
        # Spectrogram: 512-sample windows with a 256-sample hop
        windowed = zounds.sliding_window(result, 512, 256)
        spec = np.abs(np.fft.rfft(windowed))
        # `samplerate` is a module-level value defined elsewhere in the file
        audio_samples = zounds.AudioSamples(result, samplerate) \
            .pad_with_silence(zounds.Seconds(1))
        # Peak-normalize the audio before yielding
        yield raw, result, audio_samples / audio_samples.max(), spec
Example 4
def fractal(x, window_size):
    """Recursively decompose (examples, channels) into unit-norm windows
    plus their norms, one scale per level.

    Returns a list of arrays, coarsest scale first.  `channels` must be an
    exact integer power of `window_size`.
    """
    examples, channels = x.shape

    # A window of 1 never shrinks the signal (and made the original
    # `math.log(channels, 1)` raise ZeroDivisionError).
    if window_size < 2:
        raise ValueError(
            f'window size must be at least 2 but was {window_size}')

    # Integer power check.  The original float test
    # `math.log(channels, window_size) % 1 != 0` falsely rejects valid
    # inputs (e.g. math.log(125, 5) == 3.0000000000000004).
    remaining = channels
    while remaining > 1 and remaining % window_size == 0:
        remaining //= window_size
    if remaining != 1:
        raise ValueError(f'{channels} channels must be an integer power '
                         f'of window size {window_size}')

    output = []
    while x.shape[-1] > 1:
        # Split the channel axis into non-overlapping windows
        x = zounds.sliding_window(x, (1, window_size), (1, window_size),
                                  flatten=False).squeeze(axis=2)
        # (examples, n_windows, window)
        norms = np.linalg.norm(x, axis=-1, keepdims=True)
        # (examples, n_windows, 1); unit-normalize each window
        x = x / (norms + 1e-12)
        output.append(x.reshape((examples, -1)))
        # Recurse on the norms at the next coarser scale
        x = norms.reshape((examples, -1))

    output.append(x)
    # Coarsest scale first
    return output[::-1]
Example 5
def test_spectral_filtering():
    """Shape uniform noise with per-frame spectral filter coefficients,
    then resynthesize time-domain audio via windowed overlap-add.

    Relies on `get_filter_coeffs` and `np_overlap_add`, which are defined
    elsewhere in this project and not visible here.
    """
    total_samples = 16384
    window_size = 32
    hop_size = 16

    # One set of filter coefficients per analysis frame.
    # NOTE(review): exact shape depends on get_filter_coeffs (not visible
    # here); the transpose below suggests something like
    # (batch, bins, frames) -> (batch, frames, bins) — confirm.
    coeffs = get_filter_coeffs(window_size, total_samples // hop_size)

    noise = np.random.uniform(-1, 1, total_samples)
    # Pad one hop so the final analysis window is complete
    noise = np.pad(noise, ((0, hop_size), ), mode='constant')
    # Overlapping frames: roughly (total_samples // hop_size, window_size)
    windowed = zounds.sliding_window(noise, window_size, hop_size)
    # Real FFT along the window axis -> (n_frames, window_size // 2 + 1)
    noise_coeffs = np.fft.rfft(windowed, axis=-1, norm='ortho')

    # Apply the filter frame-by-frame in the frequency domain, then
    # invert back to time-domain frames
    filtered = coeffs.transpose((0, 2, 1)) * noise_coeffs
    recovered = np.fft.irfft(filtered, axis=-1, norm='ortho')
    # Overlap-add the windowed frames back into a 1-D signal
    samples = np_overlap_add(recovered[:, None, :, :], apply_window=True)
    # Trim the padding back to the original length
    samples = samples.squeeze()[:total_samples]
    return zounds.AudioSamples(samples, zounds.SR11025()).pad_with_silence()