import glob
import math
import os

import numpy as np
import zounds


def compute_features_batched(samples, samples_per_example, batch_size, feature_func):
    # Pad so we have an even multiple of samples per example
    # TODO: Get rid of hard-coded feature-size and hop-size here
    expected_frames = max(64, len(samples) // 256)
    leftovers = len(samples) % samples_per_example
    # the extra modulo means an already-aligned input gets no padding
    padding_amt = (samples_per_example - leftovers) % samples_per_example
    samples = np.pad(samples, ((0, padding_amt),), mode='constant')

    # compute features in batches
    x = zounds.sliding_window(samples, samples_per_example)
    results = []
    for i in range(0, len(x), batch_size):
        results.append(feature_func(x[i:i + batch_size]))
    results = np.concatenate(results, axis=0)

    # reshape: we have (batches, channels, time)
    channels = results.shape[1]
    results = results.transpose((0, 2, 1))
    # now we have (batches, time, channels)
    results = results.reshape(-1, channels)
    # now we have (time, channels)

    # lop off extra padding frames
    results = results[:expected_frames, :]
    return results
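
# A minimal usage sketch for compute_features_batched. The feature_func
# below is a stand-in of my own (not part of the original code): it slices
# each example into non-overlapping 256-sample frames and keeps the frame
# RMS, returning the (batch, channels, time) layout the function expects.
def _example_compute_features():
    samples = np.random.uniform(-1, 1, 44100).astype(np.float32)

    def feature_func(batch):
        frames = np.asarray(batch).reshape((batch.shape[0], -1, 256))
        rms = np.sqrt((frames ** 2).mean(axis=-1))
        return rms[:, None, :]  # (batch, channels=1, time)

    features = compute_features_batched(
        samples,
        samples_per_example=8192,
        batch_size=16,
        feature_func=feature_func)
    print(features.shape)  # (time, channels) == (172, 1)
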
def pooled(result):
    # max-pool along the (last) time axis: window of 512 frames, hop of 256,
    # zero-padding the tail so the final window is full
    padding = np.zeros((result.shape[0], result.shape[1], 256))
    result = np.concatenate([result, padding], axis=-1)
    result = zounds.sliding_window(
        result,
        (result.shape[0], result.shape[1], 512),
        (result.shape[0], result.shape[1], 256))
    result = result.max(axis=-1).transpose((1, 2, 0))
    return result
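
# A rough sketch of calling pooled, assuming a (batch, channels, time)
# input whose time axis is a multiple of 256. The exact output shape
# depends on zounds.sliding_window's multi-dimensional window semantics,
# so the shape noted below is illustrative rather than guaranteed.
def _example_pooled():
    features = np.random.uniform(0, 1, (4, 16, 1024))
    result = pooled(features)
    # expect roughly (4, 16, time // 256): one max per 512-wide window
    print(result.shape)
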
def load_and_play():
    # sort the generated .npy files from oldest to newest by creation time
    files = sorted(glob.glob('*.npy'), key=lambda x: os.stat(x).st_ctime)
    most_recent = files[-1]
    print('loading generated examples from', most_recent)
    results = np.load(most_recent)
    # synthesized = FrequencyDecomposition.synthesize_block(results)
    synthesized = results
    for raw, result in zip(results, synthesized):
        windowed = zounds.sliding_window(result, 512, 256)
        spec = np.abs(np.fft.rfft(windowed))
        # samplerate is assumed to be defined at module level
        audio_samples = zounds.AudioSamples(result, samplerate) \
            .pad_with_silence(zounds.Seconds(1))
        yield raw, result, audio_samples / audio_samples.max(), spec
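
# load_and_play is a generator; a sketch of one way to consume it,
# assuming generated .npy files exist in the working directory and a
# module-level samplerate is defined:
def _example_load_and_play():
    for raw, result, audio, spec in load_and_play():
        # audio is peak-normalized and silence-padded, ready for playback
        print(raw.shape, spec.shape)
        break
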
def fractal(x, window_size):
    examples, channels = x.shape
    l = math.log(channels, window_size)
    if l % 1 != 0:
        raise ValueError(
            f'channels ({channels}) must be an integer power '
            f'of window_size ({window_size})')

    output = []
    while x.shape[-1] > 1:
        x = zounds.sliding_window(
            x, (1, window_size), (1, window_size), flatten=False) \
            .squeeze(axis=2)
        # (examples, n_windows, window)
        norms = np.linalg.norm(x, axis=-1, keepdims=True)
        # (examples, n_windows, 1)
        x = x / (norms + 1e-12)
        output.append(x.reshape((examples, -1)))
        x = norms.reshape((examples, -1))
    output.append(x)
    return output[::-1]
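
# A quick sketch of fractal on synthetic data: with channels = 16 and
# window_size = 4 the loop runs log_4(16) = 2 times, so the reversed
# output runs coarsest-to-finest, one norm scalar per example first.
def _example_fractal():
    x = np.random.normal(0, 1, (8, 16))
    levels = fractal(x, window_size=4)
    for level in levels:
        # shapes: (8, 1), then (8, 4), then (8, 16)
        print(level.shape)
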
def test_spectral_filtering():
    total_samples = 16384
    window_size = 32
    hop_size = 16

    # one set of spectral filter coefficients per frame
    coeffs = get_filter_coeffs(window_size, total_samples // hop_size)

    noise = np.random.uniform(-1, 1, total_samples)
    noise = np.pad(noise, ((0, hop_size),), mode='constant')
    windowed = zounds.sliding_window(noise, window_size, hop_size)
    noise_coeffs = np.fft.rfft(windowed, axis=-1, norm='ortho')

    # apply the filters in the frequency domain, then invert and
    # reconstruct the time-domain signal via windowed overlap-add
    filtered = coeffs.transpose((0, 2, 1)) * noise_coeffs
    recovered = np.fft.irfft(filtered, axis=-1, norm='ortho')
    samples = np_overlap_add(recovered[:, None, :, :], apply_window=True)
    samples = samples.squeeze()[:total_samples]
    return zounds.AudioSamples(samples, zounds.SR11025()).pad_with_silence()
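
# A sketch of auditioning the filtered noise, assuming get_filter_coeffs
# and np_overlap_add are defined elsewhere in this module; the returned
# zounds.AudioSamples can be listened to or written out.
def _example_spectral_filtering():
    audio = test_spectral_filtering()
    print(audio.samplerate, len(audio))
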