def modulate_audio(audio, center_ms, depth_ms, mod_rate):
  mod_delay = ddsp.effects.ModDelay(center_ms=center_ms,
                                    depth_ms=depth_ms,
                                    gain_scale_fn=None,
                                    phase_scale_fn=None)

  phase = sin_phase(mod_rate)  # Hz
  gain = 1.0 * np.ones_like(audio)[..., np.newaxis]
  audio_out = 0.5 * mod_delay(audio, gain, phase)

  # Listen.
  play(audio_out)
  specplot(audio_out)
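
# A minimal usage sketch of the helper above with chorus-like settings; the
# parameter values are illustrative assumptions, not taken from the original
# notebook.
modulate_audio(audio, center_ms=25.0, depth_ms=1.0, mod_rate=2.0)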
record_seconds = 5  #@param {type:"number", min:1, max:10, step:1}

if record_or_upload == "Record":
  audio = record(seconds=record_seconds)
else:
  # Load audio sample here (.mp3 or .wav file)
  # Just use the first file.
  filenames, audios = upload()
  audio = audios[0]
audio = audio[np.newaxis, :]
print('\nExtracting audio features...')

# Plot.
specplot(audio)
play(audio)

# Setup the session.
ddsp.spectral_ops.reset_crepe()

# Compute features.
start_time = time.time()
audio_features = ddsp.training.metrics.compute_audio_features(audio)
audio_features['loudness_db'] = audio_features['loudness_db'].astype(np.float32)
audio_features_mod = None
print('Audio features took %.1f seconds' % (time.time() - start_time))

TRIM = -15

# Plot Features.
fig, ax = plt.subplots(nrows=3, ncols=1, sharex=True, figsize=(6, 8))
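# A plausible continuation of the truncated plotting cell above: a minimal
# sketch assuming compute_audio_features() returned 'loudness_db', 'f0_hz',
# and 'f0_confidence' arrays.
ax[0].plot(audio_features['loudness_db'][:TRIM])
ax[0].set_ylabel('loudness_db')

ax[1].plot(audio_features['f0_hz'][:TRIM])
ax[1].set_ylabel('f0 [Hz]')

ax[2].plot(audio_features['f0_confidence'][:TRIM])
ax[2].set_ylabel('f0 confidence')
_ = ax[2].set_xlabel('Time step [frame]')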
plt.figure(figsize=(18, 4))
plt.subplot(121)
plt.plot(x, y)
plt.subplot(122)
_ = plt.semilogy(x, y)

"""## `get_signal()`

Synthesizes audio from controls.
"""

audio = harmonic_synth.get_signal(**controls)

play(audio)
specplot(audio)

"""## `__call__()`

Synthesizes audio directly from the raw inputs. `get_controls()` is called internally to turn them into valid control parameters.
"""

audio = harmonic_synth(amps, harmonic_distribution, f0_hz)

play(audio)
specplot(audio)

"""# Example: Just for fun...

Let's run another example where we tweak some of the controls...
"""
harmonic_distribution = np.ones([n_frames, 1]) * np.linspace(1.0, -1.0, n_harmonics)[np.newaxis, :]
harmonic_distribution = harmonic_distribution[np.newaxis, :, :]

# Fundamental frequency in Hz [batch, n_frames, 1].
f0_hz = 440.0 * np.ones([1, n_frames, 1])

# Create synthesizer object.
harmonic_synth = ddsp.synths.Harmonic(n_samples=n_samples,
                                      scale_fn=ddsp.core.exp_sigmoid,
                                      sample_rate=sample_rate)

# Generate some audio.
audio = harmonic_synth(amps, harmonic_distribution, f0_hz)

# Listen.
play(audio)
specplot(audio)

"""## Filtered Noise

The filtered noise synthesizer is a subtractive synthesizer that shapes white noise with a series of time-varying filter banks.

Inputs:

* `magnitudes`: Amplitude envelope of each filter bank (linearly spaced from 0Hz to the Nyquist frequency).
"""

n_frames = 250
n_frequencies = 1000
n_samples = 64000

# Bandpass filters, [n_batch, n_frames, n_frequencies].
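# A minimal sketch of driving the filtered noise synth, assuming the
# comb-like magnitude pattern below; the values are illustrative, not taken
# from the original notebook.
magnitudes = np.sin(np.linspace(0.0, np.pi * 30.0, n_frequencies)) ** 2.0
magnitudes = magnitudes[np.newaxis, :] * np.linspace(0.0, 1.0, n_frames)[:, np.newaxis]
magnitudes = magnitudes[np.newaxis, :, :]

# Create synthesizer object (scale_fn=None, as magnitudes are already non-negative).
filtered_noise_synth = ddsp.synths.FilteredNoise(n_samples=n_samples, scale_fn=None)

# Generate some audio.
audio = filtered_noise_synth(magnitudes)

# Listen.
play(audio)
specplot(audio)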
""" harmonic = ddsp.synths.Harmonic(n_samples=n_samples) noise = ddsp.synths.FilteredNoise(n_samples=n_samples, initial_bias=0) reverb = ddsp.effects.Reverb() # Python signal processor chain audio_harmonic = harmonic(inputs['amps'], inputs['harmonic_distribution'], inputs['f0_hz']) audio_noise = noise(inputs['magnitudes']) audio_dry = audio_harmonic + audio_noise audio_out = reverb(inputs['ir'], audio_dry) # Listen play(audio_out) specplot(audio_out) """# ProcessorGroup A ProcessorGroup is a Directed Acyclic Graph (DAG) of Processors. You can specify the DAG as a list of tuples `dag = [(processor, ['input1', 'input2', ...]), ...]`, where each tuple is a pair of processor and that processor's inputs respectively. The output signal of any processor can be referenced as an input to a different processor by the string `'processor_name/signal'` where processor_name is the name of the processor at construction. The ProcessorGroup takes a dictionary of inputs, whose keys are referenced as inputs in the DAG. """ print(inputs.keys())
def find_model_dir(dir_name):
  # Iterate through directories until model directory is found
  for root, dirs, filenames in os.walk(dir_name):
    for filename in filenames:
      if filename.endswith(".gin") and not filename.startswith("."):
        model_dir = root
        break
  return model_dir


audio_filepath = "../audio_files/Risers_20.wav"
audio = load_audio_signal(audio_filepath, 3)

print('\nExtracting audio features...')
play(audio, "initial audio")

# Setup the session.
ddsp.spectral_ops.reset_crepe()

# Compute features.
start_time = time.time()
audio_features = ddsp.training.metrics.compute_audio_features(audio)
audio_features['loudness_db'] = audio_features['loudness_db'].astype(np.float32)
audio_features_mod = None
print('Audio features took %.1f seconds' % (time.time() - start_time))

# Load a model
model = 'Violin'  #@param ['Violin', 'Flute', 'Flute2', 'Trumpet', 'Tenor_Saxophone', 'Upload your own (checkpoint folder as .zip)']
MODEL = model

if model in ('Violin', 'Flute', 'Flute2', 'Trumpet', 'Tenor_Saxophone'):
  # Pretrained models.
  PRETRAINED_DIR = '/content/pretrained'
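  # A hedged sketch of how this branch would typically continue: locate the
  # checkpoint directory with the helper defined above and list its gin
  # config(s); the subdirectory layout under PRETRAINED_DIR is an assumption.
  model_dir = find_model_dir(PRETRAINED_DIR)
  gin_files = [f for f in os.listdir(model_dir) if f.endswith('.gin')]
  print('Model directory:', model_dir)
  print('Gin config(s):', gin_files)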
n_samples = int(sample_rate * 4.0)
n_components = 3

# Amplitudes [n_batch, n_samples, n_components].
# Linearly decay in time.
amps = np.linspace(0.3, 0.0, n_samples)
amps = np.tile(amps[np.newaxis, :, np.newaxis], [1, 1, n_components])

# Frequencies in Hz [n_batch, n_samples, n_components].
frequencies = np.ones([1, n_samples, 1]) * np.array([[[220, 440, 660]]])

# Synthesize.
audio = ddsp.core.oscillator_bank(frequencies, amps, sample_rate)

# Listen.
play(audio)
specplot(audio)

"""### Ex: Random frequencies"""

n_samples = int(sample_rate * 4.0)
n_components = 6
n_frames = 100

# Amplitudes [n_batch, n_samples, n_components].
# Linearly decay in time.
amps = np.linspace(0.3, 0.0, n_samples)
amps = np.tile(amps[np.newaxis, :, np.newaxis], [1, 1, n_components])

# Frequencies in Hz [n_batch, n_samples, n_components].
frequencies = []
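# A minimal sketch of one way to finish this example: append a random
# per-frame frequency curve for each component, stack them, and upsample to
# audio rate with ddsp.core.resample (the frequency range is illustrative).
for _ in range(n_components):
  frequencies.append(np.random.uniform(100.0, 1000.0, size=n_frames))
frequencies = np.stack(frequencies, axis=-1)[np.newaxis, :, :]
frequencies = ddsp.core.resample(frequencies.astype(np.float32), n_samples)

# Synthesize.
audio = ddsp.core.oscillator_bank(frequencies, amps, sample_rate)

# Listen.
play(audio)
specplot(audio)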
import tensorflow_datasets as tfds

sample_rate = DEFAULT_SAMPLE_RATE  # 16000

"""# Get a batch of data"""

# Get a single example from NSynth.
# Takes a few seconds to load from GCS.
data_provider = data.NSynthTfds(split='test')
dataset = data_provider.get_batch(batch_size=1, shuffle=False).take(1).repeat()
batch = next(iter(dataset))
audio = batch['audio']
n_samples = audio.shape[1]

specplot(audio)
play(audio)

"""# Get a distribution strategy"""

strategy = train_util.get_strategy()

"""# Get model and trainer

## python
"""

TIME_STEPS = 1000

# Create Neural Networks.
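# A hedged sketch of what typically follows here, assuming the
# F0LoudnessPreprocessor and RnnFcDecoder classes from ddsp.training; the
# hyperparameter values and output splits are illustrative, not taken from
# the original notebook. Model construction happens inside the strategy scope.
with strategy.scope():
  preprocessor = ddsp.training.preprocessing.F0LoudnessPreprocessor(
      time_steps=TIME_STEPS)
  decoder = ddsp.training.decoders.RnnFcDecoder(
      rnn_channels=256,
      rnn_type='gru',
      ch=256,
      layers_per_stack=1,
      input_keys=('ld_scaled', 'f0_scaled'),
      output_splits=(('amps', 1),
                     ('harmonic_distribution', 45),
                     ('noise_magnitudes', 45)))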