def experiment_ford_helper(ckpt_dir,
                           data_dir,
                           plot_type="spectrogram",
                           sound_mode="save",
                           f0_denom=1.,
                           n_harmonic_distribution=60,
                           n_noise_magnitudes=65,
                           losses=None,
                           feature_domain="freq",
                           model=None):
    '''
    Code general for all Ford experiments.

    Loads one batch from the TFRecord data in `data_dir`, runs it through the
    model (encode followed by decode), then plots and plays/saves the first
    example of the batch next to the synthesized result.

    Args:
        ckpt_dir: Forwarded to ModelBuilder as `checkpoint_dir`. Only used
            when `model` is None.
        data_dir: Passed to TFRecordProvider to load the input batch.
        plot_type: "signal" draws the input audio, output audio, f0 and
            scaled f0 as four stacked line plots; "spectrogram" draws one
            figure each for the recording and the synthesized audio via
            plot_audio_f0. Any other value produces no plot.
        sound_mode: "play" plays the recorded and then the synthesized audio
            through `sd`; "save" writes them to ./audio_in.wav and
            ./audio_out.wav through `sf`. Any other value does nothing.
        f0_denom: Forwarded to ModelBuilder. Only used when `model` is None.
        n_harmonic_distribution: Forwarded to ModelBuilder. Only used when
            `model` is None.
        n_noise_magnitudes: Forwarded to ModelBuilder. Only used when `model`
            is None.
        losses: Forwarded to ModelBuilder. Only used when `model` is None.
        feature_domain: Forwarded to ModelBuilder. Only used when `model` is
            None.
        model: Optional pre-built model exposing `encode(batch)` and
            `decode(features, training=False)`. When None, an
            "f0_rnn_fc_hpn_decoder" model is built with ModelBuilder.

    Returns:
        None. The results are the plots, the played or written audio and the
        log output.
    '''
    logging.info("Loading data...")
    data_provider = TFRecordProvider(data_dir)
    input_tensor = data_provider.get_single_batch(batch_number=1)

    # Disabled overrides of the input batch, kept for experimentation.
    #input_tensor["f0"] = tf.convert_to_tensor(np.flip(np.arange(32., 33., 100./np.size(input_tensor["f0"]))), dtype=tf.float32)[tf.newaxis,:,tf.newaxis]
    #input_tensor["f0"] = tf.convert_to_tensor(np.arange(1., 200., 100./np.size(input_tensor["f0"])), dtype=tf.float32)[tf.newaxis,:,tf.newaxis]
    #N = np.size(input_tensor["f0"])
    #x = 2*np.pi/N*np.arange(1,N)
    #y = 100 + 50*np.sin(x)
    #input_tensor["f0"] = tf.convert_to_tensor(y, dtype=tf.float32)[tf.newaxis,:,tf.newaxis]
    #input_tensor["f0"] += 20.
    #input_tensor.pop("osc", None)

    audio_rate = data_provider.audio_rate
    input_rate = data_provider.input_rate

    logging.info("Building model...")
    if model is None:
        model = ModelBuilder(model_type="f0_rnn_fc_hpn_decoder",
                             audio_rate=audio_rate,
                             input_rate=input_rate,
                             window_secs=data_provider.example_secs,
                             f0_denom=f0_denom,
                             checkpoint_dir=ckpt_dir,
                             n_harmonic_distribution=n_harmonic_distribution,
                             n_noise_magnitudes=n_noise_magnitudes,
                             losses=losses,
                             feature_domain=feature_domain).build()

    logging.info("Normalizing inputs...")
    features = model.encode(input_tensor)

    logging.info("Synthesizing from f0 signal...")
    # perf_counter is monotonic, so the measured duration cannot be skewed by
    # system clock adjustments the way time.time() differences can.
    start = time.perf_counter()
    output_tensor = model.decode(features, training=False)
    time_elapsed = time.perf_counter() - start
    logging.info("Synthesis took %.3f seconds.", time_elapsed)

    logging.info("Plotting signals...")
    # Only the first example of the batch is plotted and played/saved.
    audio_in = features["audio"].numpy()[0, :]
    audio_out = output_tensor.numpy()[0, :]
    f0 = input_tensor["f0"].numpy()[0, :]
    f0_scaled = features["f0_scaled"].numpy()[0, :]

    if plot_type == "signal":
        _, ax = plt.subplots(4, 1, figsize=(10, 8))
        ax[0].plot(audio_in)
        ax[1].plot(audio_out)
        ax[2].plot(f0)
        ax[3].plot(f0_scaled)
    elif plot_type == "spectrogram":
        n_fft = 4096
        n_mels = n_fft // 8
        audio_dict = {"recording": audio_in, "synthesized": audio_out}
        for title, audio in audio_dict.items():
            plt.figure()
            plot_audio_f0(audio, audio_rate, f0, input_rate,
                          title=title, n_fft=n_fft, n_mels=n_mels)
    # Show the figure(s) created above.
    plt.show()

    if sound_mode == "play":
        logging.info("Playing original audio...")
        sd.play(audio_in, audio_rate)
        sd.wait()
        logging.info("Playing synthesized audio...")
        sd.play(audio_out, audio_rate)
        sd.wait()
    elif sound_mode == "save":
        audio_in_path = "./audio_in.wav"
        audio_out_path = "./audio_out.wav"
        logging.info("Saving recorded audio to '%s'...", audio_in_path)
        sf.write(audio_in_path, audio_in, audio_rate)
        logging.info("Saving synthesized audio to '%s'...", audio_out_path)
        sf.write(audio_out_path, audio_out, audio_rate)