Example no. 1
def experiment_ford_helper(ckpt_dir, data_dir, plot_type="spectrogram",
                                               sound_mode="save",
                                               f0_denom=1.,
                                               n_harmonic_distribution=60,
                                               n_noise_magnitudes=65,
                                               losses=None,
                                               feature_domain="freq",
                                               model=None):
    '''
    Code general for all Ford experiments: load a single batch from the
    dataset, build (or reuse) a model, synthesize audio from the f0 signal,
    plot the input/output signals, and either play or save the audio.

    Args:
        ckpt_dir: Checkpoint directory the model is restored from.
        data_dir: Directory containing the TFRecord dataset.
        plot_type: "spectrogram" (default) or "signal" — how to plot the
            recorded vs. synthesized audio.
        sound_mode: "save" (default) writes WAV files to the working
            directory; "play" plays both clips through the sound device.
        f0_denom: f0 denominator forwarded to the model builder.
        n_harmonic_distribution: Harmonic-distribution size for the decoder.
        n_noise_magnitudes: Number of noise magnitude bands for the decoder.
        losses: Optional losses forwarded to the model builder.
        feature_domain: Feature domain ("freq", ...) for the model builder.
        model: Optional pre-built model; when None a new one is built from
            the other arguments.
    '''
    logging.info("Loading data...")
    data_provider = TFRecordProvider(data_dir)
    input_tensor = data_provider.get_single_batch(batch_number=1)

    logging.info("Building model...")
    if model is None:
        model = ModelBuilder(model_type="f0_rnn_fc_hpn_decoder",
                            audio_rate=data_provider.audio_rate,
                            input_rate=data_provider.input_rate,
                            window_secs=data_provider.example_secs,
                            f0_denom=f0_denom,
                            checkpoint_dir=ckpt_dir,
                            n_harmonic_distribution=n_harmonic_distribution,
                            n_noise_magnitudes=n_noise_magnitudes,
                            losses=losses,
                            feature_domain=feature_domain).build()

    logging.info("Normalizing inputs...")
    features = model.encode(input_tensor)

    logging.info("Synthesizing from f0 signal...")
    start = time.time()
    output_tensor = model.decode(features, training=False)
    time_elapsed = time.time() - start
    # Lazy %-args: the message is only formatted if INFO is enabled.
    logging.info("Synthesis took %.3f seconds.", time_elapsed)

    logging.info("Plotting signals...")
    # Drop the batch dimension (batch size is 1 here).
    audio_in = features["audio"].numpy()[0,:]
    audio_out = output_tensor.numpy()[0,:]
    f0 = input_tensor["f0"].numpy()[0,:]
    f0_scaled = features["f0_scaled"].numpy()[0,:]
    if plot_type == "signal":
        _, ax = plt.subplots(4, 1, figsize=(10, 8))
        ax[0].plot(audio_in)
        ax[1].plot(audio_out)
        ax[2].plot(f0)
        ax[3].plot(f0_scaled)
    elif plot_type == "spectrogram":
        n_fft = 4096
        n_mels = int(n_fft/8)  # mel resolution tied to the FFT size
        audio_dict = {"recording": audio_in, "synthesized": audio_out}
        # One figure per clip, titled with its dictionary key.
        for title, audio in audio_dict.items():
            plt.figure()
            plot_audio_f0(audio, data_provider.audio_rate, f0, data_provider.input_rate, title=title, n_fft=n_fft, n_mels=n_mels)
    plt.show()

    if sound_mode == "play":
        logging.info("Playing original audio...")
        sd.play(audio_in, data_provider.audio_rate)
        sd.wait()  # block until playback finishes before the next clip
        logging.info("Playing synthesized audio...")
        sd.play(audio_out, data_provider.audio_rate)
        sd.wait()
    elif sound_mode == "save":
        audio_in_path = "./audio_in.wav"
        audio_out_path = "./audio_out.wav"
        logging.info("Saving recorded audio to '%s'...", audio_in_path)
        sf.write(audio_in_path, audio_in, data_provider.audio_rate)
        logging.info("Saving synthesized audio to '%s'...", audio_out_path)
        sf.write(audio_out_path, audio_out, data_provider.audio_rate)