Ejemplo n.º 1
0
    def test_inverse_r9y9(self):
        """Round-trip a waveform through the r9y9 mel spectrogram and back.

        The fixture ``self.wav_mono_22`` is converted to a mel spectrogram and
        inverted twice, once with ``phase_estimation='lws'`` and once with
        ``phase_estimation='gl10'``. Each reconstruction must keep the
        fixture's shape, and the mean absolute difference between the
        ``|sphilbert(...)|`` envelopes must match a recorded reference value
        to 4 decimal places.
        """
        source = self.wav_mono_22
        n_samples = 82432
        self.assertEqual(source.shape, (n_samples, 1, 1), 'invalid shape')

        def envelope(wav):
            # Magnitude of sphilbert's output on the single channel.
            # NOTE(review): sphilbert is presumably scipy.signal.hilbert,
            # making this the amplitude envelope -- confirm the import.
            return np.abs(sphilbert(wav[:, 0, 0]))

        features = spectral.waveform_to_r9y9_melspec(source)

        recon_lws = spectral.r9y9_melspec_to_waveform(
            features, phase_estimation='lws', waveform_len=n_samples)
        self.assertEqual(recon_lws.shape, source.shape, 'invalid shape')

        # Pin NumPy's global RNG right before the 'gl10' inversion so the
        # reference value below is reproducible (presumably this mode starts
        # from a random phase -- confirm in spectral).
        np.random.seed(0)
        recon_gl10 = spectral.r9y9_melspec_to_waveform(
            features, phase_estimation='gl10', waveform_len=n_samples)
        self.assertEqual(recon_gl10.shape, source.shape, 'invalid shape')

        reference_env = envelope(source)

        lws_error = np.mean(np.abs(reference_env - envelope(recon_lws)))
        self.assertAlmostEqual(lws_error, 0.01737, 4,
                               'bad envelope after lws inverse')

        gl10_error = np.mean(np.abs(reference_env - envelope(recon_gl10)))
        self.assertAlmostEqual(gl10_error, 0.01686, 4,
                               'bad envelope after gl10 inverse')
Ejemplo n.º 2
0
def r9y9_melspec_to_approx_audio(x, fs, waveform_len, n=None):
    """Build a TF op that inverts each mel spectrogram in ``x`` to audio.

    Args:
        x: Tensor whose leading axis indexes individual mel spectrograms.
        fs: Sample rate forwarded to ``spectral.r9y9_melspec_to_waveform``.
        waveform_len: Output length forwarded to the same function.
        n: If given, only the first ``n`` entries of ``x`` are inverted.

    Returns:
        The result of ``tf.map_fn`` over the selected entries; each element
        is produced by a ``tf.py_func`` declared to return ``tf.float32``.
    """
    batch = x if n is None else x[:n]

    def _invert_numpy(melspec):
        # Runs in Python on a NumPy array; the inversion is fed float64.
        return spectral.r9y9_melspec_to_waveform(
            melspec.astype(np.float64), fs=fs, waveform_len=waveform_len)

    def _invert_tensor(melspec_tensor):
        # Wrap the NumPy routine as a stateless graph op.
        return tf.py_func(
            _invert_numpy, [melspec_tensor], tf.float32, stateful=False)

    return tf.map_fn(_invert_tensor, batch)
Ejemplo n.º 3
0
def _inception_scores(preds, n_samples, k):
    """Return one inception score per group for ``k`` consecutive groups.

    ``preds`` is split into ``k`` consecutive slices of
    ``n_samples // k`` rows. For each slice the score is
    ``exp(mean_rows(sum_cols(p * (log p - log mean_rows(p)))))``.

    NOTE(review): rows are presumably class-probability vectors; an exact
    zero in ``preds`` produces NaN through ``0 * log(0)`` -- confirm the
    classifier output is strictly positive.
    """
    split_size = n_samples // k
    scores = []
    for i in range(k):
        split = preds[i * split_size:(i + 1) * split_size]
        marginal = np.expand_dims(np.mean(split, 0), 0)
        kl = split * (np.log(split) - np.log(marginal))
        scores.append(np.exp(np.mean(np.sum(kl, 1))))
    return scores


def incept(args):
    """Score every new generator checkpoint with a pretrained classifier.

    Polls ``args.train_dir`` once per second. For each new checkpoint the
    generator is restored, ``args.incept_n`` fixed latents are decoded to
    mel features, inverted to waveforms and classified; the mean and
    standard deviation of the per-group inception scores are written as
    summaries. Whenever the mean beats the best seen so far, the generator
    is saved under ``<train_dir>/incept/best_score``.

    Attributes read from ``args``: ``train_dir``, ``incept_n``, ``incept_k``,
    ``incept_metagraph_fp``, ``incept_ckpt_fp``, ``data_sample_rate``.

    This function loops forever and only leaves through an exception
    (including ``KeyboardInterrupt``); the classifier session and the
    summary writer are closed on the way out.
    """
    batch_size = 100

    incept_dir = os.path.join(args.train_dir, 'incept')
    if not os.path.isdir(incept_dir):
        os.makedirs(incept_dir)

    # Create GAN graph
    z = tf.placeholder(tf.float32, [None, Z_DIM])
    with tf.variable_scope('G'):
        G = MelspecGANGenerator()
        G_z = G(z, training=False)
    G_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='G')
    step = tf.train.get_or_create_global_step()
    gan_saver = tf.train.Saver(var_list=G_vars + [step], max_to_keep=1)

    # Load or generate latents. The same latents are reused for every
    # checkpoint so that scores are comparable across training steps.
    z_fp = os.path.join(incept_dir, 'z.pkl')
    if os.path.exists(z_fp):
        # NOTE: pickle is only acceptable here because z.pkl is a cache this
        # function wrote itself; never point train_dir at untrusted data.
        # NOTE(review): a cached file is reused as-is, even if it was created
        # with a different args.incept_n.
        with open(z_fp, 'rb') as f:
            _zs = pickle.load(f)
    else:
        zs = tf.random.normal([args.incept_n, Z_DIM], dtype=tf.float32)
        with tf.Session() as sess:
            _zs = sess.run(zs)
        with open(z_fp, 'wb') as f:
            pickle.dump(_zs, f)

    # Load classifier graph (kept separate from the GAN graph)
    incept_graph = tf.Graph()
    with incept_graph.as_default():
        incept_saver = tf.train.import_meta_graph(args.incept_metagraph_fp)
    incept_x = incept_graph.get_tensor_by_name('x:0')
    incept_preds = incept_graph.get_tensor_by_name('scores:0')
    incept_sess = tf.Session(graph=incept_graph)
    incept_saver.restore(incept_sess, args.incept_ckpt_fp)

    # Create summaries
    summary_graph = tf.Graph()
    with summary_graph.as_default():
        incept_mean = tf.placeholder(tf.float32, [])
        incept_std = tf.placeholder(tf.float32, [])
        summaries = [
            tf.summary.scalar('incept_mean', incept_mean),
            tf.summary.scalar('incept_std', incept_std)
        ]
        summaries = tf.summary.merge(summaries)
    summary_writer = tf.summary.FileWriter(incept_dir)

    # Loop, waiting for checkpoints
    ckpt_fp = None
    _best_score = 0.
    try:
        while True:
            latest_ckpt_fp = tf.train.latest_checkpoint(args.train_dir)
            if latest_ckpt_fp != ckpt_fp:
                print('Incept: {}'.format(latest_ckpt_fp))

                # The context manager guarantees the session is released
                # even if restoring, generating or scoring fails.
                with tf.Session() as sess:
                    gan_saver.restore(sess, latest_ckpt_fp)

                    _step = sess.run(step)

                    # Generate features in fixed-size batches
                    _G_z_feats = []
                    for i in range(0, args.incept_n, batch_size):
                        _G_z_feats.append(
                            sess.run(G_z, {z: _zs[i:i + batch_size]}))
                    _G_z_feats = np.concatenate(_G_z_feats, axis=0)

                    # Invert every feature map to a waveform; the first one
                    # is also written to disk, named by the global step.
                    _G_zs = []
                    for i, _G_z in enumerate(_G_z_feats):
                        _G_z = feats_denorm(_G_z).astype(np.float64)
                        _audio = r9y9_melspec_to_waveform(
                            _G_z,
                            fs=args.data_sample_rate,
                            waveform_len=16384)
                        if i == 0:
                            out_fp = os.path.join(
                                incept_dir,
                                '{}.wav'.format(str(_step).zfill(9)))
                            save_as_wav(out_fp, args.data_sample_rate, _audio)
                        _G_zs.append(_audio[:, 0, 0])

                    # Classify the waveforms in fixed-size batches
                    _preds = []
                    for i in range(0, args.incept_n, batch_size):
                        _preds.append(
                            incept_sess.run(
                                incept_preds,
                                {incept_x: _G_zs[i:i + batch_size]}))
                    _preds = np.concatenate(_preds, axis=0)

                    # Split into k groups
                    _incept_scores = _inception_scores(
                        _preds, args.incept_n, args.incept_k)
                    _incept_mean = np.mean(_incept_scores)
                    _incept_std = np.std(_incept_scores)

                    # Summarize
                    with tf.Session(graph=summary_graph) as summary_sess:
                        _summaries = summary_sess.run(summaries, {
                            incept_mean: _incept_mean,
                            incept_std: _incept_std
                        })
                    summary_writer.add_summary(_summaries, _step)

                    # Save
                    if _incept_mean > _best_score:
                        gan_saver.save(
                            sess, os.path.join(incept_dir, 'best_score'),
                            _step)
                        _best_score = _incept_mean

                print('Done')

                ckpt_fp = latest_ckpt_fp

            time.sleep(1)
    finally:
        # The loop never ends on its own, so cleanup has to live here to
        # run at all (e.g. on Ctrl-C or an unexpected error).
        summary_writer.close()
        incept_sess.close()
Ejemplo n.º 4
0
    inv_mel_filterbank = create_inverse_mel_filterbank(args.fs,
                                                       1024,
                                                       fmin=125,
                                                       fmax=7600,
                                                       n_mels=80)

    spec_fps = glob.glob(os.path.join(args.spec_dir, '*.npy'))
    for i, spec_fp in tqdm(enumerate(spec_fps)):
        spec_fn = os.path.splitext(os.path.split(spec_fp)[1])[0]
        wave_fn = spec_fn + '.wav'
        wave_fp = os.path.join(args.out_dir, wave_fn)

        spec = np.load(spec_fp)

        if heuristic:
            wave = r9y9_melspec_to_waveform(spec)
        else:
            subseq_len = args.subseq_len
            X_mag = tacotron_mel_to_mag(spec[:, :, 0], inv_mel_filterbank)
            x_mag_original_length = X_mag.shape[0]
            x_mag_target_length = int(
                X_mag.shape[0] / subseq_len) * subseq_len + subseq_len
            X_mag = np.pad(X_mag,
                           ([0, x_mag_target_length - X_mag.shape[0]], [0, 0]),
                           'constant')
            num_examples = int(x_mag_target_length / subseq_len)
            X_mag = np.reshape(X_mag, [num_examples, subseq_len, 513, 1])
            gen_mags = []
            for n in range(num_examples):
                _gen = gen_sess.run([gen_mag_spec],
                                    feed_dict={x_mag_input: X_mag[n:n + 1]})[0]