Example #1
0
  def test_save_as_wav(self):
    """Check that save_as_wav rejects malformed inputs and round-trips audio.

    Three inputs derived from the decoded WAV_MONO fixture must be refused
    (a slice with one axis removed, a doubling along axis 1, a doubling along
    axis 2); the untouched array must then survive a save/decode cycle
    unchanged.
    """
    sample_rate, audio = decode_audio(WAV_MONO, fastwav=True)

    # (expected exception, failure message, thunk producing the bad input).
    # Thunks are used so each input is still constructed inside the
    # assertRaises context, exactly where the save call is evaluated.
    rejected_inputs = (
        (ValueError,
         'should not be able to save incorrect dims',
         lambda: audio[:, 0]),
        (ValueError,
         'should not be able to save features',
         lambda: np.concatenate([audio, audio], axis=1)),
        (NotImplementedError,
         'should not be able to save stereo',
         lambda: np.concatenate([audio, audio], axis=2)),
    )

    with tempfile.NamedTemporaryFile() as tmp_file:
      for expected_error, failure_msg, make_input in rejected_inputs:
        with self.assertRaises(expected_error, msg=failure_msg):
          save_as_wav(tmp_file.name, sample_rate, make_input())

      # The valid array must come back bit-for-bit identical.
      save_as_wav(tmp_file.name, sample_rate, audio)
      _, reloaded = decode_audio(tmp_file.name, fastwav=True)

      is_lossless = np.array_equal(audio, reloaded)
      self.assertTrue(is_lossless, 'should be lossless after save')
Example #2
0
def incept(args):
    """Poll ``args.train_dir`` for new checkpoints and score each one.

    For every new checkpoint the generator is restored, ``args.incept_n``
    fixed latent vectors are turned into waveforms, a pre-trained classifier
    is run on them, and the resulting score (mean/std over ``args.incept_k``
    splits) is written as TensorBoard summaries under
    ``<train_dir>/incept``. The generator weights with the highest mean score
    seen so far are saved there as ``best_score``, and the first generated
    waveform of each checkpoint is written as ``<step>.wav``.

    Args:
        args: Namespace providing ``train_dir``, ``incept_n``, ``incept_k``,
            ``incept_metagraph_fp``, ``incept_ckpt_fp`` and
            ``data_sample_rate``.

    This function polls forever and never returns normally. When the loop is
    interrupted by an exception (including ``KeyboardInterrupt``), the
    classifier session and the summary writer are closed before the exception
    propagates.
    """
    incept_dir = os.path.join(args.train_dir, 'incept')
    if not os.path.isdir(incept_dir):
        os.makedirs(incept_dir)

    # Create GAN graph
    z = tf.placeholder(tf.float32, [None, Z_DIM])
    with tf.variable_scope('G'):
        G = MelspecGANGenerator()
        G_z = G(z, training=False)
    G_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='G')
    step = tf.train.get_or_create_global_step()
    gan_saver = tf.train.Saver(var_list=G_vars + [step], max_to_keep=1)

    # Load or generate latents. They are cached on disk so that every
    # checkpoint (and every restart of this process) is scored on the same
    # latent vectors.
    z_fp = os.path.join(incept_dir, 'z.pkl')
    if os.path.exists(z_fp):
        # NOTE(security): pickle.load executes arbitrary code from the file.
        # This is only acceptable because z.pkl is written by the branch
        # below; do not point train_dir at untrusted data.
        with open(z_fp, 'rb') as f:
            _zs = pickle.load(f)
    else:
        zs = tf.random.normal([args.incept_n, Z_DIM], dtype=tf.float32)
        with tf.Session() as sess:
            _zs = sess.run(zs)
        with open(z_fp, 'wb') as f:
            pickle.dump(_zs, f)

    # Load classifier graph
    incept_graph = tf.Graph()
    with incept_graph.as_default():
        incept_saver = tf.train.import_meta_graph(args.incept_metagraph_fp)
    incept_x = incept_graph.get_tensor_by_name('x:0')
    # presumably per-class probabilities (their log is taken below) - TODO
    # confirm against the classifier's export script.
    incept_preds = incept_graph.get_tensor_by_name('scores:0')
    incept_sess = tf.Session(graph=incept_graph)
    incept_saver.restore(incept_sess, args.incept_ckpt_fp)

    # Create summaries
    summary_graph = tf.Graph()
    with summary_graph.as_default():
        incept_mean = tf.placeholder(tf.float32, [])
        incept_std = tf.placeholder(tf.float32, [])
        summaries = [
            tf.summary.scalar('incept_mean', incept_mean),
            tf.summary.scalar('incept_std', incept_std)
        ]
        summaries = tf.summary.merge(summaries)
    summary_writer = tf.summary.FileWriter(incept_dir)

    # Generator and classifier are both evaluated in chunks of this size.
    batch_size = 100

    # Loop, waiting for checkpoints
    ckpt_fp = None
    _best_score = 0.
    try:
        while True:
            latest_ckpt_fp = tf.train.latest_checkpoint(args.train_dir)
            # latest_checkpoint returns None until a checkpoint exists, which
            # equals the initial ckpt_fp, so nothing is restored too early.
            if latest_ckpt_fp != ckpt_fp:
                print('Incept: {}'.format(latest_ckpt_fp))

                sess = tf.Session()
                try:
                    gan_saver.restore(sess, latest_ckpt_fp)

                    _step = sess.run(step)

                    # Generate features for all cached latents.
                    _G_z_feats = np.concatenate([
                        sess.run(G_z, {z: _zs[i:i + batch_size]})
                        for i in range(0, args.incept_n, batch_size)
                    ], axis=0)

                    # Invert every feature map to a waveform.
                    _G_zs = []
                    for i, _G_z in enumerate(_G_z_feats):
                        _G_z = feats_denorm(_G_z).astype(np.float64)
                        _audio = r9y9_melspec_to_waveform(
                            _G_z,
                            fs=args.data_sample_rate,
                            waveform_len=16384)
                        if i == 0:
                            # Keep one audible sample per checkpoint.
                            out_fp = os.path.join(
                                incept_dir,
                                '{}.wav'.format(str(_step).zfill(9)))
                            save_as_wav(out_fp, args.data_sample_rate, _audio)
                        # Drop the two trailing axes before classification.
                        _G_zs.append(_audio[:, 0, 0])

                    _preds = np.concatenate([
                        incept_sess.run(incept_preds,
                                        {incept_x: _G_zs[i:i + batch_size]})
                        for i in range(0, args.incept_n, batch_size)
                    ], axis=0)

                    # Split into k groups; per group the score is
                    # exp(mean over examples of KL(p(y|x) || p(y))), where
                    # p(y) is the group's mean prediction.
                    _incept_scores = []
                    split_size = args.incept_n // args.incept_k
                    for i in range(args.incept_k):
                        _split = _preds[i * split_size:(i + 1) * split_size]
                        _kl = _split * (
                            np.log(_split) -
                            np.log(np.expand_dims(np.mean(_split, 0), 0)))
                        _kl = np.mean(np.sum(_kl, 1))
                        _incept_scores.append(np.exp(_kl))

                    _incept_mean = np.mean(_incept_scores)
                    _incept_std = np.std(_incept_scores)

                    # Summarize
                    with tf.Session(graph=summary_graph) as summary_sess:
                        _summaries = summary_sess.run(summaries, {
                            incept_mean: _incept_mean,
                            incept_std: _incept_std
                        })
                    summary_writer.add_summary(_summaries, _step)

                    # Save
                    if _incept_mean > _best_score:
                        gan_saver.save(sess,
                                       os.path.join(incept_dir, 'best_score'),
                                       _step)
                        _best_score = _incept_mean
                finally:
                    # Release the per-checkpoint session even if restoring,
                    # synthesis or scoring fails.
                    sess.close()

                print('Done')

                ckpt_fp = latest_ckpt_fp

            time.sleep(1)
    finally:
        # The loop only ends through an exception, so cleanup has to live in
        # a finally block to be reachable at all.
        summary_writer.close()
        incept_sess.close()
Example #3
0
        # Output file: same base name as the spectrogram file, with a .wav
        # extension, placed in args.out_dir.
        spec_fn = os.path.splitext(os.path.split(spec_fp)[1])[0]
        wave_fn = spec_fn + '.wav'
        wave_fp = os.path.join(args.out_dir, wave_fn)

        spec = np.load(spec_fp)

        if heuristic:
            # Invert the loaded spectrogram directly with the heuristic helper.
            wave = r9y9_melspec_to_waveform(spec)
        else:
            subseq_len = args.subseq_len
            # Only channel 0 of the last axis is used.
            X_mag = tacotron_mel_to_mag(spec[:, :, 0], inv_mel_filterbank)
            x_mag_original_length = X_mag.shape[0]
            # Round the length up to the next multiple of subseq_len. Note this
            # adds a full extra subseq_len even when the length already is a
            # multiple; the surplus is trimmed again after generation.
            x_mag_target_length = int(
                X_mag.shape[0] / subseq_len) * subseq_len + subseq_len
            # Zero-pad along axis 0 only.
            X_mag = np.pad(X_mag,
                           ([0, x_mag_target_length - X_mag.shape[0]], [0, 0]),
                           'constant')
            num_examples = int(x_mag_target_length / subseq_len)
            # The reshape only succeeds when X_mag has exactly 513 columns.
            X_mag = np.reshape(X_mag, [num_examples, subseq_len, 513, 1])
            gen_mags = []
            # Run the generator on one subsequence at a time.
            for n in range(num_examples):
                _gen = gen_sess.run([gen_mag_spec],
                                    feed_dict={x_mag_input: X_mag[n:n + 1]})[0]
                # _gen[0] drops the leading batch axis of size 1.
                gen_mags.append(_gen[0])
            # Stitch the chunks back together and discard the padded tail.
            gen_mag = np.concatenate(gen_mags, axis=0)
            gen_mag = gen_mag[0:x_mag_original_length]
            # NOTE(review): 1024 and 256 are presumably the FFT size and hop
            # size expected by the helper - confirm against its definition.
            wave = magspec_to_waveform_lws(gen_mag.astype('float64'), 1024,
                                           256)

        save_as_wav(wave_fp, args.fs, wave)
Example #4
0
def main():
    """Convert saved mel spectrograms to audio using a restored generator.

    Command-line flags:
        --input_dir   directory scanned for ``*.npy`` spectrogram files
        --output_dir  directory receiving one ``.wav`` per input (created if
                      missing)
        --meta_fp     meta graph file of the generator
        --ckpt_fp     checkpoint restored into that graph
        --heuristic   phase estimation method, ``lws`` (default) or ``gl``
        --n_mels, --fs, --subseq_len  default to 80, 22050 and 256

    For each input, channel 0 of the stored array is mapped to a magnitude
    spectrogram, padded to a whole number of ``subseq_len`` chunks, passed
    through the generator chunk by chunk, trimmed back to its original
    length, inverted to a waveform and written to ``--output_dir``.

    Raises:
        NotImplementedError: if ``--heuristic`` is neither ``lws`` nor ``gl``.
    """
    parser = ArgumentParser()
    # The four paths have no usable default (None fails later with an opaque
    # TypeError), so let argparse report a missing one up front.
    parser.add_argument('--input_dir', type=str, required=True)
    parser.add_argument('--output_dir', type=str, required=True)
    parser.add_argument('--meta_fp', type=str, required=True)
    parser.add_argument('--ckpt_fp', type=str, required=True)
    parser.add_argument('--heuristic', type=str)
    parser.add_argument('--n_mels', type=int)
    parser.add_argument('--fs', type=int)
    parser.add_argument('--subseq_len', type=int)

    parser.set_defaults(heuristic="lws",
                        n_mels=80,
                        fs=22050,
                        subseq_len=256)
    args = parser.parse_args()

    # Fail before restoring the checkpoint rather than after the first file
    # has already been pushed through the generator.
    if args.heuristic not in ('lws', 'gl'):
        raise NotImplementedError(
            'unsupported heuristic: {!r}'.format(args.heuristic))

    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)

    gen_graph = tf.Graph()
    with gen_graph.as_default():
        gan_saver = tf.train.import_meta_graph(args.meta_fp)

    gen_sess = tf.Session(graph=gen_graph)
    try:
        print("Restoring")
        gan_saver.restore(gen_sess, args.ckpt_fp)
        gen_mag_spec = gen_graph.get_tensor_by_name(
            'generator/decoder_1/strided_slice_1:0')
        x_mag_input = gen_graph.get_tensor_by_name('ExpandDims_1:0')

        su = spectral_util.SpectralUtil(n_mels=args.n_mels, fs=args.fs)

        spec_fps = glob.glob(os.path.join(args.input_dir, '*.npy'))
        subseq_len = args.subseq_len

        start = time.time()
        for fidx, fp in enumerate(spec_fps):
            _mel_spec = np.load(fp)[:, :, 0]
            X_mag = su.tacotron_mel_to_mag(_mel_spec)
            x_mag_original_length = X_mag.shape[0]

            # Round up to the next multiple of subseq_len (a full extra chunk
            # is added when the length already is a multiple); the surplus is
            # trimmed after generation.
            num_examples = x_mag_original_length // subseq_len + 1
            x_mag_target_length = num_examples * subseq_len
            X_mag = np.pad(
                X_mag,
                ([0, x_mag_target_length - x_mag_original_length], [0, 0]),
                'constant')
            # The reshape only succeeds when X_mag has exactly 513 columns.
            X_mag = np.reshape(X_mag, [num_examples, subseq_len, 513, 1])

            gen_mags = []
            for n in range(num_examples):
                _gen = gen_sess.run(gen_mag_spec,
                                    feed_dict={x_mag_input: X_mag[n:n + 1]})
                # Negative generator outputs are clamped to zero; [0] drops
                # the leading batch axis of size 1.
                gen_mags.append(np.clip(_gen, 0, None)[0])
            gen_mag = np.concatenate(gen_mags, axis=0)
            gen_mag = gen_mag[0:x_mag_original_length]

            if args.heuristic == 'lws':
                _gen_audio = spectral.magspec_to_waveform_lws(
                    gen_mag.astype('float64'), 1024, 256)
            else:  # 'gl', validated above
                _gen_audio = spectral.magspec_to_waveform_griffin_lim(
                    gen_mag, 1024, 256)

            # Same base name as the input, with the extension swapped.
            fn = os.path.splitext(os.path.basename(fp))[0] + ".wav"
            output_file_name = os.path.join(args.output_dir, fn)
            print("Writing", fidx, output_file_name)
            audioio.save_as_wav(output_file_name, args.fs, _gen_audio)
        end = time.time()
        print("Execution Time in Seconds", end - start)
    finally:
        # Release the session even when a file fails to convert.
        gen_sess.close()