def test_save_as_wav(self):
    """Verify save_as_wav rejects malformed arrays and round-trips mono audio losslessly."""
    sample_rate, audio = decode_audio(WAV_MONO, fastwav=True)
    with tempfile.NamedTemporaryFile() as tmp:
        # Rank too low (channel/feature axes dropped) must be rejected.
        with self.assertRaises(ValueError, msg='should not be able to save incorrect dims'):
            save_as_wav(tmp.name, sample_rate, audio[:, 0])
        # Multiple features stacked along axis 1 must be rejected.
        doubled_feats = np.concatenate([audio, audio], axis=1)
        with self.assertRaises(ValueError, msg='should not be able to save features'):
            save_as_wav(tmp.name, sample_rate, doubled_feats)
        # Two channels along axis 2 (stereo) is unsupported.
        stereo = np.concatenate([audio, audio], axis=2)
        with self.assertRaises(NotImplementedError, msg='should not be able to save stereo'):
            save_as_wav(tmp.name, sample_rate, stereo)
        # A well-formed mono array round-trips bit-exactly.
        save_as_wav(tmp.name, sample_rate, audio)
        reread_rate, reread = decode_audio(tmp.name, fastwav=True)
        self.assertTrue(np.array_equal(audio, reread), 'should be lossless after save')
def incept(args):
    """Poll a training directory for GAN checkpoints and score each with an Inception-style classifier.

    Restores each new checkpoint found in ``args.train_dir``, generates
    ``args.incept_n`` waveforms from a fixed set of latents, feeds them to a
    pre-trained classifier, computes the Inception score (mean/std over
    ``args.incept_k`` splits), logs the result to TensorBoard, and keeps a
    copy of the best-scoring checkpoint. Runs until externally killed.
    """
    incept_dir = os.path.join(args.train_dir, 'incept')
    if not os.path.isdir(incept_dir):
        os.makedirs(incept_dir)

    # Create GAN graph: generator in inference mode ('training=False').
    z = tf.placeholder(tf.float32, [None, Z_DIM])
    with tf.variable_scope('G'):
        G = MelspecGANGenerator()
        G_z = G(z, training=False)
    G_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope='G')
    step = tf.train.get_or_create_global_step()
    # Saver restores generator weights + global step from training checkpoints
    # and is reused below to save the best-scoring checkpoint.
    gan_saver = tf.train.Saver(var_list=G_vars + [step], max_to_keep=1)

    # Load or generate latents. Persisting z.pkl keeps the latent set fixed
    # across restarts so scores are comparable between checkpoints.
    z_fp = os.path.join(incept_dir, 'z.pkl')
    if os.path.exists(z_fp):
        with open(z_fp, 'rb') as f:
            _zs = pickle.load(f)
    else:
        zs = tf.random.normal([args.incept_n, Z_DIM], dtype=tf.float32)
        with tf.Session() as sess:
            _zs = sess.run(zs)
        with open(z_fp, 'wb') as f:
            pickle.dump(_zs, f)

    # Load classifier graph from its exported metagraph. Tensor names 'x:0'
    # and 'scores:0' are fixed by that export.
    incept_graph = tf.Graph()
    with incept_graph.as_default():
        incept_saver = tf.train.import_meta_graph(args.incept_metagraph_fp)
    incept_x = incept_graph.get_tensor_by_name('x:0')
    incept_preds = incept_graph.get_tensor_by_name('scores:0')
    incept_sess = tf.Session(graph=incept_graph)
    incept_saver.restore(incept_sess, args.incept_ckpt_fp)

    # Create summaries in a separate tiny graph fed from numpy results below.
    summary_graph = tf.Graph()
    with summary_graph.as_default():
        incept_mean = tf.placeholder(tf.float32, [])
        incept_std = tf.placeholder(tf.float32, [])
        summaries = [
            tf.summary.scalar('incept_mean', incept_mean),
            tf.summary.scalar('incept_std', incept_std)
        ]
        summaries = tf.summary.merge(summaries)
    summary_writer = tf.summary.FileWriter(incept_dir)

    # Loop, waiting for checkpoints.
    ckpt_fp = None
    _best_score = 0.
    while True:
        latest_ckpt_fp = tf.train.latest_checkpoint(args.train_dir)
        # Only re-score when a checkpoint we have not seen yet appears.
        if latest_ckpt_fp != ckpt_fp:
            print('Incept: {}'.format(latest_ckpt_fp))

            sess = tf.Session()
            gan_saver.restore(sess, latest_ckpt_fp)
            _step = sess.run(step)

            # Generate mel-spectrogram features in batches of 100.
            # NOTE(review): assumes args.incept_n is a multiple of 100 — confirm.
            _G_z_feats = []
            for i in range(0, args.incept_n, 100):
                _G_z_feats.append(sess.run(G_z, {z: _zs[i:i + 100]}))
            _G_z_feats = np.concatenate(_G_z_feats, axis=0)

            # Invert each spectrogram to a waveform; save the first one as an
            # audible sample for this step.
            _G_zs = []
            for i, _G_z in enumerate(_G_z_feats):
                _G_z = feats_denorm(_G_z).astype(np.float64)
                _audio = r9y9_melspec_to_waveform(_G_z, fs=args.data_sample_rate, waveform_len=16384)
                if i == 0:
                    out_fp = os.path.join(incept_dir, '{}.wav'.format(str(_step).zfill(9)))
                    save_as_wav(out_fp, args.data_sample_rate, _audio)
                # Flatten to 1-D; presumably _audio is (len, 1, 1) — TODO confirm
                # against r9y9_melspec_to_waveform.
                _G_zs.append(_audio[:, 0, 0])

            # Classify the generated audio in batches of 100.
            _preds = []
            for i in range(0, args.incept_n, 100):
                _preds.append(
                    incept_sess.run(incept_preds, {incept_x: _G_zs[i:i + 100]}))
            _preds = np.concatenate(_preds, axis=0)

            # Split into k groups and compute the Inception score per split:
            # exp(E[KL(p(y|x) || p(y))]).
            _incept_scores = []
            split_size = args.incept_n // args.incept_k
            for i in range(args.incept_k):
                _split = _preds[i * split_size:(i + 1) * split_size]
                _kl = _split * (np.log(_split) - np.log(np.expand_dims(np.mean(_split, 0), 0)))
                _kl = np.mean(np.sum(_kl, 1))
                _incept_scores.append(np.exp(_kl))
            _incept_mean, _incept_std = np.mean(_incept_scores), np.std(
                _incept_scores)

            # Summarize: run the tiny summary graph with the numpy results.
            with tf.Session(graph=summary_graph) as summary_sess:
                _summaries = summary_sess.run(summaries, {
                    incept_mean: _incept_mean,
                    incept_std: _incept_std
                })
            summary_writer.add_summary(_summaries, _step)

            # Save a copy whenever this checkpoint beats the best score so far.
            if _incept_mean > _best_score:
                gan_saver.save(sess, os.path.join(incept_dir, 'best_score'), _step)
                _best_score = _incept_mean

            sess.close()
            print('Done')
            ckpt_fp = latest_ckpt_fp
        time.sleep(1)

    # NOTE(review): unreachable — the while True above never breaks, so this
    # session is only released on process exit.
    incept_sess.close()
# Fragment: body of a per-spectrogram conversion step (enclosing def/loop not
# visible in this chunk). Loads a mel spectrogram from `spec_fp`, inverts it to
# a waveform either heuristically or via a restored generator, and writes a WAV.
spec_fn = os.path.splitext(os.path.split(spec_fp)[1])[0]
wave_fn = spec_fn + '.wav'
wave_fp = os.path.join(args.out_dir, wave_fn)
spec = np.load(spec_fp)
if heuristic:
    # Direct heuristic inversion of the mel spectrogram.
    wave = r9y9_melspec_to_waveform(spec)
else:
    subseq_len = args.subseq_len
    # Mel -> linear magnitude spectrogram; presumably yields 513 frequency
    # bins (FFT size 1024) given the reshape below — TODO confirm.
    X_mag = tacotron_mel_to_mag(spec[:, :, 0], inv_mel_filterbank)
    x_mag_original_length = X_mag.shape[0]
    # Pad the time axis up to a whole number of subsequences (this always adds
    # one extra subsequence, even when already aligned).
    x_mag_target_length = int(
        X_mag.shape[0] / subseq_len) * subseq_len + subseq_len
    X_mag = np.pad(X_mag, ([0, x_mag_target_length - X_mag.shape[0]], [0, 0]),
                   'constant')
    num_examples = int(x_mag_target_length / subseq_len)
    # Batch of (subseq_len, 513, 1) windows for the generator.
    X_mag = np.reshape(X_mag, [num_examples, subseq_len, 513, 1])
    gen_mags = []
    for n in range(num_examples):
        # Run one window at a time through the restored generator.
        _gen = gen_sess.run([gen_mag_spec],
                            feed_dict={x_mag_input: X_mag[n:n + 1]})[0]
        gen_mags.append(_gen[0])
    gen_mag = np.concatenate(gen_mags, axis=0)
    # Trim the padding added above before inversion.
    gen_mag = gen_mag[0:x_mag_original_length]
    wave = magspec_to_waveform_lws(gen_mag.astype('float64'), 1024, 256)
save_as_wav(wave_fp, args.fs, wave)
def main():
    """Vocode mel spectrograms (*.npy in --input_dir) into WAV files in --output_dir.

    Restores a magnitude-spectrogram generator from --meta_fp/--ckpt_fp, runs
    each spectrogram through it in fixed-length windows, and inverts the
    result to audio with the phase-reconstruction method named by --heuristic
    ('lws' or 'gl').

    Raises:
        NotImplementedError: if --heuristic is neither 'lws' nor 'gl'.
    """
    parser = ArgumentParser()
    parser.add_argument('--input_dir', type=str)
    parser.add_argument('--output_dir', type=str)
    parser.add_argument('--meta_fp', type=str)
    parser.add_argument('--ckpt_fp', type=str)
    parser.add_argument('--heuristic', type=str)
    parser.add_argument('--n_mels', type=int)
    parser.add_argument('--fs', type=int)
    parser.add_argument('--subseq_len', type=int)
    # BUG FIX: this previously set a default for 'input_file', a name no
    # argument declares; the flag defined above is '--input_dir'.
    parser.set_defaults(
        input_dir=None,
        output_dir=None,
        ckpt_fp=None,
        meta_fp=None,
        heuristic="lws",
        n_mels=80,
        fs=22050,
        subseq_len=256)
    args = parser.parse_args()

    if not os.path.isdir(args.output_dir):
        os.makedirs(args.output_dir)

    # Restore the generator metagraph and look up its I/O tensors by name
    # (names are fixed by the exported graph).
    gen_graph = tf.Graph()
    with gen_graph.as_default():
        gan_saver = tf.train.import_meta_graph(args.meta_fp)
    gen_sess = tf.Session(graph=gen_graph)
    print("Restoring")
    gan_saver.restore(gen_sess, args.ckpt_fp)
    gen_mag_spec = gen_graph.get_tensor_by_name(
        'generator/decoder_1/strided_slice_1:0')
    x_mag_input = gen_graph.get_tensor_by_name('ExpandDims_1:0')

    su = spectral_util.SpectralUtil(n_mels=args.n_mels, fs=args.fs)
    spec_fps = glob.glob(os.path.join(args.input_dir, '*.npy'))
    subseq_len = args.subseq_len

    start = time.time()
    for fidx, fp in enumerate(spec_fps):
        _mel_spec = np.load(fp)[:, :, 0]
        # Mel -> linear magnitude spectrogram; presumably 513 frequency bins
        # (FFT size 1024) given the reshape below — TODO confirm against
        # SpectralUtil.
        X_mag = su.tacotron_mel_to_mag(_mel_spec)
        x_mag_original_length = X_mag.shape[0]
        # Pad the time axis up to a whole number of windows (always adds one
        # extra window, preserving the original padding behavior).
        x_mag_target_length = int(
            X_mag.shape[0] / subseq_len) * subseq_len + subseq_len
        X_mag = np.pad(X_mag,
                       ([0, x_mag_target_length - X_mag.shape[0]], [0, 0]),
                       'constant')
        num_examples = int(x_mag_target_length / subseq_len)
        X_mag = np.reshape(X_mag, [num_examples, subseq_len, 513, 1])

        gen_mags = []
        for n in range(num_examples):
            # NOTE: previously this also fetched x_mag_input (the fed value)
            # into an unused 'heur_mag' buffer; that dead fetch is removed.
            _gen = gen_sess.run(gen_mag_spec,
                                feed_dict={x_mag_input: X_mag[n:n + 1]})
            # Magnitudes cannot be negative; clamp the generator output.
            _gen = np.clip(_gen, 0, None)
            gen_mags.append(_gen[0])
        gen_mag = np.concatenate(gen_mags, axis=0)
        # Trim the padding before phase reconstruction. (A dead
        # su.audio_from_mag_spec call on the still-padded spectrogram, whose
        # result was always overwritten below, has been removed.)
        gen_mag = gen_mag[0:x_mag_original_length]

        if args.heuristic == 'lws':
            _gen_audio = spectral.magspec_to_waveform_lws(
                gen_mag.astype('float64'), 1024, 256)
        elif args.heuristic == 'gl':
            _gen_audio = spectral.magspec_to_waveform_griffin_lim(
                gen_mag, 1024, 256)
        else:
            raise NotImplementedError()

        # Portable replacement for fp.split("/")[-1][:-3] + "wav".
        fn = os.path.splitext(os.path.basename(fp))[0] + '.wav'
        output_file_name = os.path.join(args.output_dir, fn)
        print("Writing", fidx, output_file_name)
        audioio.save_as_wav(output_file_name, args.fs, _gen_audio)
    end = time.time()
    print("Execution Time in Seconds", end - start)