def eval():
    """Evaluate the model on the first eval utterance and write TF summaries.

    Restores the latest checkpoint from ``hp.logdir``, generates the mel
    spectrogram autoregressively (one predicted frame fed back per step),
    then runs the merged-summary op and logs it for TensorBoard.
    """
    # Load graph
    g = Graph(mode="eval")
    print("Evaluation Graph loaded")

    # Load data
    fpaths, text_lengths, texts = load_data(mode="eval")

    # Parse the first sample.
    # FIX: np.fromstring is deprecated for binary input; np.frombuffer is the
    # supported equivalent and decodes the same int32 byte string.
    text = np.frombuffer(texts[0], np.int32)  # (None,)
    fname, mel, mag = load_spectrograms(fpaths[0])
    x = np.expand_dims(text, 0)  # (1, None)
    y = np.expand_dims(mel, 0)   # (1, None, n_mels*r)
    z = np.expand_dims(mag, 0)   # (1, None, n_mfccs)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
        print("Restored!")
        writer = tf.summary.FileWriter(hp.logdir, sess.graph)

        # Feed Forward
        ## mel: predict one frame per pass, feeding predictions back in.
        y_hat = np.zeros((1, y.shape[1], y.shape[2]), np.float32)  # hp.n_mels*hp.r
        for j in range(y.shape[1]):
            _y_hat = sess.run(g.y_hat, {g.x: x, g.y: y_hat})
            y_hat[:, j, :] = _y_hat[:, j, :]

        ## mag: run summaries with ground truth + predictions.
        merged, gs = sess.run([g.merged, g.global_step],
                              {g.x: x, g.y: y, g.y_hat: y_hat, g.z: z})
        writer.add_summary(merged, global_step=gs)
        writer.close()
def eval():
    """Run one evaluation pass and log merged TF summaries.

    Loads the eval graph, restores the newest checkpoint in ``hp.logdir``,
    predicts the mel spectrogram of the first eval utterance frame by frame
    (autoregressively), and writes the summary/global-step pair.
    """
    # Load graph
    g = Graph(mode="eval")
    print("Evaluation Graph loaded")

    # Load data
    fpaths, text_lengths, texts = load_data(mode="eval")

    # Decode the first utterance's text bytes into int32 indices.
    # FIX: replaced deprecated np.fromstring with np.frombuffer -- the two
    # produce identical arrays for binary input, but fromstring warns/errors
    # on modern NumPy.
    text = np.frombuffer(texts[0], np.int32)  # (None,)
    fname, mel, mag = load_spectrograms(fpaths[0])

    # Add a batch axis of size 1 to each input.
    x = np.expand_dims(text, 0)  # (1, None)
    y = np.expand_dims(mel, 0)   # (1, None, n_mels*r)
    z = np.expand_dims(mag, 0)   # (1, None, n_mfccs)

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
        print("Restored!")
        writer = tf.summary.FileWriter(hp.logdir, sess.graph)

        # Feed forward -- mel: one predicted frame per session run.
        y_hat = np.zeros((1, y.shape[1], y.shape[2]), np.float32)  # hp.n_mels*hp.r
        for j in range(y.shape[1]):
            _y_hat = sess.run(g.y_hat, {g.x: x, g.y: y_hat})
            y_hat[:, j, :] = _y_hat[:, j, :]

        # Feed forward -- mag/summaries.
        merged, gs = sess.run([g.merged, g.global_step],
                              {g.x: x, g.y: y, g.y_hat: y_hat, g.z: z})
        writer.add_summary(merged, global_step=gs)
        writer.close()
def proc(fpath, hp, extension):
    """Precompute spectrograms for one audio file and cache them as .npy.

    Saves the coarse mel to ``hp.coarse_audio_dir``, the magnitude to
    ``hp.full_audio_dir`` and the full-resolution mel to ``hp.full_mel_dir``.
    Silently returns for paths that are not regular files.
    """
    if not os.path.isfile(fpath):
        return
    fname, mel, mag, full_mel = load_spectrograms(hp, fpath)
    # FIX: compute the target filename once instead of three times, and swap
    # only the *trailing* extension -- str.replace would also rewrite an
    # occurrence of `extension` elsewhere in the name. Falls back to the old
    # behavior if the name doesn't end with `extension`.
    if fname.endswith(extension):
        npy_name = fname[:-len(extension)] + ".npy"
    else:
        npy_name = fname.replace(extension, ".npy")
    np.save("{}/{}".format(hp.coarse_audio_dir, npy_name), mel)
    np.save("{}/{}".format(hp.full_audio_dir, npy_name), mag)
    np.save("{}/{}".format(hp.full_mel_dir, npy_name), full_mel)
def prepro(datadir):
    """Precompute mel/mag spectrograms for every file listed under `datadir`.

    Results are stored as .npy files under ``datadir + hp.meldir`` and
    ``datadir + hp.magdir`` (both created on demand).
    """
    # Load data
    fpaths, _, _ = load_data(datadir)  # list

    # FIX: directory creation is loop-invariant -- hoisted out of the loop;
    # exist_ok=True also removes the racy exists()-then-makedirs() check.
    os.makedirs(datadir + hp.meldir, exist_ok=True)
    os.makedirs(datadir + hp.magdir, exist_ok=True)

    for fpath in tqdm.tqdm(fpaths):
        fname, mel, mag = load_spectrograms(fpath)
        npy_name = fname.replace("wav", "npy")  # computed once per file
        np.save(datadir + hp.meldir + "/{}".format(npy_name), mel)
        np.save(datadir + hp.magdir + "/{}".format(npy_name), mag)
def synthesize():
    """Generate one wav per input text, conditioned on reference audio.

    Restores the latest checkpoint from ``hp.logdir``, predicts 200 mel
    frames autoregressively, converts them to magnitude spectrograms and
    writes the resulting wav files into ``hp.sampledir``.
    """
    if not os.path.exists(hp.sampledir):
        os.mkdir(hp.sampledir)

    # Input texts
    texts = load_data(mode="synthesize")

    # Reference audio: collect mels, then zero-pad all to a common length.
    ref_mels = []
    longest = 0
    for ref_path in glob(hp.ref_audio):
        _, mel, _ = load_spectrograms(ref_path)
        mel = np.reshape(mel, (-1, hp.n_mels))
        if mel.shape[0] > longest:
            longest = mel.shape[0]
        ref_mels.append(mel)
    ref = np.zeros((len(ref_mels), longest, hp.n_mels), np.float32)
    for idx, m in enumerate(ref_mels):
        ref[idx, :m.shape[0], :] = m

    # Graph + checkpoint
    g = Graph(mode="synthesize")
    print("Graph loaded")
    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
        print("Restored!")

        # Autoregressive mel prediction: one frame per session run.
        y_hat = np.zeros((texts.shape[0], 200, hp.n_mels * hp.r), np.float32)
        for frame in tqdm.tqdm(range(200)):
            step_out = sess.run(g.y_hat, {g.x: texts, g.y: y_hat, g.ref: ref})
            y_hat[:, frame, :] = step_out[:, frame, :]

        # Mel -> magnitude -> waveform on disk.
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})
        for idx, mag in enumerate(mags):
            print("File {}.wav is being generated ...".format(idx + 1))
            audio = spectrogram2wav(mag)
            write(os.path.join(hp.sampledir, '{}.wav'.format(idx + 1)),
                  hp.sr, audio)
import pygame
from os import listdir
from os.path import isfile, join

# Collect the wav samples actually present on disk.
existing_samples = [f for f in listdir("LJSpeech-1.1/wavs")
                    if isfile(join("LJSpeech-1.1/wavs", f))]
# FIX: use a set so the per-row membership test below is O(1) instead of an
# O(n) scan of a list.
existing_identifiers = {sample.replace(".wav", "") for sample in existing_samples}
print("{} existing samples found. Preloading transcript...".format(len(existing_samples)))

# Keep only the metadata rows whose audio file exists.
with open("LJSpeech-1.1/metadata_all.csv", "r") as all_metadata_file:
    with open("LJSpeech-1.1/transcript.csv", "w") as transcript_file:
        # FIX: iterate the file object directly -- readlines() needlessly
        # materializes the whole metadata file in memory.
        for row in all_metadata_file:
            splitted_row = row.split("|")
            identifier = splitted_row[0]
            if identifier in existing_identifiers:
                transcript_file.write(row)
print("Done with preloading transcript.")

# Load data
fpaths, _, _ = load_data()  # list

# FIX: directory creation is loop-invariant -- hoisted out of the loop.
if not os.path.exists("mels"):
    os.mkdir("mels")
if not os.path.exists("mags"):
    os.mkdir("mags")

# Cache mel/mag spectrograms for every listed wav.
for fpath in tqdm.tqdm(fpaths):
    fname, mel, mag = load_spectrograms(fpath)
    np.save("mels/{}".format(fname.replace("wav", "npy")), mel)
    np.save("mags/{}".format(fname.replace("wav", "npy")), mag)
def f(fpath):
    """Compute and cache mel/mag spectrograms for one wav file.

    Saves ``<stem>.npy`` under ``mels/`` and ``mags/``; always returns None.
    """
    fname, mel, mag = load_spectrograms(fpath)
    # FIX: swap only the trailing ".wav" extension -- replace("wav", "npy")
    # would also rewrite a "wav" substring elsewhere in the filename
    # (e.g. "wavfile.wav" -> "npyfile.npy"). Fall back to the old behavior
    # for names that don't end in ".wav".
    if fname.endswith(".wav"):
        npy_name = fname[:-4] + ".npy"
    else:
        npy_name = fname.replace("wav", "npy")
    np.save("mels/{}".format(npy_name), mel)
    np.save("mags/{}".format(npy_name), mag)
    return None
import numpy as np
from hyperparams import Hyperparams as hp
import tqdm

if not os.path.exists("mels"):
    os.mkdir("mels")
if not os.path.exists("worlds"):
    os.mkdir("worlds")

X = []
Y = []
XTest = []
YTest = []

# data directory
data_list = os.listdir(hp.data_dir)

# FIX: iterate the filenames directly instead of indexing with
# range(len(...)) (also removes the misspelled `data_lenght` counter).
for wav_name in tqdm.tqdm(data_list):
    wav_path = hp.data_dir + '/' + wav_name
    try:
        mel = np.array(load_spectrograms(wav_path))  # mel spectrogram
        world = wav2world(wav_path)
        # Pad the WORLD features so their length is exactly 8x the mel length.
        num_padding = mel.shape[0] * 8 - world.shape[0]
        world = np.pad(world, [[0, num_padding], [0, 0]], mode="constant")
        np.save("mels/{}".format(wav_name.replace("wav", "npy")), mel)
        np.save("worlds/{}".format(wav_name.replace("wav", "npy")), world)
    except Exception:
        # FIX: the bare `except:` also swallowed KeyboardInterrupt/SystemExit.
        # Keep the best-effort "skip unreadable files" behavior, but only for
        # real errors.
        continue

print('preprocessing ok !!!')
def evaluate():
    """Synthesize the first `evaluate_wav_num` eval utterances and report
    the mean spectrogram MSE against ground truth.

    Writes a result line to the global file handle `opf` (opened elsewhere).
    """
    # Load graph
    g = Graph(mode="evaluate")
    print("Graph loaded")

    # Load data
    fpaths, _, texts = load_data(mode="evaluate")

    # Right-pad every text sequence to the longest one.
    lengths = [len(t) for t in texts]
    maxlen = max(lengths)  # FIX: was sorted(..., reverse=True)[0] -- O(n log n) for a max
    new_texts = np.zeros((len(texts), maxlen), np.int32)
    for i, text in enumerate(texts):
        new_texts[i, :len(text)] = text  # FIX: dropped redundant [idx for idx in text] copy

    # Evaluate only the first `evaluate_wav_num` utterances.
    new_texts = new_texts[:evaluate_wav_num]
    half_size = int(len(fpaths) / 2)
    print(half_size)
    fpaths = fpaths[:evaluate_wav_num]

    saver = tf.train.Saver()
    with tf.Session() as sess:
        saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
        print("Evaluate Model Restored!")
        # (A large commented-out per-split audio-MSE variant was removed here;
        # the active path below compares spectrograms directly.)

        # Feed Forward
        ## mel: autoregressive prediction, 200 frames max.
        y_hat = np.zeros((new_texts.shape[0], 200, hp.n_mels * hp.r),
                         np.float32)  # hp.n_mels*hp.r
        for j in tqdm.tqdm(range(200)):
            _y_hat = sess.run(g.y_hat, {g.x: new_texts, g.y: y_hat})
            y_hat[:, j, :] = _y_hat[:, j, :]

        ## mag: compare predicted magnitude spectrograms to ground truth.
        mags = sess.run(g.z_hat, {g.y_hat: y_hat})
        err = 0.0
        for i, mag in enumerate(mags):
            fname, mel_ans, mag_ans = load_spectrograms(fpaths[i])
            print("File {} is being evaluated ...".format(fname))
            err += calculate_mse(mag, mag_ans)
        err = err / float(len(fpaths))
        print(err)
        # NOTE(review): `opf` is not defined in this function -- assumed to be
        # a module-level open file handle; confirm upstream.
        opf.write(hp.logdir + " spectrogram mse: " + str(err) + "\n")
    # NOTE(review): this is the tail of a function whose `def` line is outside
    # this view -- it accumulates RMSE over paired lists X/Y and returns the
    # mean; `lenx`, `mse`, `RMSE`, `X`, `Y` are defined in the unseen part.
    for i in range(lenx):
        mse = mse + RMSE(X[i], Y[i])
    return mse / lenx

# Script: render a log-magnitude spectrogram image (.png) next to each
# ground-truth ("Esperado") wav used in the subjective evaluation.
esperado_dir = '../avaliacao-subjetiva/Esperado/'
exp_dir = '../avaliacao-subjetiva/Preditos/'
exp_list = list(os.listdir(exp_dir))
esperado_list = list(os.listdir(esperado_dir))

esp_mag = []  # [file_id, linear magnitude] per wav
esp_db = []   # [file_id, dB-scaled magnitude] per wav
for i in esperado_list:
    if i[-4:] == '.wav':
        file_id = i[:-4]
        _, _, mag = load_spectrograms(os.path.join(esperado_dir, i))
        # Convert to dB relative to the peak for plotting.
        db = librosa.amplitude_to_db(mag, ref=np.max)
        display.specshow(db, y_axis='log', x_axis='time')
        save_img_dir = os.path.join(esperado_dir, i.replace('.wav', '.png'))
        esp_mag.append([file_id, mag])
        esp_db.append([file_id, db])
        plt.title('Espectrograma STFT')
        plt.colorbar(format='%+2.0f dB')
        plt.tight_layout()
        plt.savefig(save_img_dir)
        plt.cla()  # Clear axis
        plt.clf()
from hyperparams import Hyperparams as hp # import the necessary packages from skimage.measure import structural_similarity as ssim import matplotlib.pyplot as plt import numpy as np import cv2 cinco_org = np.load('../savedir/synthesized-audios/RTISI-LA/5-org.npy') savedir = '../savedir/spectogramas/' vocoders = ['RTISI-LA', 'Griff-Lim'] for voco in vocoders: diretory = os.path.join('../savedir/synthesized-audios/', voco) arquivos = ['1.wav', '5.wav', '5-org.wav', '3.wav', '3-org.wav'] for i in arquivos: _, _, mag = load_spectrograms(os.path.join(diretory, i)) print(os.path.join(diretory, i), ' :', mag.shape) if i == '5.wav': mag = mag[:cinco_org.shape[0]][:] np.save(os.path.join(diretory, i.replace('.wav', '')), mag) # transpose mag = mag.T # de-noramlize mag = (np.clip(mag, 0, 1) * hp.max_db) - hp.max_db + hp.ref_db # to amplitude mag = np.power(10.0, mag * 0.05) mag = mag**hp.power display.specshow(librosa.amplitude_to_db(mag, ref=np.max), y_axis='log', x_axis='time')
def synthesize():
    """Batched synthesis: generate a wav for every input text.

    Texts are zero-padded to a whole number of hp.batch_size batches;
    reference mels from hp.ref_audio are cycled across batches via `looper`.
    Mel frames are generated autoregressively (200 steps), converted to
    magnitude spectrograms, then to audio in hp.sampledir.
    """
    if not os.path.exists(hp.sampledir):
        os.mkdir(hp.sampledir)

    # Load data
    texts = load_data(mode="synthesize")

    # pad texts to multiple of batch_size (extra rows are all-zero and are
    # skipped again on output via the index_label > texts_len check below)
    texts_len = texts.shape[0]
    num_batches = int(ceil(float(texts_len) / hp.batch_size))
    padding_len = num_batches * hp.batch_size - texts_len
    texts = np.pad(texts, ((0, padding_len), (0, 0)),
                   'constant', constant_values=0)

    # reference audio: load every file matching hp.ref_audio as a mel and
    # zero-pad all of them to the longest length.
    # NOTE(review): presumably style/speaker conditioning -- confirm.
    mels, maxlen = [], 0
    files = glob(hp.ref_audio)
    for f in files:
        _, mel, _ = load_spectrograms(f)
        mel = np.reshape(mel, (-1, hp.n_mels))
        maxlen = max(maxlen, mel.shape[0])
        mels.append(mel)
    ref = np.zeros((len(mels), maxlen, hp.n_mels), np.float32)
    for i, m in enumerate(mels):
        ref[i, :m.shape[0], :] = m

    # Load graph
    g = Graph(mode="synthesize")
    print("Graph loaded")

    saver = tf.train.Saver()
    with tf.Session() as sess:
        # Restore either the latest checkpoint or the one named on argv[1].
        if len(sys.argv) == 1:
            saver.restore(sess, tf.train.latest_checkpoint(hp.logdir))
            print("Restored latest checkpoint")
        else:
            saver.restore(sess, sys.argv[1])
            print("Restored checkpoint: %s" % sys.argv[1])

        batches = [
            texts[i:i + hp.batch_size]
            for i in range(0, texts.shape[0], hp.batch_size)
        ]
        start = 0
        batch_index = 0

        # Feed Forward
        for batch in batches:
            # Cycle through the reference mels, one slice per batch.
            ref_batch, start = looper(ref, start, hp.batch_size)

            ## mel: autoregressive generation, one frame per step (200 max).
            y_hat = np.zeros((batch.shape[0], 200, hp.n_mels * hp.r),
                             np.float32)  # hp.n_mels*hp.r
            for j in tqdm.tqdm(range(200)):
                _y_hat = sess.run(g.y_hat, {
                    g.x: batch,
                    g.y: y_hat,
                    g.ref: ref_batch
                })
                y_hat[:, j, :] = _y_hat[:, j, :]

            ## mag
            mags = sess.run(g.z_hat, {g.y_hat: y_hat})
            for i, mag in enumerate(mags):
                # 1-based global output index across batches.
                index_label = batch_index * hp.batch_size + i + 1
                if index_label > texts_len:
                    break  # remaining rows are padding, not real texts
                print("File {}.wav is being generated ...".format(index_label))
                audio = spectrogram2wav(mag)
                write(os.path.join(hp.sampledir,
                                   '{}.wav'.format(index_label)),
                      hp.sr, audio)
            batch_index += 1