def create_write_files(ret, sess, g, x, mname, cdir, typeS):
    """Synthesize one utterance step by step and write it to a wav file.

    The graph is run once per decoder step.  After each run, step ``j`` of
    the returned mel and decoder outputs is copied into local buffers, and
    the mel buffer plus the latest max-attention positions are fed back on
    the next run.  The finished decoder output is then converted to a
    magnitude spectrogram, inverted to a waveform, trimmed with
    ``librosa.effects.trim`` and written to ``cdir + "/<mname>mag.wav"``.

    Args:
        ret: List collecting results; mutated in place and returned.
        sess: Session-like object exposing ``run(fetches, feed_dict)``.
        g: Graph object exposing the tensors referenced below.
        x: A single encoded text input; a leading batch axis of size 1 is
            added here (presumably of length ``hp.T_x`` -- TODO confirm).
        mname: Prefix used for the output file name.
        cdir: Output directory (concatenated as a string).
        typeS: Label stored with the result, suffixed with ``"_world"``.

    Returns:
        ``ret``.  ``[txt, wav, typeS + "_world"]`` is appended only when
        the waveform was generated and written without an exception.
    """
    # NOTE(review): other definitions named ``create_write_files`` appear
    # later in this file; if they live in the same module the last one wins.
    import logging

    n_steps = hp.T_y // hp.r

    x = np.expand_dims(x, axis=0)
    mel_output = np.zeros((1, n_steps, hp.n_mels * hp.r), np.float32)
    decoder_output = np.zeros((1, n_steps, hp.embed_size), np.float32)
    prev_max_attentions_li = np.zeros((hp.dec_layers, 1), np.int32)

    for j in range(n_steps):
        _gs, _mel_output, _decoder_output, _max_attentions_li, _alignments_li = sess.run(
            [g.global_step, g.mel_output, g.decoder_output,
             g.max_attentions_li, g.alignments_li],
            {g.x: x,
             g.y1: mel_output,
             g.prev_max_attentions_li: prev_max_attentions_li})
        # Keep only the step that was just produced; earlier steps are
        # already in the buffers from previous iterations.
        mel_output[:, j, :] = _mel_output[:, j, :]
        decoder_output[:, j, :] = _decoder_output[:, j, :]
        prev_max_attentions_li = np.array(_max_attentions_li)[:, :, j]

    mag_output = sess.run(g.mag_output, {g.decoder_output: decoder_output})

    x = np.squeeze(x, axis=0)
    txt = invert_text(x)
    mag_output = np.squeeze(mag_output[0])
    try:
        wav = spectrogram2wav(mag_output)
        wav, _ = librosa.effects.trim(wav)
        write(cdir + "/{}mag.wav".format(mname), hp.sr, wav)
        ret.append([txt, wav, typeS + "_world"])
    except Exception:
        # Best-effort: a failed sample is skipped, but no longer silently.
        logging.getLogger(__name__).warning(
            "could not generate/write wav for %r", mname, exc_info=True)
        # sys.exc_clear() exists only on Python 2; calling it unguarded on
        # Python 3 raises AttributeError from inside this handler.
        clear_exc = getattr(sys, "exc_clear", None)
        if clear_exc is not None:
            clear_exc()
    return ret
def create_write_files(ret, sess, g, x, mname, cdir, typeS):
    """Synthesize one utterance in a single pass and write it to a wav file.

    ``x`` is padded with all-zero rows up to ``hp.batch_size``, the graph is
    run once to obtain the mel output, and that output is fed to
    ``g.converter_input`` to obtain the magnitude spectrogram.  Only the
    first batch entry is inverted to a waveform, trimmed with
    ``librosa.effects.trim`` and written to ``cdir + "/<mname>mag.wav"``.

    Args:
        ret: List collecting results; mutated in place and returned.
        sess: Session-like object exposing ``run(fetches, feed_dict)``.
        g: Graph object exposing the tensors referenced below.
        x: A single encoded text input (must have length ``hp.T_x`` for the
            zero padding to be appended along axis 0).
        mname: Prefix used for the output file name.
        cdir: Output directory (concatenated as a string).
        typeS: Label stored with the result, suffixed with ``"_world"``.

    Returns:
        ``ret``.  ``[txt, wav, typeS + "_world", mel_output, mag_output]``
        is appended only when the waveform was generated and written
        without an exception; ``mel_output`` is the whole padded batch,
        ``mag_output`` is the squeezed first entry.
    """
    # NOTE(review): other definitions named ``create_write_files`` appear
    # elsewhere in this file; if they live in the same module the last one
    # wins.
    import logging

    x = np.expand_dims(x, axis=0)
    # Pad with x's own dtype: np.zeros defaults to float64, and appending
    # that would silently upcast integer token ids before invert_text().
    padding = np.zeros((hp.batch_size - 1, hp.T_x), dtype=x.dtype)
    x = np.append(x, padding, axis=0)

    mel_output = np.zeros(
        (hp.batch_size, hp.T_y // hp.r, hp.n_mels * hp.r), np.float32)
    _gs, mel_output = sess.run(
        [g.global_step, g.mel_output], {g.x: x, g.y1: mel_output})
    mag_output = sess.run(g.mag_output, {g.converter_input: mel_output})

    # Only the first row is the real input; the rest is padding.
    x = x[0]
    txt = invert_text(x)
    mag_output = np.squeeze(mag_output[0])
    try:
        wav = spectrogram2wav(mag_output)
        wav, _ = librosa.effects.trim(wav)
        write(cdir + "/{}mag.wav".format(mname), hp.sr, wav)
        ret.append([txt, wav, typeS + "_world", mel_output, mag_output])
    except Exception:
        # Best-effort: a failed sample is skipped, but no longer silently.
        logging.getLogger(__name__).warning(
            "could not generate/write wav for %r", mname, exc_info=True)
        # sys.exc_clear() exists only on Python 2; calling it unguarded on
        # Python 3 raises AttributeError from inside this handler.
        clear_exc = getattr(sys, "exc_clear", None)
        if clear_exc is not None:
            clear_exc()
    return ret
def create_write_files_conv(ret, sess, mel_in, g, x, mname, cdir, typeS):
    """Convert a given mel input to a waveform and write it to a wav file.

    ``mel_in`` is fed to ``g.converter_input`` to obtain the magnitude
    spectrogram.  The first entry of that output is inverted to a waveform,
    trimmed with ``librosa.effects.trim`` and written to
    ``cdir + "/<mname>mag.wav"``.

    Args:
        ret: List collecting results; mutated in place and returned.
        sess: Session-like object exposing ``run(fetches, feed_dict)``.
        mel_in: Value fed to ``g.converter_input`` (presumably a batch of
            mel spectrograms -- TODO confirm shape against the graph).
        g: Graph object exposing ``mag_output`` and ``converter_input``.
        x: Encoded text passed unchanged to ``invert_text``.
        mname: Prefix used for the output file name.
        cdir: Output directory (concatenated as a string).
        typeS: Label stored with the result, suffixed with ``"_world"``.

    Returns:
        ``ret``.  ``[txt, wav, typeS + "_world", mel_in, mag_output]`` is
        appended only when the waveform was generated and written without
        an exception.
    """
    import logging

    mag_output = sess.run(g.mag_output, {g.converter_input: mel_in})
    txt = invert_text(x)
    mag_output = np.squeeze(mag_output[0])
    try:
        wav = spectrogram2wav(mag_output)
        wav, _ = librosa.effects.trim(wav)
        write(cdir + "/{}mag.wav".format(mname), hp.sr, wav)
        ret.append([txt, wav, typeS + "_world", mel_in, mag_output])
    except Exception:
        # Best-effort: a failed sample is skipped, but no longer silently.
        logging.getLogger(__name__).warning(
            "could not generate/write wav for %r", mname, exc_info=True)
        # sys.exc_clear() exists only on Python 2; calling it unguarded on
        # Python 3 raises AttributeError from inside this handler.
        clear_exc = getattr(sys, "exc_clear", None)
        if clear_exc is not None:
            clear_exc()
    return ret
def create_write_files(ret, sess, g, x, mname, cdir, samples, typeS):
    """Synthesize a batch window by window and write a random subset as wavs.

    The graph is run once per window of ``hp.rwin`` decoder steps.  After
    each run the frames of the current window are copied from the returned
    mel and decoder outputs into local buffers, and the mel buffer plus the
    max-attention positions at the window's first step are fed back on the
    next run.  The finished decoder output is converted to magnitude
    spectrograms; ``samples`` batch indices are drawn with
    ``random.sample`` and each chosen entry is inverted to a waveform and
    written to ``cdir + "/<mname>_<index>.wav"``.

    Args:
        ret: List collecting results; mutated in place and returned.
        sess: Session-like object exposing ``run(fetches, feed_dict)``.
        g: Graph object exposing the tensors referenced below.
        x: Batch of encoded text inputs, indexed by batch position
            (presumably ``hp.batch_size`` rows -- TODO confirm).
        mname: Prefix used for the output file names.
        cdir: Output directory (concatenated as a string).
        samples: Number of batch entries to write.
        typeS: Label stored unchanged with each result.

    Returns:
        ``ret`` with one ``[txt, wav, typeS]`` entry appended per written
        sample, in ascending batch-index order.
    """
    # NOTE(review): other definitions named ``create_write_files`` with a
    # different signature appear earlier in this file.
    total_steps = hp.T_y // hp.r
    window = hp.rwin

    # Buffers that accumulate the outputs across windows.
    mel_output = np.zeros(
        (hp.batch_size, total_steps, hp.n_mels * hp.r), np.float32)
    decoder_output = np.zeros(
        (hp.batch_size, total_steps, hp.embed_size), np.float32)
    # Allocated as in the original implementation; never read below.
    alignments_li = np.zeros(
        (hp.dec_layers, hp.T_x, total_steps), np.float32)
    prev_max_attentions_li = np.zeros(
        (hp.dec_layers, hp.batch_size), np.int32)

    fetches = [
        g.global_step,
        g.mel_output,
        g.decoder_output,
        g.max_attentions_li,
        g.alignments_li,
    ]
    for j in range(total_steps // window):
        feed = {
            g.x: x,
            g.y1: mel_output,
            g.prev_max_attentions_li: prev_max_attentions_li,
        }
        _gs, step_mel, step_dec, step_max_att, _alignments = sess.run(
            fetches, feed)

        frames = slice(j * window, (j + 1) * window)
        mel_output[:, frames, :] = step_mel[:, frames, :]
        decoder_output[:, frames, :] = step_dec[:, frames, :]
        prev_max_attentions_li = np.array(step_max_att)[:, :, j * window]

    # Magnitude spectrograms for the whole batch.
    mag_output = sess.run(g.mag_output, {g.decoder_output: decoder_output})

    chosen = random.sample(range(0, hp.batch_size), samples)
    for idx, mag in enumerate(mag_output):
        if idx not in chosen:
            continue
        text = invert_text(x[idx])
        wav = spectrogram2wav(mag)
        write(cdir + "/{}_{}.wav".format(mname, idx), hp.sr, wav)
        ret.append([text, wav, typeS])
    return ret