Example #1
def create_write_files(ret, sess, g, x, mname, cdir, typeS):

    # Add a batch dimension and allocate buffers for the step-by-step decode.
    x = np.expand_dims(x, axis=0)
    mel_output = np.zeros((1, hp.T_y // hp.r, hp.n_mels * hp.r), np.float32)
    decoder_output = np.zeros((1, hp.T_y // hp.r, hp.embed_size), np.float32)
    alignments_li = np.zeros((hp.dec_layers, hp.T_x, hp.T_y // hp.r), np.float32)
    prev_max_attentions_li = np.zeros((hp.dec_layers, 1), np.int32)
    # Decode autoregressively: commit one reduced frame group per step,
    # feeding the frames produced so far back in through g.y1.
    for j in range(hp.T_y // hp.r):
        _gs, _mel_output, _decoder_output, _max_attentions_li, _alignments_li = \
            sess.run([g.global_step, g.mel_output, g.decoder_output, g.max_attentions_li, g.alignments_li],
                     {g.x: x,
                      g.y1: mel_output,
                      g.prev_max_attentions_li: prev_max_attentions_li})
        mel_output[:, j, :] = _mel_output[:, j, :]
        decoder_output[:, j, :] = _decoder_output[:, j, :]
        prev_max_attentions_li = np.array(_max_attentions_li)[:, :, j]
       
    #mag_output = sess.run([g.mag_output], {g.mel_output: mel_output})
    # Convert the accumulated decoder states into a magnitude spectrogram.
    mag_output = sess.run(g.mag_output, {g.decoder_output: decoder_output})

    
    x = np.squeeze(x, axis=0)
    txt = invert_text(x)
    mag_output = np.squeeze(mag_output[0])

    try:
        wav = spectrogram2wav(mag_output)
        wav, _ = librosa.effects.trim(wav)
        write(cdir + "/{}mag.wav".format(mname), hp.sr, wav)
        ret.append([txt,wav,typeS+"_world"])
    except Exception:
        # sys.exc_clear() exists only in Python 2; just skip clips that fail.
        pass

    return ret
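
A minimal NumPy-only sketch of the feedback pattern in the loop above: at each step only the frame group for index j is committed to the buffer that is fed back in, so later steps condition on everything produced so far. fake_decoder and the toy dimensions below are stand-ins for the TensorFlow graph and the hp.* values, not part of the repo.

import numpy as np

STEPS, MEL_DIM = 4, 3                        # stand-ins for hp.T_y // hp.r and hp.n_mels * hp.r

def fake_decoder(feedback):
    # Stand-in for sess.run: returns a full-length prediction whose value
    # encodes how many feedback frames were already filled in.
    return np.ones_like(feedback) * (np.count_nonzero(feedback.sum(axis=2)) + 1)

mel_output = np.zeros((1, STEPS, MEL_DIM), np.float32)
for j in range(STEPS):
    pred = fake_decoder(mel_output)
    mel_output[:, j, :] = pred[:, j, :]      # commit only step j's frame group

print(mel_output[0, :, 0])                   # [1. 2. 3. 4.]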
Example #2
def create_write_files(ret, sess, g, x, mname, cdir, typeS):

    # Pad the single input up to the graph's fixed batch size; only row 0
    # carries real data.
    x = np.expand_dims(x, axis=0)
    x = np.append(x, np.zeros((hp.batch_size - 1, hp.T_x)), axis=0)
    mel_output = np.zeros((hp.batch_size, hp.T_y // hp.r, hp.n_mels * hp.r),
                          np.float32)
    # Predict the whole mel spectrogram in a single (non-autoregressive) pass.
    _gs, mel_output = \
        sess.run([g.global_step, g.mel_output],
                 {g.x: x,
                  g.y1: mel_output})

    # Run the converter network to get the magnitude spectrogram.
    mag_output = sess.run(g.mag_output, {g.converter_input: mel_output})

    # x = np.squeeze(x[0], axis=0)
    x = x[0]
    txt = invert_text(x)
    mag_output = np.squeeze(mag_output[0])

    try:
        wav = spectrogram2wav(mag_output)
        wav, _ = librosa.effects.trim(wav)
        write(cdir + "/{}mag.wav".format(mname), hp.sr, wav)
        ret.append([txt, wav, typeS + "_world", mel_output, mag_output])
    except Exception:
        # sys.exc_clear() exists only in Python 2; just skip clips that fail.
        pass

    return ret
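
Example #2 pads the single encoded text up to the graph's fixed batch size before the one-shot prediction. A small sketch of just that padding step, with hypothetical constants in place of hp.batch_size and hp.T_x:

import numpy as np

BATCH, T_X = 16, 180                         # hypothetical hp.batch_size, hp.T_x

x = np.random.randint(0, 50, (T_X,))         # one encoded text sequence
x = np.expand_dims(x, axis=0)                # (1, T_X)
x = np.append(x, np.zeros((BATCH - 1, T_X)), axis=0)   # dummy rows fill the batch
print(x.shape)                               # (16, 180) -- only row 0 is real data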
Example #3
def create_write_files_conv(ret, sess, mel_in, g, x, mname, cdir, typeS):

    # Run only the converter network on a precomputed mel/decoder input.
    mag_output = sess.run(g.mag_output, {g.converter_input: mel_in})

    txt = invert_text(x)
    mag_output = np.squeeze(mag_output[0])

    try:
        wav = spectrogram2wav(mag_output)
        wav, _ = librosa.effects.trim(wav)
        write(cdir + "/{}mag.wav".format(mname), hp.sr, wav)
        ret.append([txt, wav, typeS + "_world", mel_in, mag_output])
    except Exception:
        # sys.exc_clear() exists only in Python 2; just skip clips that fail.
        pass

    return ret
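
create_write_files_conv expects mel_in to already match the converter placeholder's fixed shape. A hedged sketch of how a single precomputed mel (frames x mel_dim) might be padded into that shape; the constants and the pad_mel_for_converter helper are hypothetical, and the real placeholder shape depends on hp.*.

import numpy as np

BATCH, T_FRAMES, MEL_DIM = 16, 200, 400      # hypothetical hp.batch_size, hp.T_y // hp.r, hp.n_mels * hp.r

def pad_mel_for_converter(mel):
    # Zero-pad (or trim) a (frames, mel_dim) array into slot 0 of the
    # fixed-shape batch the converter placeholder expects.
    out = np.zeros((BATCH, T_FRAMES, MEL_DIM), np.float32)
    frames = min(mel.shape[0], T_FRAMES)
    out[0, :frames, :] = mel[:frames, :MEL_DIM]
    return out

mel_in = pad_mel_for_converter(np.random.rand(150, MEL_DIM).astype(np.float32))
print(mel_in.shape)                          # (16, 200, 400)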
Example #4
def create_write_files(ret, sess, g, x, mname, cdir, samples, typeS):

    # for i in range(0, len(x), hp.batch_size):
    #     x2 = x[i:i+hp.batch_size]

    # Get melspectrogram
    mel_output = np.zeros((hp.batch_size, hp.T_y // hp.r, hp.n_mels * hp.r),
                          np.float32)
    decoder_output = np.zeros((hp.batch_size, hp.T_y // hp.r, hp.embed_size),
                              np.float32)
    alignments_li = np.zeros((hp.dec_layers, hp.T_x, hp.T_y // hp.r),
                             np.float32)
    prev_max_attentions_li = np.zeros((hp.dec_layers, hp.batch_size), np.int32)
    #alignments = np.zeros((hp.T_x, hp.T_y//hp.r), np.float32)
    # Windowed incremental decoding: predict hp.rwin reduced frame groups per
    # sess.run call and feed them back in for the next window.
    for j in range((hp.T_y // hp.r) // hp.rwin):
        _gs, _mel_output, _decoder_output, _max_attentions_li, _alignments_li = \
            sess.run([g.global_step, g.mel_output, g.decoder_output, g.max_attentions_li, g.alignments_li],
                     {g.x: x,
                      g.y1: mel_output,
                      g.prev_max_attentions_li: prev_max_attentions_li})
        mel_output[:, j * hp.rwin:(j + 1) * hp.rwin, :] = \
            _mel_output[:, j * hp.rwin:(j + 1) * hp.rwin, :]
        decoder_output[:, j * hp.rwin:(j + 1) * hp.rwin, :] = \
            _decoder_output[:, j * hp.rwin:(j + 1) * hp.rwin, :]
        #alignments_li[:, :, j*hp.rwin:(j+1)*hp.rwin] = np.array(_alignments_li)[:, :, j*hp.rwin:(j+1)*hp.rwin]
        prev_max_attentions_li = np.array(_max_attentions_li)[:, :, j * hp.rwin]

    # Get magnitude
    mag_output = sess.run(g.mag_output, {g.decoder_output: decoder_output})
    # Pick `samples` random items from the batch to write out.
    z_list = random.sample(range(0, hp.batch_size), samples)

    # Generate wav files
    for i, mag in enumerate(mag_output):
        if i in z_list:
            # generate wav files
            #mag = mag*hp.mag_std + hp.mag_mean # denormalize
            #audio = spectrogram2wav(np.power(10, mag) ** hp.sharpening_factor)
            txt = x[i]
            txt = invert_text(txt)
            wav = spectrogram2wav(mag)
            write(cdir + "/{}_{}.wav".format(mname, i), hp.sr, wav)
            ret.append([txt, wav, typeS])
    return ret
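
All four examples lean on spectrogram2wav to turn the predicted magnitude spectrogram into audio. The repo's own helper is not shown here; the sketch below is a generic Griffin-Lim reconstruction under the assumption that spectrogram2wav follows the usual Tacotron-style recipe. The n_fft/hop/win values and any de-normalization or transposition are assumptions, not taken from the source.

import numpy as np
import librosa

def griffin_lim_sketch(mag, n_fft=2048, hop_length=256, win_length=1024, n_iter=50):
    # mag: (1 + n_fft // 2, T) linear-magnitude spectrogram.
    # Iteratively re-estimate phase, then invert to a waveform.
    angles = np.exp(2j * np.pi * np.random.rand(*mag.shape))
    spec = mag.astype(np.complex64) * angles
    for _ in range(n_iter):
        wav = librosa.istft(spec, hop_length=hop_length, win_length=win_length)
        est = librosa.stft(wav, n_fft=n_fft, hop_length=hop_length, win_length=win_length)
        spec = mag * np.exp(1j * np.angle(est))
    return librosa.istft(spec, hop_length=hop_length, win_length=win_length)

# Usage sketch: the examples' mag_output is typically (frames, 1 + n_fft // 2),
# so it would be transposed before reconstruction.
wav = griffin_lim_sketch(np.abs(np.random.randn(1025, 80)).astype(np.float32))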