# Example #1
# 0
def say(sentence):
    """Synthesize *sentence* to speech, write it to a wav file and play it.

    Digits are spelled out with ``num2words``, characters outside ``vocab``
    are dropped, then the text is run through the autoregressive
    text2mel -> SSRN pipeline and saved/played as '1.wav'.

    NOTE(review): relies on module-level ``num2words``, ``vocab``, ``hp``,
    ``get_test_data``, ``text2mel``, ``ssrn``, ``save_to_wav`` and
    ``playsound`` defined elsewhere in this file.
    """
    # Spell out numbers ("3" -> "three") so the model only sees letters.
    new_sentence = " ".join(
        num2words(w) if w.isdigit() else w for w in sentence.split()
    )
    # Keep only characters the model's vocabulary knows.
    normalized_sentence = "".join(
        c for c in new_sentence if c.lower() in vocab
    )
    print(normalized_sentence)

    sentences = [normalized_sentence]
    max_N = len(normalized_sentence)
    L = torch.from_numpy(get_test_data(sentences, max_N))
    zeros = torch.from_numpy(np.zeros((1, hp.n_mels, 1), np.float32))
    Y = zeros
    A = None

    # Autoregressive decoding: generate one mel step at a time, feeding the
    # frames produced so far back in, until attention reaches the EOS token.
    for t in range(hp.max_T):
        _, Y_t, A = text2mel(L, Y, monotonic_attention=True)
        Y = torch.cat((zeros, Y_t), -1)
        _, attention = torch.max(A[0, :, -1], 0)
        attention = attention.item()
        if L[0, attention] == vocab.index('E'):  # EOS
            break

    # SSRN upsamples the mel spectrogram to a full magnitude spectrogram.
    _, Z = ssrn(Y)
    Z = Z.cpu().detach().numpy()
    # FIX: the original computed the filename as '%d.wav' % (i + 1) with
    # i = int(0) but then played a separately hard-coded '1.wav'; use one
    # variable for both so save and playback can never diverge.
    wav_path = '1.wav'
    save_to_wav(Z[0, :, :].T, wav_path)
    playsound(wav_path)
# Example #2
# 0
    sys.exit(1)

# synthetize by one by one because there is a batch processing bug!
for i, current_sentence in enumerate(SENTENCES):
    batch = [current_sentence]

    text_len = len(current_sentence)
    L = torch.from_numpy(get_test_data(batch, text_len))
    pad_frame = torch.from_numpy(np.zeros((1, hp.n_mels, 1), np.float32))
    Y = pad_frame
    A = None

    # Autoregressive decoding: append one mel step per iteration until the
    # last attention column points at the EOS token in the text.
    for _ in tqdm(range(hp.max_T)):
        _, mel_step, A = text2mel(L, Y, monotonic_attention=True)
        Y = torch.cat((pad_frame, mel_step), -1)
        _, focus = torch.max(A[0, :, -1], 0)
        if L[0, focus.item()] == vocab.index('E'):  # EOS
            break

    # SSRN turns the mel spectrogram into a full magnitude spectrogram.
    _, Z = ssrn(Y)

    Y = Y.cpu().detach().numpy()
    A = A.cpu().detach().numpy()
    Z = Z.cpu().detach().numpy()

    # Dump attention / mel / magnitude plots plus the synthesized audio.
    save_to_png('samples/%d-att.png' % (i + 1), A[0, :, :])
    save_to_png('samples/%d-mel.png' % (i + 1), Y[0, :, :])
    save_to_png('samples/%d-mag.png' % (i + 1), Z[0, :, :])
    save_to_wav(Z[0, :, :].T, 'samples/%d-wav.wav' % (i + 1))
    # text2mel = text2mel.eval()
    # Synthesize every sentence under no_grad (inference only, no autograd
    # graph is needed) and write one wav per sentence.
    for sentence in SENTENCES:
        with torch.no_grad():
            # NOTE(review): get_test_data is called WITHOUT the max_N
            # argument used by the other loops in this file — confirm its
            # signature supports this.
            L = torch.from_numpy(get_test_data(sentence)).to(device)
            zeros = torch.from_numpy(np.zeros((1, hp.n_mels, 1), np.float32)).to(device)
            Y = zeros

            # FIX: bound the decode loop with hp.max_T like every other
            # synthesis loop in this file — the original `while True`
            # never terminated if attention never landed on the EOS token.
            for _ in range(hp.max_T):
                _, Y_t, A = text2mel(L, Y, monotonic_attention=True)
                Y = torch.cat((zeros, Y_t), -1)
                _, attention = torch.max(A[0, :, -1], 0)
                attention = attention.item()
                if L[0, attention] == vocab.index('E'):  # EOS
                    print(f'{sentence} ok!')
                    break

            # SSRN upsamples mels to the full magnitude spectrogram.
            _, Z = ssrn(Y)
            Z = Z.cpu().detach().numpy()

        # FIX: makedirs(exist_ok=True) creates both directory levels in one
        # race-free call (the original isdir/mkdir pair could raise if the
        # directory appeared between the check and the mkdir).
        out_dir = f'samples/{folder}/(unknown)'
        os.makedirs(out_dir, exist_ok=True)
        # NOTE(review): '(unknown)' looks like a placeholder path segment,
        # and raw sentences may contain characters that are invalid in
        # filenames — verify before shipping.
        save_to_wav(Z[0, :, :].T, f'{out_dir}/{sentence}.wav')
# Example #4
# 0
    # Autoregressive text-to-mel decoding for a multi-speaker model:
    # unlike the other synthesis loops in this file, text2mel is also
    # conditioned on `speakers`.  L, Y, zeros, speakers, i and speaker are
    # assumed to be set up by enclosing code not visible in this chunk —
    # TODO confirm against the surrounding file.
    for t in tqdm(range(hp.max_T)):
        _, Y_t, A = text2mel(L, Y, speakers, monotonic_attention=True)
        # Re-prepend the zero frame and append the newly generated mels.
        Y = torch.cat((zeros, Y_t), -1)
        # Text position the most recent attention column focuses on.
        _, attention = torch.max(A[0, :, -1], 0)
        attention = attention.item()
        if L[0, attention] == vocab.index('E'):  # EOS
            break

    # SSRN converts the mel spectrogram to a full magnitude spectrogram.
    _, Z = ssrn(Y)

    # Move results to host memory for plotting / saving.
    Y = Y.cpu().detach().numpy()
    A = A.cpu().detach().numpy()
    Z = Z.cpu().detach().numpy()

    save_to_png('samples/%d-att.png' % (i + 1), A[0, :, :])
    save_to_png('samples/%d-mel.png' % (i + 1), Y[0, :, :])
    save_to_png('samples/%d-mag.png' % (i + 1), Z[0, :, :])
    # import matplotlib.pyplot as plt
    # a = self.embeddings(torch.tensor([x for x in range(10)]).cuda())
    # fig, ax = plt.subplots()
    # plt.imshow(A[0,:,:23])
    # labels = [item.get_text() for item in ax.get_xticklabels()]
    # labels=[x for x in sentence]
    #
    # ax.set_yticklabels(labels[::-1])
    # plt.show()
    # save_to_wav(Z[0, :, :].T, 'samples/%d-wav.wav' % (i + 1))
    # The wav filename is additionally tagged with the speaker id.
    print('saving for speaker: ', speaker)
    save_to_wav(Z[0, :, :].T, 'samples/%d-%s-wav.wav' % ((i + 1), speaker))
# Example #5
# 0
    # Single-sentence batch (`i` comes from the enclosing loop; the next
    # fragment below reuses `sentences`, so that name is kept).
    sentences = [SENTENCES[i]]

    text_length = len(SENTENCES[i])
    L = torch.from_numpy(get_test_data(sentences, text_length))
    initial_frame = torch.from_numpy(np.zeros((1, hp.n_mels, 1), np.float32))
    Y = initial_frame
    A = None

    # Decode mel frames one at a time until attention reaches the EOS token.
    for _ in tqdm(range(hp.max_T)):
        _, mel_out, A = text2mel(L, Y, monotonic_attention=True)
        Y = torch.cat((initial_frame, mel_out), -1)
        _, focus_idx = torch.max(A[0, :, -1], 0)
        if L[0, focus_idx.item()] == vocab.index('E'):  # EOS
            break

    # Upsample mels to the full magnitude spectrogram.
    _, Z = ssrn(Y)

    Y = Y.cpu().detach().numpy()
    A = A.cpu().detach().numpy()
    Z = Z.cpu().detach().numpy()

    # Save plots and audio for this sentence.
    save_to_png('samples/samples_bea_sleepiness_3k/%d-att.png' % (i + 1),
                A[0, :, :])
    save_to_png('samples/samples_bea_sleepiness_3k/%d-mel.png' % (i + 1),
                Y[0, :, :])
    save_to_png('samples/samples_bea_sleepiness_3k/%d-mag.png' % (i + 1),
                Z[0, :, :])
    save_to_wav(Z[0, :, :].T,
                'samples/samples_bea_sleepiness_3k/%d-wav.wav' % (i + 1))
    # Second synthesis pass over the same `sentences` batch; outputs go to a
    # directory named after the text2mel and SSRN checkpoint steps.
    n_chars = len(SENTENCES[i])
    L = torch.from_numpy(get_test_data(sentences, n_chars))
    silence = torch.from_numpy(np.zeros((1, hp.n_mels, 1), np.float32))
    Y = silence
    A = None

    # Autoregressive decoding; stop as soon as attention points at EOS.
    for _ in tqdm(range(hp.max_T)):
        _, frame, A = text2mel(L, Y, monotonic_attention=True)
        Y = torch.cat((silence, frame), -1)
        _, att_pos = torch.max(A[0, :, -1], 0)
        if L[0, att_pos.item()] == vocab.index('E'):  # EOS
            break

    _, Z = ssrn(Y)

    Y = Y.cpu().detach().numpy()
    A = A.cpu().detach().numpy()
    Z = Z.cpu().detach().numpy()

    #print("Z", Z[0, :, :])

    out_dir = hp.synthesize_samples_dir + '/' + t2mel_step_str + "_" + ssrn_step_str
    os.makedirs(out_dir, exist_ok=True)

    save_to_png(out_dir + '/%d-att.png' % (i + 1), A[0, :, :])
    save_to_png(out_dir + '/%d-mel.png' % (i + 1), Y[0, :, :])
    save_to_png(out_dir + '/%d-mag.png' % (i + 1), Z[0, :, :])
    save_to_wav(Z[0, :, :].T, out_dir + '/%d-wav.wav' % (i + 1))