Exemple #1
0
    def synthesize(self, args, text, idx):
        """Decode a mel spectrogram for *text* step by step and save the results.

        The decoder is run 200 times. After each run, 5 rows of the returned
        mel output are collected, and the last of those rows is written into
        the next decoder-input position so the following run can see it.

        Args:
            args: Object exposing ``save_dir``, the directory outputs go to.
            text: Input text, converted with ``text_to_sequence``.
            idx: Value formatted into the output names ``mel-{idx}`` and
                ``align-{idx}.png``.

        Returns:
            None. The collected mel rows are written with ``np.save`` and the
            alignment from the final run is passed to ``plot_alignment``.
        """
        token_ids = text_to_sequence(text)
        decoder_frames = np.zeros((1, 200, mel_dim), dtype='float32')

        fetches = [self.mel_output, self.alignment]
        feed = {
            self.model.enc_input: [np.asarray(token_ids, dtype=np.int32)],
            self.model.sequence_length:
            np.asarray([len(token_ids)], dtype=np.int32),
            # The same array object is fed on every run; it is updated in
            # place inside the loop below.
            self.model.dec_input: decoder_frames,
        }

        mel_rows = []
        for step in range(200):
            mel_out, alignment = self.session.run(fetches, feed_dict=feed)
            begin = 5 * step
            end = begin + 5
            # The final step has no following decoder slot to fill.
            if step + 1 < 200:
                decoder_frames[:, step + 1, :] = mel_out[end - 1, :]
            mel_rows.extend(mel_out[begin:end, :])

        mel_path = os.path.join(args.save_dir, 'mel-{}'.format(idx))
        np.save(mel_path, mel_rows, allow_pickle=False)

        input_seq = sequence_to_text(token_ids)
        alignment_path = os.path.join(args.save_dir,
                                      'align-{}.png'.format(idx))
        plot_alignment(alignment, alignment_path, input_seq)
Exemple #2
0
def test_step(text, idx):
    """Run step-by-step inference for *text* and save the mel and alignment.

    ``model`` is called ``max_iter`` times with ``is_training=False``. Each
    call contributes ``reduction`` frames of its mel output to the result, and
    the last of those frames is copied into the next decoder-input position.

    Args:
        text: Input text, converted with ``text_to_sequence``.
        idx: Value formatted into the output names ``mel-{idx}`` and
            ``align-{idx}.png`` under ``save_dir``.

    Returns:
        None. Results are written to disk and plotted.
    """
    token_ids = text_to_sequence(text)
    enc_input = np.asarray([token_ids], dtype=np.int32)
    sequence_length = np.asarray([len(token_ids)], dtype=np.int32)
    dec_input = np.zeros((1, max_iter, mel_dim), dtype=np.float32)

    chunks = []
    for step in range(max_iter):
        mel_out, alignment = model(enc_input,
                                   sequence_length,
                                   dec_input,
                                   is_training=False)
        start = reduction * step
        stop = reduction * (step + 1)
        # The final step has no following decoder slot to fill.
        if step + 1 < max_iter:
            dec_input[:, step + 1, :] = mel_out[:, stop - 1, :]
        chunks.extend(mel_out[:, start:stop, :])

    # Flatten the collected chunks into rows of mel_dim values.
    mel = np.reshape(np.asarray(chunks), [-1, mel_dim])
    alignment = np.squeeze(alignment, axis=0)

    mel_path = os.path.join(save_dir, 'mel-{}'.format(idx))
    np.save(mel_path, mel, allow_pickle=False)

    input_seq = sequence_to_text(token_ids)
    alignment_path = os.path.join(save_dir, 'align-{}.png'.format(idx))
    plot_alignment(alignment, alignment_path, input_seq)
Exemple #3
0
def get_mfccs_and_phones(wav_file, mode='train'):
    """Load saved features together with their transcript and phone labels.

    Despite its name, ``wav_file`` is read with ``np.load``, so it must be a
    saved NumPy array. The matching ``.PHN`` and ``.TXT`` files are looked up
    in the same directory under the same base name.

    Args:
        wav_file: Path to the saved feature array.
        mode: ``'train'`` returns the training tuple (applying
            ``spec_augment`` when ``hparams.aug`` is enabled); any other
            value additionally returns ``wav_file`` as the first element.

    Returns:
        ``(mfccs, sentence, phns, len(phns), len(sentence))`` in train mode,
        otherwise ``(wav_file, mfccs, sentence, phns, len(phns),
        len(sentence))``.
    """
    # Only the hop length is needed here; the other STFT parameters are unused.
    _, hop_length, _ = _stft_parameters()
    mfccs = np.load(wav_file)
    num_timesteps = mfccs.shape[0]

    rpath, temp = os.path.split(wav_file)
    name, _ = os.path.splitext(temp)
    phn_file = os.path.join(rpath, name + ".PHN")
    # Build the transcript path from the stem rather than string-replacing
    # ".PHN", which would also rewrite any ".PHN" elsewhere in the path.
    sentence_file = os.path.join(rpath, name + ".TXT")

    # The first two space-separated fields of the transcript line are dropped;
    # presumably they are the start/end offsets -- TODO confirm.
    with open(sentence_file) as f:
        sentence = " ".join(f.readline().strip().split(" ")[2:])
    sentence = np.asarray(text_to_sequence(sentence), dtype=np.int32)

    phn2idx, _ = load_vocab()
    # One label per feature row, stored in NumPy's default float dtype.
    phns = np.zeros(shape=(num_timesteps, ))
    # Read inside a context manager so the handle is always closed.
    with open(phn_file, 'r') as f:
        phn_lines = f.read().splitlines()
    for line in phn_lines:
        start_point, _, phn = line.split()
        bnd = int(start_point) // hop_length
        if phn in mapping_48:
            phn = mapping_48[phn]
        # Fill from this boundary to the end; later phones overwrite the tail.
        phns[bnd:] = phn2idx[phn]

    if mode == 'train':
        # Explicit comparison kept so non-bool values behave as before.
        if hparams.aug == True:  # noqa: E712
            mfccs = spec_augment(mfccs)
        return mfccs, sentence, phns, len(phns), len(sentence)
    return wav_file, mfccs, sentence, phns, len(phns), len(sentence)
    def _get_next_example(self):
        """Return the next example, reshuffling once the metadata is used up.

        Returns:
            A tuple ``(input_data, mel_target, linear_target, length)`` where
            ``input_data`` is the int32 sequence for the entry's text, the two
            targets are arrays loaded from ``self._datadir``, and ``length``
            is ``len(linear_target)``.
        """
        # Every entry has been served: rewind and reshuffle for the next pass.
        if self._offset >= len(self._metadata):
            self._offset = 0
            random.shuffle(self._metadata)

        entry = self._metadata[self._offset]
        self._offset += 1

        # Entry layout as used here: [0] linear file, [1] mel file, [3] text.
        linear_path = os.path.join(self._datadir, entry[0])
        mel_path = os.path.join(self._datadir, entry[1])

        token_ids = np.asarray(text_to_sequence(entry[3]), dtype=np.int32)
        linear_target = np.load(linear_path)
        mel_target = np.load(mel_path)
        return (token_ids, mel_target, linear_target, len(linear_target))
Exemple #5
0
 def synthesize(self, text):
   """Synthesize *text* and return the audio as WAV-encoded bytes.

   The text is first converted to pinyin with numeric tone marks, then
   turned into a sequence using the cleaners listed in ``hparams.cleaners``,
   and finally run through the session to obtain ``self.wav_output``.
   """
   # Convert the Chinese text to pinyin (space-separated, numeric tones).
   pinyin_text = Pinyin().get_pinyin(text, " ", tone_marks='numbers')
   cleaners = [name.strip() for name in hparams.cleaners.split(',')]

   # Pinyin characters -> sequence.
   token_ids = text_to_sequence(pinyin_text, cleaners)
   model_inputs = [np.asarray(token_ids, dtype=np.int32)]
   model_lengths = np.asarray([len(token_ids)], dtype=np.int32)

   waveform = self.session.run(
     self.wav_output,
     feed_dict={
       self.model.inputs: model_inputs,
       self.model.input_lengths: model_lengths,
     })
   waveform = audio.inv_preemphasis(waveform)

   # Write the WAV into memory so the raw bytes can be returned.
   buffer = io.BytesIO()
   audio.save_wav(waveform, buffer)
   return buffer.getvalue()
Exemple #6
0
 def synthesize(self, text, base_path, idx):
     """Synthesize *text*, plot its alignment, and return WAV-encoded bytes.

     Args:
         text: Input text, converted with ``text_to_sequence``.
         base_path: Prefix of the alignment image path.
         idx: Integer formatted into the image name
             ``<base_path>-<idx>-align.png``.

     Returns:
         The bytes written by ``audio.save_wav`` for the waveform, cut at
         the index reported by ``audio.find_endpoint``.
     """
     token_ids = text_to_sequence(text)
     model_inputs = [np.asarray(token_ids, dtype=np.int32)]
     model_lengths = np.asarray([len(token_ids)], dtype=np.int32)

     fetched_inputs, waveform, alignment = self.session.run(
         [self.inputs, self.wav_output, self.alignments],
         feed_dict={
             self.model.inputs: model_inputs,
             self.model.input_lengths: model_lengths,
         })

     waveform = audio.inv_preemphasis(waveform)
     endpoint = audio.find_endpoint(waveform)
     waveform = waveform[:endpoint]

     # Write the WAV into memory so the raw bytes can be returned.
     buffer = io.BytesIO()
     audio.save_wav(waveform, buffer)

     input_text = sequence_to_text(fetched_inputs)
     align_path = '%s-%d-align.png' % (base_path, idx)
     plot.plot_alignment(alignment, align_path, input_text)
     return buffer.getvalue()
Exemple #7
0
# The metadata file is pipe-separated with no header row. Column 0 is used as
# the list of wav paths and column 3 as the text to convert.
metadata = pd.read_csv(text_dir[0], dtype='object', sep='|', header=None)
wav_dir, text = metadata[0].values, metadata[3].values

# Create the output root and one sub-directory per kind of artifact.
out_dir = './data'
os.makedirs(out_dir, exist_ok=True)
for subdir in ('/text', '/mel', '/spec', '/dec'):
    os.makedirs(out_dir + subdir, exist_ok=True)

# text
print('Load Text')
text_out_dir = out_dir + '/text'
for idx, s in enumerate(tqdm(text)):
    # Remove everything matching `filters`, then convert to a sequence.
    sentence = text_to_sequence(re.sub(filters, '', s))
    np.save(os.path.join(text_out_dir, 'kss-text-%05d.npy' % idx),
            sentence,
            allow_pickle=False)
print('Text Done')

# audio
print('Load Audio')
mel_len_list = []
for idx, fn in enumerate(tqdm(wav_dir)):
    file_dir = './kss/' + fn
    wav, _ = librosa.load(file_dir, sr=sample_rate)
    wav, _ = librosa.effects.trim(wav)
    wav = np.append(wav[0], wav[1:] - preemphasis * wav[:-1])
    stft = librosa.stft(wav,