Code example #1 (score: 0)
File: synthesize.py — Project: wyb330/Tacotron-2
def run_eval(args, checkpoint_path, output_dir, hparams, sentences):
    """Synthesize mel spectrograms for a list of sentences in eval mode.

    Args:
        args: parsed CLI namespace; ``args.model`` and ``args.mels_dir`` are read.
        checkpoint_path: Tacotron checkpoint to restore into the Synthesizer.
        output_dir: root directory; ``eval/`` and ``logs-eval/`` are created under it.
        hparams: hyper-parameter object (``hparams.cleaners`` is read).
        sentences: iterable of text strings to synthesize.

    Returns:
        Path of the eval directory containing the generated mel .npy files
        and a ``map.txt`` mapping each text to its mel filename.

    Raises:
        ValueError: in the full Tacotron-2/Both pipeline, when the eval
            directory does not match the WaveNet input directory.
    """
    eval_dir = os.path.join(output_dir, 'eval')
    log_dir = os.path.join(output_dir, 'logs-eval')

    if args.model in ('Both', 'Tacotron-2'):
        # An `assert` is stripped under `python -O`; raise explicitly so the
        # pipeline invariant (eval_dir == wavenet_input_dir) is always enforced.
        if os.path.normpath(eval_dir) != os.path.normpath(args.mels_dir):
            raise ValueError(
                'eval_dir must equal mels_dir (wavenet_input_dir): '
                '{} != {}'.format(eval_dir, args.mels_dir))

    # Create output paths if they don't exist
    os.makedirs(eval_dir, exist_ok=True)
    os.makedirs(log_dir, exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'wavs'), exist_ok=True)
    os.makedirs(os.path.join(log_dir, 'plots'), exist_ok=True)

    log(hparams_debug_string())
    synth = Synthesizer()
    synth.load(checkpoint_path, hparams)

    with open(os.path.join(eval_dir, 'map.txt'), 'w') as map_file:
        for i, text in enumerate(tqdm(sentences)):
            if is_korean_text(text):
                text = normalize_number(text)
                # Split Hangul into individual jamo (consonant/vowel units).
                text = split_to_jamo(text, hparams.cleaners)
            # NOTE(review): Synthesizer.synthesize elsewhere in this project
            # takes a trailing speaker_id argument that this call omits —
            # confirm the Synthesizer variant used here matches this signature.
            mel_filename = synth.synthesize(text, i + 1, eval_dir, log_dir,
                                            None)

            map_file.write('{}|{}\n'.format(text, mel_filename))
    log('synthesized mel spectrograms at {}'.format(eval_dir))
    return eval_dir
Code example #2 (score: 0)
File: app.py — Project: wyb330/Tacotron-2
def view_method():
    """Flask view: synthesize an audio response for the ``text`` query param.

    Query params:
        text: sentence to synthesize; Korean text is number-normalized and
            split into jamo before synthesis.
        speaker_id: integer speaker index; defaults to 0 when absent.

    Returns:
        An audio response for non-empty text, otherwise a JSON success body
        with HTTP 200.
    """
    text = request.args.get('text')
    # Guard against a missing `text` param: the original passed None straight
    # into is_korean_text(), which is not defined for non-string input.
    if text and is_korean_text(text):
        text = normalize_number(text)
        # Split Hangul into individual jamo (consonant/vowel units).
        text = split_to_jamo(text, hparams.cleaners)

    if text:
        # Parse speaker_id lazily so the no-text path cannot crash, and
        # default to 0 so an absent param no longer raises TypeError.
        speaker_id = int(request.args.get('speaker_id', 0))
        return generate_audio_response(text, speaker_id)
    else:
        return jsonify(success=True), 200
Code example #3 (score: 0)
File: feeder.py — Project: wyb330/Tacotron-2
    def _get_next_example(self):
        """Serve one training example from disk.

        Returns a tuple of (input_data, mel_target, token_target,
        linear_target, speaker_id, mel_length). The metadata list is
        reshuffled in place each time every entry has been served, so
        successive epochs see different orderings.
        """
        # End of epoch: rewind the cursor and reshuffle the metadata.
        if self._train_offset >= len(self._train_meta):
            self._train_offset = 0
            np.random.shuffle(self._train_meta)

        entry = self._train_meta[self._train_offset]
        self._train_offset += 1

        speaker = int(entry[5])
        sentence = entry[6]
        if is_korean_text(sentence):
            sentence = normalize_number(sentence)
            # Split Hangul into individual jamo (consonant/vowel units).
            sentence = split_to_jamo(sentence, self._cleaner_names)

        encoded = np.asarray(text_to_sequence(sentence, self._cleaner_names), dtype=np.int32)
        mel = np.load(os.path.join(self._mel_dir, entry[1]))
        # Zeros mark a not-yet-finished sequence for all frames but the last.
        stop_targets = np.asarray([0.] * (len(mel) - 1))
        linear = np.load(os.path.join(self._linear_dir, entry[2]))
        return (encoded, mel, stop_targets, linear, speaker, len(mel))
Code example #4 (score: 0)
File: synthesizer.py — Project: wyb330/Tacotron-2
    def synthesize(self, text, index, out_dir, log_dir, mel_filename, speaker_id):
        """Synthesize one sentence into a mel spectrogram (and optionally audio).

        Args:
            text: input sentence; Korean text is normalized and split to jamo.
            index: 1-based output index used in filenames, or None to play the
                synthesized audio through PyAudio instead of writing files.
            out_dir: directory receiving the .npy mel file.
            log_dir: when not None, wavs/plots are also written under it.
            mel_filename: GTA-mode target mel .npy path (only read when self.gta).
            speaker_id: integer speaker index fed to the model.

        Returns:
            The saved mel filename, or None when index is None (playback mode).
        """
        hparams = self._hparams
        cleaner_names = [x.strip() for x in hparams.cleaners.split(',')]
        if is_korean_text(text):
            text = normalize_number(text)
            # Split Hangul into individual jamo before sequencing.
            text = split_to_jamo(text, cleaner_names)
        seq = text_to_sequence(text, cleaner_names)
        feed_dict = {
            self.model.inputs: [np.asarray(seq, dtype=np.int32)],
            self.model.input_lengths: np.asarray([len(seq)], dtype=np.int32),
            self.model.speaker_ids: np.asarray([speaker_id], dtype=np.int32)
        }

        if self.gta:
            feed_dict[self.model.mel_targets] = np.load(mel_filename).reshape(1, -1, 80)

        # Linear outputs exist only when predict_linear is on AND we are not in
        # GTA mode. Track this explicitly: the original re-tested only
        # hparams.predict_linear below and hit a NameError on `linear` when
        # gta and predict_linear were both set.
        has_linear = (not self.gta) and hparams.predict_linear
        if has_linear:
            linear, mels, alignment = self.session.run(
                [self.linear_outputs, self.mel_outputs, self.alignment],
                feed_dict=feed_dict)
            linear = linear.reshape(-1, hparams.num_freq)
        else:
            mels, alignment = self.session.run([self.mel_outputs, self.alignment], feed_dict=feed_dict)

        mels = mels.reshape(-1, hparams.num_mels)  # Thanks to @imdatsolak for pointing this out

        if index is None:
            # Playback mode: synthesize to a temp wav and stream it out.
            wav = audio.inv_mel_spectrogram(mels.T, hparams)
            audio.save_wav(wav, 'temp.wav', sr=hparams.sample_rate)  # TODO: use tempfile instead of cwd

            chunk = 512
            p = pyaudio.PyAudio()
            try:
                # `with` closes the wave reader (the original leaked it), and
                # the finally blocks release PyAudio resources even if a
                # write fails mid-playback.
                with wave.open('temp.wav', 'rb') as f:
                    stream = p.open(format=p.get_format_from_width(f.getsampwidth()),
                                    channels=f.getnchannels(),
                                    rate=f.getframerate(),
                                    output=True)
                    try:
                        data = f.readframes(chunk)
                        while data:
                            stream.write(data)
                            data = f.readframes(chunk)
                    finally:
                        stream.stop_stream()
                        stream.close()
            finally:
                p.terminate()
            return

        # Write the spectrogram to disk
        # Note: outputs mel-spectrogram files and target ones have same names, just different folders
        mel_filename = os.path.join(out_dir, 'speech-mel-{:05d}.npy'.format(index))
        np.save(mel_filename, mels, allow_pickle=False)

        if log_dir is not None:
            # save wav (mel -> wav)
            wav = audio.inv_mel_spectrogram(mels.T, hparams)
            audio.save_wav(wav, os.path.join(log_dir, 'wavs/speech-wav-{:05d}-mel.wav'.format(index)),
                           sr=hparams.sample_rate)

            if has_linear:
                # save wav (linear -> wav) — only when linear was actually computed
                wav = audio.inv_linear_spectrogram(linear.T, hparams)
                audio.save_wav(wav, os.path.join(log_dir, 'wavs/speech-wav-{:05d}-linear.wav'.format(index)),
                               sr=hparams.sample_rate)

            if is_korean_char(text):
                # Recompose jamo into Hangul syllables for readable plot titles.
                text = j2h(text)
            # save alignments
            plot.plot_alignment(alignment, os.path.join(log_dir, 'plots/speech-alignment-{:05d}.png'.format(index)),
                                info='{}'.format(text), split_title=True)

            # save mel spectrogram plot
            plot.plot_spectrogram(mels, os.path.join(log_dir, 'plots/speech-mel-{:05d}.png'.format(index)),
                                  info='{}'.format(text), split_title=True)

        return mel_filename