예제 #1
0
    def start_training(self,
                       itt_no_improve,
                       batch_size,
                       target_sample_rate,
                       params=None):
        """Run the vocoder training loop over the training set.

        The development set is rendered and the model is stored to
        ``self.target_output_path`` once before training starts. Every epoch
        then shuffles ``self.trainset.files`` in place and, for each entry,
        loads ``<entry>.mgc.npy`` and ``<entry>.orig.wav``, converts the
        waveform to a float32 array and hands both to ``self.vocoder.learn``.
        Progress, the value returned by ``learn`` and the wall-clock time of
        the call are written to stdout. The model is stored and the
        development set synthesized at the end of every epoch and at a
        periodic checkpoint inside the epoch.

        Args:
            itt_no_improve: Initial value of the loop guard; training only
                starts when it is greater than zero.
            batch_size: Forwarded to ``self.vocoder.learn`` and
                ``self.synth_devset``.
            target_sample_rate: Forwarded to ``self.synth_devset``.
            params: Accepted for interface compatibility; not read here.
        """
        import time
        from random import shuffle

        remaining_itt = itt_no_improve
        wave_io = DatasetIO()
        self._render_devset()
        sys.stdout.write("\n")
        self.vocoder.store(self.target_output_path)

        epoch_no = 1
        files_seen = 0  # running count across epochs; not read in this method
        # NOTE(review): remaining_itt is never decremented below, so once
        # entered this loop only ends through an exception or interruption.
        while remaining_itt > 0:
            sys.stdout.write("Starting epoch {}\n".format(epoch_no))
            sys.stdout.write("Shuffling training data\n")
            shuffle(self.trainset.files)
            epoch_loss = 0
            for position, base_path in enumerate(self.trainset.files, 1):
                files_seen += 1
                progress = "\t{}/{} processing file ".format(
                    position, len(self.trainset.files))
                sys.stdout.write(progress + base_path + '\n')
                sys.stdout.flush()

                mgc = np.load(base_path + ".mgc.npy")
                data, _ = wave_io.read_wave(base_path + ".orig.wav")
                wave_disc = np.array(data, dtype=np.float32)

                started = time.time()
                loss = self.vocoder.learn(wave_disc, mgc, batch_size)
                epoch_loss += loss
                elapsed = time.time() - started
                sys.stdout.write(' avg loss=' + str(loss) +
                                 " execution time=" + str(elapsed) + '\n')
                sys.stdout.flush()

                # Checkpoint when the 1-based position plus one is a multiple
                # of 5000, i.e. after the 4999th, 9999th, ... file of an epoch.
                if (position + 1) % 5000 == 0:
                    self.vocoder.store(self.target_output_path)
                    self.synth_devset(batch_size, target_sample_rate)

            # End-of-epoch checkpoint.
            self.vocoder.store(self.target_output_path)
            self.synth_devset(batch_size, target_sample_rate)
            epoch_no += 1
예제 #2
0
    def start_training(self, itt_no_improve, batch_size, target_sample_rate):
        """Run the vocoder training loop over the training set.

        The development set is rendered and the model is stored to
        ``data/models/rnn_vocoder`` once before training starts. Every epoch
        then shuffles ``self.trainset.files`` in place and, for each entry,
        loads ``<entry>.mgc.npy`` and ``<entry>.orig.wav``, encodes the
        waveform (``ulaw_encode`` when ``self.use_ulaw`` is set, ``b16_enc``
        otherwise) and hands the result to ``self.vocoder.learn``. Progress,
        the value returned by ``learn`` and the wall-clock time of the call
        are written to stdout. The development set is synthesized and the
        model stored at the end of every epoch and at a periodic checkpoint
        inside the epoch.

        Args:
            itt_no_improve: Initial value of the loop guard; training only
                starts when it is greater than zero.
            batch_size: Forwarded to ``self.vocoder.learn`` and
                ``self.synth_devset``.
            target_sample_rate: Forwarded to ``self.synth_devset``.
        """
        import time
        from random import shuffle

        model_path = 'data/models/rnn_vocoder'
        remaining_itt = itt_no_improve
        wave_io = DatasetIO()
        self._render_devset()
        sys.stdout.write("\n")
        self.vocoder.store(model_path)

        epoch_no = 1
        # NOTE(review): remaining_itt is never decremented below, so once
        # entered this loop only ends through an exception or interruption.
        while remaining_itt > 0:
            sys.stdout.write("Starting epoch {}\n".format(epoch_no))
            sys.stdout.write("Shuffling training data\n")
            shuffle(self.trainset.files)
            epoch_loss = 0
            for position, base_path in enumerate(self.trainset.files, 1):
                # No newline here: the loss report below completes the line.
                progress = "\t{}/{} processing file ".format(
                    position, len(self.trainset.files))
                sys.stdout.write(progress + base_path)
                sys.stdout.flush()

                mgc = np.load(base_path + ".mgc.npy")
                data, _ = wave_io.read_wave(base_path + ".orig.wav")
                if not self.use_ulaw:
                    wave_disc = wave_io.b16_enc(data)
                else:
                    # Only the first element of the returned pair is used.
                    wave_disc, _ = wave_io.ulaw_encode(data)

                started = time.time()
                loss = self.vocoder.learn(wave_disc, mgc, batch_size)
                epoch_loss += loss
                elapsed = time.time() - started
                sys.stdout.write(' avg loss=' + str(loss) +
                                 " execution time=" + str(elapsed) + '\n')
                sys.stdout.flush()

                # Checkpoint when the 1-based position plus one is a multiple
                # of 50, i.e. after the 49th, 99th, ... file of an epoch.
                if (position + 1) % 50 == 0:
                    self.synth_devset(batch_size, target_sample_rate)
                    self.vocoder.store(model_path)

            # End-of-epoch checkpoint.
            self.synth_devset(batch_size, target_sample_rate)
            self.vocoder.store(model_path)
            epoch_no += 1
예제 #3
0
    def phase_1_prepare_corpus(params):
        """Scan the raw train/dev folders and write the processed corpus.

        A base name is considered valid when both ``<base>.txt`` and
        ``<base>.wav`` exist in the source folder. For every valid base name
        the following is written to ``data/processed/train`` or
        ``data/processed/dev``:

        * ``<base>.lab`` - copied when present in the source folder,
          otherwise produced by ``create_lab_file`` from the ``.txt`` file;
        * ``<base>.txt`` - copied verbatim;
        * ``<base>.png`` - produced by ``render_spectrogram`` from the mel
          spectrogram;
        * ``<base>.orig.wav`` - the waveform as returned by
          ``DatasetIO.read_wave`` at ``params.target_sample_rate``;
        * ``<base>.mgc`` - the mel spectrogram, saved through ``array2file``.

        Args:
            params: Object providing ``train_folder``, ``dev_folder``,
                ``target_sample_rate`` and ``mgc_order``.

        The output folders are not created here; they must already exist.
        """
        from os import listdir
        from os.path import exists, isfile, join, splitext
        from shutil import copyfile

        from io_modules.dataset import DatasetIO
        from io_modules.vocoder import MelVocoder

        def _list_files(folder):
            """Return the entries of `folder` for which isfile() is true."""
            return [f for f in listdir(folder) if isfile(join(folder, f))]

        def _valid_base_names(folder, file_names):
            """Return unique base names having both a .txt and a .wav file.

            First-seen order is kept; a set makes the duplicate check O(1).
            """
            ordered = []
            seen = set()
            for file_name in file_names:
                # splitext copes with extensions of any length, unlike
                # slicing off a fixed four characters.
                base_name = splitext(file_name)[0]
                if base_name in seen:
                    continue
                if exists(join(folder, base_name + '.txt')) and exists(
                        join(folder, base_name + '.wav')):
                    seen.add(base_name)
                    ordered.append(base_name)
            return ordered

        def _scan(label, folder, file_names):
            """Report and return the valid base names found in `folder`."""
            sys.stdout.write("Scanning " + label + " files...")
            sys.stdout.flush()
            base_names = _valid_base_names(folder, file_names)
            sys.stdout.write(" found " + str(len(base_names)) + " valid " +
                             label + " files\n")
            return base_names

        def _process_split(base_names, src_folder, dst_folder):
            """Convert every base name of one split into `dst_folder`."""
            total = len(base_names)
            for index, base_name in enumerate(base_names, 1):
                sys.stdout.write("\r\tprocessing file " + str(index) + "/" +
                                 str(total))
                sys.stdout.flush()
                txt_name = base_name + '.txt'
                lab_name = base_name + '.lab'

                # LAB - copy or create
                if exists(join(src_folder, lab_name)):
                    copyfile(join(src_folder, lab_name),
                             join(dst_folder, lab_name))
                else:
                    create_lab_file(join(src_folder, txt_name),
                                    join(dst_folder, lab_name))
                # TXT
                copyfile(join(src_folder, txt_name),
                         join(dst_folder, txt_name))
                # WAVE
                data, sample_rate = dio.read_wave(
                    join(src_folder, base_name + '.wav'),
                    sample_rate=params.target_sample_rate)
                mgc = vocoder.melspectrogram(
                    data,
                    sample_rate=params.target_sample_rate,
                    num_mels=params.mgc_order)
                # SPECT
                render_spectrogram(mgc, join(dst_folder, base_name + '.png'))
                dio.write_wave(join(dst_folder, base_name + '.orig.wav'),
                               data, sample_rate)
                array2file(mgc, join(dst_folder, base_name + '.mgc'))
            sys.stdout.write('\n')

        # List both folders up front so a missing folder fails before any
        # processing starts.
        train_entries = _list_files(params.train_folder)
        dev_entries = _list_files(params.dev_folder)

        train_files = _scan("training", params.train_folder, train_entries)
        dev_files = _scan("development", params.dev_folder, dev_entries)

        dio = DatasetIO()
        vocoder = MelVocoder()
        _process_split(train_files, params.train_folder,
                       'data/processed/train')
        _process_split(dev_files, params.dev_folder, 'data/processed/dev')
예제 #4
0
    def start_training(self,
                       itt_no_improve,
                       batch_size,
                       target_sample_rate,
                       params=None):
        """Run the vocoder training loop, optionally ramping up sparsity.

        The development set is rendered and the model stored once before
        training starts. Every epoch then shuffles ``self.trainset.files`` in
        place and, for each entry, loads ``<entry>.mgc.npy`` and
        ``<entry>.orig.wav``, encodes the waveform (``ulaw_encode`` when
        ``self.use_ulaw`` is set, ``b16_enc`` otherwise) and hands the result
        to ``self.vocoder.learn``. The development set is synthesized and the
        model stored at the end of every epoch and at a periodic checkpoint
        inside the epoch.

        When ``self.vocoder.sparse`` is true the model is stored under
        ``data/models/rnn_vocoder_sparse`` (otherwise
        ``data/models/rnn_vocoder``) and the sparsity of ``rnnFine`` and
        ``rnnCoarse`` starts at ``params.sparsity_step`` percent and is raised
        by the same step every ``params.sparsity_increase`` files while it
        does not exceed ``params.sparsity_target``.

        Args:
            itt_no_improve: Initial value of the loop guard; training only
                starts when it is greater than zero.
            batch_size: Forwarded to ``self.vocoder.learn`` and
                ``self.synth_devset``.
            target_sample_rate: Forwarded to ``self.synth_devset``.
            params: Object providing ``sparsity_step``, ``sparsity_increase``
                and ``sparsity_target``. Only read when the vocoder is
                sparse, so it may be left as ``None`` otherwise.
        """
        import time
        from random import shuffle

        epoch = 1
        left_itt = itt_no_improve
        dio = DatasetIO()
        self._render_devset()
        sys.stdout.write("\n")

        sparse = self.vocoder.sparse
        if sparse:
            model_path = 'data/models/rnn_vocoder_sparse'
        else:
            model_path = 'data/models/rnn_vocoder'

        def _apply_sparsity(percent):
            # Both networks take the sparsity as a fraction, not a percentage.
            level = float(percent) / 100
            self.vocoder.rnnFine.set_sparsity(level)
            self.vocoder.rnnCoarse.set_sparsity(level)

        sparsity = None
        if sparse:
            sparsity = params.sparsity_step
            print("Setting sparsity at: " + str(sparsity) + "%")
            _apply_sparsity(sparsity)

        self.vocoder.store(model_path)

        files_since_step = 0
        # NOTE(review): left_itt is never decremented below, so once entered
        # this loop only ends through an exception or interruption.
        while left_itt > 0:
            sys.stdout.write("Starting epoch " + str(epoch) + "\n")
            sys.stdout.write("Shuffling training data\n")
            shuffle(self.trainset.files)
            total_loss = 0
            for position, base_path in enumerate(self.trainset.files, 1):
                # The sparsity schedule is only meaningful (and its state only
                # defined) for a sparse vocoder; running it unconditionally
                # raised for non-sparse models.
                if sparse:
                    files_since_step += 1
                    if files_since_step == params.sparsity_increase:
                        files_since_step = 0
                        sparsity += params.sparsity_step
                        if sparsity <= params.sparsity_target:
                            print("Setting sparsity at " + str(sparsity) +
                                  "%")
                            _apply_sparsity(sparsity)
                        else:
                            # NOTE(review): an overshooting value is clamped
                            # here but not applied to the networks; confirm
                            # whether the target itself should be set.
                            sparsity = params.sparsity_target

                # No newline here: the loss report below completes the line.
                sys.stdout.write("\t" + str(position) + "/" +
                                 str(len(self.trainset.files)) +
                                 " processing file " + base_path)
                sys.stdout.flush()

                mgc = np.load(base_path + ".mgc.npy")
                data, _ = dio.read_wave(base_path + ".orig.wav")
                if self.use_ulaw:
                    # Only the first element of the returned pair is used.
                    wave_disc, _ = dio.ulaw_encode(data)
                else:
                    wave_disc = dio.b16_enc(data)

                start = time.time()
                loss = self.vocoder.learn(wave_disc, mgc, batch_size)
                total_loss += loss
                stop = time.time()
                sys.stdout.write(' avg loss=' + str(loss) +
                                 " execution time=" + str(stop - start))
                sys.stdout.write('\n')
                sys.stdout.flush()

                # Checkpoint when the 1-based position plus one is a multiple
                # of 50, i.e. after the 49th, 99th, ... file of an epoch.
                if (position + 1) % 50 == 0:
                    self.synth_devset(batch_size, target_sample_rate)
                    self.vocoder.store(model_path)

            # End-of-epoch checkpoint.
            self.synth_devset(batch_size, target_sample_rate)
            self.vocoder.store(model_path)

            epoch += 1
예제 #5
0
    def phase_1_prepare_corpus(params):
        """Scan the raw train/dev folders and write the processed corpus.

        A base name is considered valid when both ``<base>.txt`` and
        ``<base>.wav`` exist in the source folder. For every valid base name
        a ``.lab``, ``.txt``, ``.png``, ``.orig.wav`` and ``.mgc`` output is
        written to ``data/processed/train`` or ``data/processed/dev``. The
        ``.lab`` file is copied when present in the source folder and
        otherwise produced by ``create_lab_file`` from the ``.txt`` file.

        When ``params.prefix`` is set, outputs are named
        ``<prefix>_<NNNNN>`` instead of the source base name, where the
        five-digit number is a running 1-based counter shared by the training
        and development files (development numbering continues after the
        last training file).

        Args:
            params: Object providing ``train_folder``, ``dev_folder`` (may be
                ``None`` to skip the development set), ``g2p`` (model path
                prefix or ``None``), ``prefix``, ``speaker``,
                ``target_sample_rate`` and ``mgc_order``.

        The output folders are not created here; they must already exist.
        """
        from os import listdir
        from os.path import exists, isfile, join, splitext
        from shutil import copyfile

        from io_modules.dataset import DatasetIO
        from io_modules.vocoder import MelVocoder

        def _list_files(folder):
            """Return the entries of `folder` for which isfile() is true."""
            return [f for f in listdir(folder) if isfile(join(folder, f))]

        def _valid_base_names(folder, file_names):
            """Return unique base names having both a .txt and a .wav file.

            First-seen order is kept; a set makes the duplicate check O(1).
            """
            ordered = []
            seen = set()
            for file_name in file_names:
                # splitext copes with extensions of any length, unlike
                # slicing off a fixed four characters.
                base_name = splitext(file_name)[0]
                if base_name in seen:
                    continue
                if exists(join(folder, base_name + '.txt')) and exists(
                        join(folder, base_name + '.wav')):
                    seen.add(base_name)
                    ordered.append(base_name)
            return ordered

        def _scan(label, folder, file_names):
            """Report and return the valid base names found in `folder`."""
            sys.stdout.write("Scanning " + label + " files...")
            sys.stdout.flush()
            base_names = _valid_base_names(folder, file_names)
            sys.stdout.write(" found " + str(len(base_names)) + " valid " +
                             label + " files\n")
            return base_names

        def _process_split(base_names, src_folder, dst_folder, files_done):
            """Convert one split into `dst_folder`.

            `files_done` is the number of files numbered so far; the updated
            count is returned so numbering can continue in the next split.
            """
            total = len(base_names)
            for index, base_name in enumerate(base_names, 1):
                files_done += 1
                sys.stdout.write("\r\tprocessing file " + str(index) + "/" +
                                 str(total))
                sys.stdout.flush()
                txt_name = base_name + '.txt'
                lab_name = base_name + '.lab'

                # All outputs of one file share the same target base name.
                if params.prefix is not None:
                    tgt_base = params.prefix + "_{:05d}".format(files_done)
                else:
                    tgt_base = base_name

                # LAB - copy or create
                if exists(join(src_folder, lab_name)):
                    copyfile(join(src_folder, lab_name),
                             join(dst_folder, tgt_base + '.lab'))
                else:
                    create_lab_file(join(src_folder, txt_name),
                                    join(dst_folder, tgt_base + '.lab'),
                                    speaker_name=params.speaker,
                                    g2p=g2p)
                # TXT
                copyfile(join(src_folder, txt_name),
                         join(dst_folder, tgt_base + '.txt'))
                # WAVE
                data, sample_rate = dio.read_wave(
                    join(src_folder, base_name + '.wav'),
                    sample_rate=params.target_sample_rate)
                mgc = vocoder.melspectrogram(
                    data,
                    sample_rate=params.target_sample_rate,
                    num_mels=params.mgc_order)
                # SPECT
                render_spectrogram(mgc, join(dst_folder, tgt_base + '.png'))
                dio.write_wave(join(dst_folder, tgt_base + '.orig.wav'),
                               data, sample_rate)
                array2file(mgc, join(dst_folder, tgt_base + '.mgc'))
            sys.stdout.write('\n')
            return files_done

        # List the folders up front so a missing folder fails before any
        # model loading or processing starts.
        train_entries = _list_files(params.train_folder)
        if params.dev_folder is not None:
            dev_entries = _list_files(params.dev_folder)
        else:
            dev_entries = []

        if params.g2p is not None:
            from models.g2p import G2P
            from io_modules.encodings import Encodings
            g2p_encodings = Encodings()
            g2p_encodings.load(params.g2p + '.encodings')
            g2p = G2P(g2p_encodings)
            g2p.load(params.g2p + '-bestAcc.network')
            # The lexicon is optional and only loaded when the file exists.
            if exists(params.g2p + '.lexicon'):
                g2p.load_lexicon(params.g2p + '.lexicon')
        else:
            g2p = None

        train_files = _scan("training", params.train_folder, train_entries)
        dev_files = _scan("development", params.dev_folder, dev_entries)

        dio = DatasetIO()
        vocoder = MelVocoder()
        total_files = _process_split(train_files, params.train_folder,
                                     'data/processed/train', 0)
        # With no dev folder the list is empty and the folder is never used.
        _process_split(dev_files, params.dev_folder, 'data/processed/dev',
                       total_files)