Example #1
0
 def __init__(self, params, wavenet):
     self.params = params
     self.UPSAMPLE_COUNT = int(12.5 * params.target_sample_rate / 1000)
     self.RECEPTIVE_SIZE = 512  # this means 16ms
     self.MGC_SIZE = params.mgc_order
     self.dio = DatasetIO()
     self.vocoder = MelVocoder()
     self.wavenet = wavenet
     self.network = ParallelVocoderNetwork(receptive_field=self.RECEPTIVE_SIZE, filter_size=64).to(device)
     self.trainer = torch.optim.Adam(self.network.parameters(), lr=self.params.learning_rate)
Example #2
0
    def __init__(self, params, model=None, runtime=False):
        self.params = params
        self.HIDDEN_SIZE = [1000, 1000]

        self.FFT_SIZE = 513
        self.UPSAMPLE_COUNT = int(12.5 * params.target_sample_rate / 1000)
        self.FILTER_SIZE = 128

        self.sparse = False
        if model is None:
            self.model = dy.Model()
        else:
            self.model = model

        input_size = self.params.mgc_order  # self.UPSAMPLE_COUNT + self.params.mgc_order
        hidden_w = []
        hidden_b = []
        for layer_size in self.HIDDEN_SIZE:
            hidden_w.append(self.model.add_parameters((layer_size, input_size)))
            hidden_b.append(self.model.add_parameters((layer_size)))
            input_size = layer_size

        self.mlp_excitation = [hidden_w, hidden_b]

        input_size = self.params.mgc_order  # self.UPSAMPLE_COUNT + self.params.mgc_order
        hidden_w = []
        hidden_b = []
        for layer_size in self.HIDDEN_SIZE:
            hidden_w.append(self.model.add_parameters((layer_size, input_size)))
            hidden_b.append(self.model.add_parameters((layer_size)))
            input_size = layer_size

        self.mlp_filter = [hidden_w, hidden_b]

        input_size = self.params.mgc_order  # self.UPSAMPLE_COUNT + self.params.mgc_order
        hidden_w = []
        hidden_b = []
        for layer_size in self.HIDDEN_SIZE:
            hidden_w.append(self.model.add_parameters((layer_size, input_size)))
            hidden_b.append(self.model.add_parameters((layer_size)))
            input_size = layer_size

        self.mlp_vuv = [hidden_w, hidden_b]

        self.excitation_w = self.model.add_parameters((self.UPSAMPLE_COUNT + self.FILTER_SIZE - 1, input_size))
        self.excitation_b = self.model.add_parameters((self.UPSAMPLE_COUNT + self.FILTER_SIZE - 1))
        self.filter_w = self.model.add_parameters((self.FILTER_SIZE, input_size))
        self.filter_b = self.model.add_parameters((self.FILTER_SIZE))
        self.vuv_w = self.model.add_parameters((1, input_size))
        self.vuv_b = self.model.add_parameters((1))

        self.trainer = dy.AdamTrainer(self.model, alpha=params.learning_rate)
        self.dio = DatasetIO()
        self.vocoder = MelVocoder()
Example #3
0
    def __init__(self, params, model=None, runtime=False):
        self.params = params

        self.UPSAMPLE_COUNT = int(12.5 * params.target_sample_rate / 1000)
        self.RECEPTIVE_SIZE = 512  # this means 16ms

        self.dio = DatasetIO()
        self.vocoder = MelVocoder()

        self.network = VocoderNetwork(receptive_field=self.RECEPTIVE_SIZE, filter_size=256).to(device)
        self.trainer = torch.optim.Adam(self.network.parameters(), lr=self.params.learning_rate)
Example #4
0
    def __init__(self, params, model=None, runtime=False):
        self.params = params
        self.HIDDEN_SIZE = [1000, 1000]

        self.FFT_SIZE = 513
        self.UPSAMPLE_COUNT = int(12.5 * params.target_sample_rate / 1000)
        self.FILTER_SIZE = 128

        self.sparse = False
        self.dio = DatasetIO()
        self.vocoder = MelVocoder()

        self.network = VocoderNetwork(self.params.mgc_order,
                                      self.UPSAMPLE_COUNT).to(device)
        self.trainer = torch.optim.Adam(self.network.parameters(),
                                        lr=self.params.learning_rate)
Example #5
0
    def phase_1_prepare_corpus(params):
        from os import listdir
        from os.path import isfile, join
        from os.path import exists
        train_files_tmp = [
            f for f in listdir(params.train_folder)
            if isfile(join(params.train_folder, f))
        ]
        dev_files_tmp = [
            f for f in listdir(params.dev_folder)
            if isfile(join(params.dev_folder, f))
        ]

        sys.stdout.write("Scanning training files...")
        sys.stdout.flush()
        final_list = []
        for file in train_files_tmp:
            base_name = file[:-4]
            lab_name = base_name + '.txt'
            wav_name = base_name + '.wav'
            if exists(join(params.train_folder, lab_name)) and exists(
                    join(params.train_folder, wav_name)):
                if base_name not in final_list:
                    final_list.append(base_name)

        train_files = final_list
        sys.stdout.write(" found " + str(len(train_files)) +
                         " valid training files\n")
        sys.stdout.write("Scanning development files...")
        sys.stdout.flush()
        final_list = []
        for file in dev_files_tmp:
            base_name = file[:-4]
            lab_name = base_name + '.txt'
            wav_name = base_name + '.wav'
            if exists(join(params.dev_folder, lab_name)) and exists(
                    join(params.dev_folder, wav_name)):
                if base_name not in final_list:
                    final_list.append(base_name)

        dev_files = final_list
        sys.stdout.write(" found " + str(len(dev_files)) +
                         " valid development files\n")

        from io_modules.dataset import DatasetIO
        from io_modules.vocoder import MelVocoder
        from shutil import copyfile
        import pysptk
        dio = DatasetIO()
        vocoder = MelVocoder()
        base_folder = params.train_folder
        for index in range(len(train_files)):
            sys.stdout.write("\r\tprocessing file " + str(index + 1) + "/" +
                             str(len(train_files)))
            sys.stdout.flush()
            base_name = train_files[index]
            txt_name = base_name + '.txt'
            wav_name = base_name + '.wav'
            spc_name = base_name + '.png'
            lab_name = base_name + '.lab'

            # LAB - copy or create
            if exists(join(base_folder, lab_name)):
                copyfile(join(base_folder, lab_name),
                         join('data/processed/train', lab_name))
            else:
                create_lab_file(join(base_folder, txt_name),
                                join('data/processed/train', lab_name))
            # TXT
            copyfile(join(base_folder, txt_name),
                     join('data/processed/train', txt_name))
            # WAVE
            data, sample_rate = dio.read_wave(
                join(base_folder, wav_name),
                sample_rate=params.target_sample_rate)
            mgc = vocoder.melspectrogram(data,
                                         sample_rate=params.target_sample_rate,
                                         num_mels=params.mgc_order)
            # SPECT
            render_spectrogram(mgc, join('data/processed/train', spc_name))
            dio.write_wave(
                join('data/processed/train', base_name + '.orig.wav'), data,
                sample_rate)
            array2file(mgc, join('data/processed/train', base_name + '.mgc'))

        sys.stdout.write('\n')
        base_folder = params.dev_folder
        for index in range(len(dev_files)):
            sys.stdout.write("\r\tprocessing file " + str(index + 1) + "/" +
                             str(len(dev_files)))
            sys.stdout.flush()
            base_name = dev_files[index]
            txt_name = base_name + '.txt'
            wav_name = base_name + '.wav'
            spc_name = base_name + '.png'
            lab_name = base_name + '.lab'

            # LAB - copy or create
            if exists(join(base_folder, lab_name)):
                copyfile(join(base_folder, lab_name),
                         join('data/processed/dev', lab_name))
            else:
                create_lab_file(join(base_folder, txt_name),
                                join('data/processed/dev', lab_name))
            # TXT
            copyfile(join(base_folder, txt_name),
                     join('data/processed/dev/', txt_name))
            # WAVE
            data, sample_rate = dio.read_wave(
                join(base_folder, wav_name),
                sample_rate=params.target_sample_rate)
            mgc = vocoder.melspectrogram(data,
                                         sample_rate=params.target_sample_rate,
                                         num_mels=params.mgc_order)
            # SPECT
            render_spectrogram(mgc, join('data/processed/dev', spc_name))
            dio.write_wave(join('data/processed/dev', base_name + '.orig.wav'),
                           data, sample_rate)
            array2file(mgc, join('data/processed/dev', base_name + '.mgc'))

        sys.stdout.write('\n')
Example #6
0
    def phase_1_prepare_corpus(params):
        from os import listdir
        from os.path import isfile, join
        from os.path import exists
        train_files_tmp = [
            f for f in listdir(params.train_folder)
            if isfile(join(params.train_folder, f))
        ]
        if params.dev_folder is not None:
            dev_files_tmp = [
                f for f in listdir(params.dev_folder)
                if isfile(join(params.dev_folder, f))
            ]
        else:
            dev_files_tmp = []

        if params.g2p is not None:
            from models.g2p import G2P
            from io_modules.encodings import Encodings
            g2p_encodings = Encodings()
            g2p_encodings.load(params.g2p + '.encodings')
            g2p = G2P(g2p_encodings)
            g2p.load(params.g2p + '-bestAcc.network')
            if exists(params.g2p + '.lexicon'):
                g2p.load_lexicon(params.g2p + '.lexicon')
        else:
            g2p = None

        sys.stdout.write("Scanning training files...")
        sys.stdout.flush()
        final_list = []
        for file in train_files_tmp:
            base_name = file[:-4]
            lab_name = base_name + '.txt'
            wav_name = base_name + '.wav'
            if exists(join(params.train_folder, lab_name)) and exists(
                    join(params.train_folder, wav_name)):
                if base_name not in final_list:
                    final_list.append(base_name)

        train_files = final_list
        sys.stdout.write(" found " + str(len(train_files)) +
                         " valid training files\n")
        sys.stdout.write("Scanning development files...")
        sys.stdout.flush()
        final_list = []
        for file in dev_files_tmp:
            base_name = file[:-4]
            lab_name = base_name + '.txt'
            wav_name = base_name + '.wav'
            if exists(join(params.dev_folder, lab_name)) and exists(
                    join(params.dev_folder, wav_name)):
                if base_name not in final_list:
                    final_list.append(base_name)

        dev_files = final_list
        sys.stdout.write(" found " + str(len(dev_files)) +
                         " valid development files\n")
        from io_modules.dataset import DatasetIO
        from io_modules.vocoder import MelVocoder
        from shutil import copyfile
        dio = DatasetIO()

        vocoder = MelVocoder()
        base_folder = params.train_folder
        total_files = 0
        for index in range(len(train_files)):
            total_files += 1
            sys.stdout.write("\r\tprocessing file " + str(index + 1) + "/" +
                             str(len(train_files)))
            sys.stdout.flush()
            base_name = train_files[index]
            txt_name = base_name + '.txt'
            wav_name = base_name + '.wav'
            spc_name = base_name + '.png'
            lab_name = base_name + '.lab'

            tgt_txt_name = txt_name
            tgt_spc_name = spc_name
            tgt_lab_name = lab_name
            if params.prefix is not None:
                tgt_txt_name = params.prefix + "_{:05d}".format(
                    total_files) + '.txt'
                tgt_spc_name = params.prefix + "_{:05d}".format(
                    total_files) + '.png'
                tgt_lab_name = params.prefix + "_{:05d}".format(
                    total_files) + '.lab'

            # LAB - copy or create
            if exists(join(base_folder, lab_name)):
                copyfile(join(base_folder, lab_name),
                         join('data/processed/train', tgt_lab_name))
            else:
                create_lab_file(join(base_folder, txt_name),
                                join('data/processed/train', tgt_lab_name),
                                speaker_name=params.speaker,
                                g2p=g2p)
            # TXT
            copyfile(join(base_folder, txt_name),
                     join('data/processed/train', tgt_txt_name))
            # WAVE
            data, sample_rate = dio.read_wave(
                join(base_folder, wav_name),
                sample_rate=params.target_sample_rate)
            mgc = vocoder.melspectrogram(data,
                                         sample_rate=params.target_sample_rate,
                                         num_mels=params.mgc_order)
            # SPECT
            render_spectrogram(mgc, join('data/processed/train', tgt_spc_name))
            if params.prefix is None:
                dio.write_wave(
                    join('data/processed/train', base_name + '.orig.wav'),
                    data, sample_rate)
                array2file(mgc, join('data/processed/train',
                                     base_name + '.mgc'))
            else:
                tgt_wav_name = params.prefix + "_{:05d}".format(
                    total_files) + '.orig.wav'
                tgt_mgc_name = params.prefix + "_{:05d}".format(
                    total_files) + '.mgc'
                dio.write_wave(join('data/processed/train', tgt_wav_name),
                               data, sample_rate)
                array2file(mgc, join('data/processed/train', tgt_mgc_name))

        sys.stdout.write('\n')
        base_folder = params.dev_folder
        for index in range(len(dev_files)):
            total_files += 1
            sys.stdout.write("\r\tprocessing file " + str(index + 1) + "/" +
                             str(len(dev_files)))
            sys.stdout.flush()
            base_name = dev_files[index]
            txt_name = base_name + '.txt'
            wav_name = base_name + '.wav'
            spc_name = base_name + '.png'
            lab_name = base_name + '.lab'

            tgt_txt_name = txt_name
            tgt_spc_name = spc_name
            tgt_lab_name = lab_name
            if params.prefix is not None:
                tgt_txt_name = params.prefix + "_{:05d}".format(
                    total_files) + '.txt'
                tgt_spc_name = params.prefix + "_{:05d}".format(
                    total_files) + '.png'
                tgt_lab_name = params.prefix + "_{:05d}".format(
                    total_files) + '.lab'

            # LAB - copy or create
            if exists(join(base_folder, lab_name)):
                copyfile(join(base_folder, lab_name),
                         join('data/processed/dev', tgt_lab_name))
            else:
                create_lab_file(join(base_folder, txt_name),
                                join('data/processed/dev', tgt_lab_name),
                                speaker_name=params.speaker,
                                g2p=g2p)
            # TXT
            copyfile(join(base_folder, txt_name),
                     join('data/processed/dev', tgt_txt_name))
            # WAVE
            data, sample_rate = dio.read_wave(
                join(base_folder, wav_name),
                sample_rate=params.target_sample_rate)
            mgc = vocoder.melspectrogram(data,
                                         sample_rate=params.target_sample_rate,
                                         num_mels=params.mgc_order)
            # SPECT
            render_spectrogram(mgc, join('data/processed/dev', tgt_spc_name))
            if params.prefix is None:
                dio.write_wave(
                    join('data/processed/dev', base_name + '.orig.wav'), data,
                    sample_rate)
                array2file(mgc, join('data/processed/dev', base_name + '.mgc'))
            else:
                tgt_wav_name = params.prefix + "_{:05d}".format(
                    total_files) + '.orig.wav'
                tgt_mgc_name = params.prefix + "_{:05d}".format(
                    total_files) + '.mgc'
                dio.write_wave(join('data/processed/dev', tgt_wav_name), data,
                               sample_rate)
                array2file(mgc, join('data/processed/dev', tgt_mgc_name))

        sys.stdout.write('\n')