Python ModelTrainerの例、idiaptts.src.model_trainers.ModelTrainer.ModelTrainer Pythonの例

コード例 #1

0

ファイルを表示

ファイル: AtomModelTrainer.py プロジェクト: sshuster/IdiapTTS

    def synth_phrase(self, file_id_list, hparams):
        """Synthesise audio whose LF0 track is replaced by the stored phrase curve.

        For every id the extracted audio features are loaded, the column at
        index -3 is overwritten with the phrase curve read from
        ``<OutputGen.dir_labels>/<id><OutputGen.ext_phrase>`` (float32 binary),
        and the result is handed to ``ModelTrainer.synthesize``. While the
        vocoder runs, ``'_phrase'`` is appended to ``hparams.synth_file_suffix``;
        the previous suffix is restored afterwards, even if synthesis fails.

        :param file_id_list: Ids of the files to synthesise.
        :param hparams:      Hyper-parameter container; its ``synth_file_suffix``
                             is modified temporarily.
        """
        self.logger.info("Synthesise phrase curve for [{0}].".format(
            ", ".join(file_id_list)))

        # Placeholder entries which load_extracted_audio_features fills with features.
        synth_output = dict.fromkeys(file_id_list)
        full_output = self.load_extracted_audio_features(synth_output, hparams)

        # Override the lf0 component by the phrase curve.
        for id_name in file_id_list:
            labels = full_output[id_name]
            phrase_curve = np.fromfile(
                os.path.join(self.OutputGen.dir_labels,
                             id_name + self.OutputGen.ext_phrase),
                dtype=np.float32)
            labels[:, -3] = phrase_curve[:len(labels)]

        # Add identifier to suffix and guarantee it is removed again, so a
        # failing vocoder run cannot leave hparams in a modified state.
        old_synth_file_suffix = hparams.synth_file_suffix
        hparams.synth_file_suffix += '_phrase'
        try:
            # Run the vocoder.
            ModelTrainer.synthesize(self, file_id_list, full_output, hparams)
        finally:
            hparams.synth_file_suffix = old_synth_file_suffix

コード例 #2

0

ファイルを表示

    def test_split_return_values_torch(self):
        """A torch tensor passed to ``_split_return_values`` must raise a ``TypeError``."""
        lengths = numpy.array([10, 5])
        torch_output = torch.ones(lengths.max(), 2, 4)

        error_patch = unittest.mock.patch.object(ModelTrainer.logger, "error")
        with error_patch as mock_logger, self.assertRaises(TypeError):
            ModelTrainer._split_return_values(torch_output, lengths, None, False)
            # NOTE(review): this line is only reached when the call above did
            # NOT raise, so the logger check never runs in a passing test.
            # The expected message also looks unrelated to splitting - confirm.
            mock_logger.assert_called_with("No best model exists yet. Continue with the current one.")

コード例 #3

0

ファイルを表示

    def test_split_return_values(self):
        """Split a batch of (output, (hidden1, hidden2)) and check each split element.

        Every batch entry is filled with a constant derived from its index, so
        after splitting, entry ``idx`` must contain only that constant.
        """
        seq_length_output = numpy.array([10, 6, 8])
        batch_size, feature_dim = 3, 50
        max_length = seq_length_output.max()

        def constant_per_batch(dim, scale):
            # Array of shape (max_length, batch_size, dim); entry b holds b * scale.
            values = numpy.empty((max_length, batch_size, dim))
            for batch_idx in range(batch_size):
                values[:, batch_idx] = batch_idx * scale
            return values

        output = constant_per_batch(feature_dim, 1)
        hidden = (constant_per_batch(2, 10), constant_per_batch(4, 100))

        split_batch = ModelTrainer._split_return_values((output, hidden), seq_length_output, None, False)

        for idx in range(batch_size):
            element = split_batch[idx]
            split_hidden = element[1]

            self.assertTrue((element[0] == idx).all(),
                            msg="Output of batch {} is wrong, expected was all values being {}.".format(idx, idx))
            self.assertTrue((split_hidden[0] == idx * 10).all(),
                            msg="Hidden1 of batch {} is wrong, expected was all values being {}.".format(idx, idx * 10))
            self.assertTrue((split_hidden[1] == idx * 100).all(),
                            msg="Hidden2 of batch {} is wrong, expected was all values being {}.".format(idx, idx * 100))

コード例 #4

0

ファイルを表示

    def test_input_to_str_list(self):
        """Check ``_input_to_str_list`` for tuple, file path, single id and invalid input."""
        # A tuple of non-string elements is converted element-wise.
        self.assertEqual(["121", "122"], ModelTrainer._input_to_str_list((121, 122)))

        # A valid path to a file id list yields the ids listed in that file.
        id_list_path = os.path.join("integration", "fixtures", "file_id_list.txt")
        self.assertEqual(TestModelTrainer._get_id_list(),
                         ModelTrainer._input_to_str_list(id_list_path))

        # A single id is wrapped into a list.
        self.assertEqual(["121"], ModelTrainer._input_to_str_list("121"))

        # An unsupported input type is rejected.
        unsupported_input = numpy.array([1, 2])
        with self.assertRaises(ValueError):
            ModelTrainer._input_to_str_list(unsupported_input)

コード例 #5

0

ファイルを表示

    def test_embeddings_everywhere(self):
        """Check layer weight shapes when the embedding spec is ``(-1)`` in the model type."""
        num_emb, emb_dim = 3, 12
        in_dim, out_dim = 42, 12

        hparams = ModelTrainer.create_hparams()
        hparams.add_hparam("f_get_emb_index", [lambda x: 0])
        hparams.model_type = "RNNDYN-{}x{}_EMB_(-1)-3_RELU_128-2_BiLSTM_32-1_FC_12".format(
            num_emb, emb_dim)
        model = ModelFactory.create(hparams.model_type, (in_dim, ), out_dim, hparams)

        # A single embedding group with the requested table size.
        self.assertEqual(1, len(model.emb_groups))
        self.assertEqual(torch.Size([num_emb, emb_dim]), model.emb_groups[0].weight.shape)

        # Feed-forward layers.
        self.assertEqual(torch.Size([128, in_dim - 1 + emb_dim]), model[0].weight.shape)
        self.assertEqual(torch.Size([128, 128 + emb_dim]), model[1].weight.shape)

        # Recurrent layers.
        self.assertEqual(torch.Size([32 * 4, 128 + emb_dim]), model[3].weight_ih_l0.shape)
        self.assertEqual(torch.Size([32 * 4, 32 * 2 + emb_dim]), model[4].weight_ih_l0_reverse.shape)

コード例 #6

0

ファイルを表示

    def create_hparams(hparams_string=None, verbose=False):
        """Create the base hyper-parameters and add the defaults declared below.

        :param hparams_string: Forwarded to ``ModelTrainer.create_hparams``.
        :param verbose:        If True, log the debug string of the final container.
        :return:               The extended hyper-parameter container.
        """
        extra_defaults = dict(
            # One initial theta value per filter.
            thetas=None,
            # Order of the impulse response of the atoms.
            k=2,
            # Post-processing removes atoms with an absolute amplitude smaller than this.
            min_atom_amp=0.25,
            # Complex poles possible.
            complex_poles=True,
            # Initial phase of the filters.
            phase_init=0.0,
            # Weight of the VUV RMSE.
            vuv_loss_weight=1.0,
            # Weight of the L1 loss on the spiking inputs.
            L1_loss_weight=1.0,
            # Weight on unvoiced frames.
            weight_unvoiced=0.5,
            # Dimension of the input questions.
            num_questions=None,
            # Size of distribution around spikes when training the AtomModel.
            dist_window_size=51,
            # Initial bias of neural filter, should be estimated mean of speaker's LF0.
            phrase_bias_init=0.0,
            # Path to load a pre-trained atom model from.
            atom_model_path=None,
            # Hyper-parameter container used in the AtomModelTrainer.
            hparams_atom=None,
            # Path to load a pre-trained atom neural filter model from (without phrase curve).
            flat_model_path=None,
            # Hyper-parameter container used in the AtomNeuralFilterModelTrainer.
            hparams_flat=None,
        )

        # The base call is kept quiet; logging happens once, after all additions.
        hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)
        hparams.add_hparams(**extra_defaults)

        if verbose:
            logging.info(hparams.get_debug_string())

        return hparams

コード例 #7

0

ファイルを表示

    def test_save_load_equality(self):
        """Save a model, load it with a fresh handler, save again and compare checkpoints.

        The output directory of the test is removed afterwards, also when an
        assertion or one of the save/load calls fails, so a failing run does not
        leave stale checkpoints behind.
        """
        hparams = ModelTrainer.create_hparams()
        hparams.out_dir = os.path.join(
            self.out_dir,
            "test_save_load_equality")  # Add function name to path.
        model_path = os.path.join(hparams.out_dir, "test_model.nn")

        try:
            # Create a new model and save it.
            dim_in, dim_out = 10, 4
            total_epochs = 10
            model_handler = ModelHandlerPyTorch()
            model_handler.model = torch.nn.Sequential(
                torch.nn.Linear(dim_in, dim_out))
            model_handler.save_checkpoint(model_path, total_epochs)

            # Create a new model handler and test load save.
            hparams.model_type = None
            model_handler = ModelHandlerPyTorch()
            saved_total_epochs = model_handler.load_checkpoint(model_path, hparams)
            self.assertEqual(total_epochs,
                             saved_total_epochs,
                             msg="Saved and loaded total epochs do not match")
            model_copy_path = os.path.join(hparams.out_dir, "test_model_copy.nn")
            model_handler.save_checkpoint(model_copy_path, total_epochs)

            # A byte-wise file comparison (filecmp.cmp) does not work here,
            # so the checkpoints are compared with equal_checkpoint.
            self.assertTrue(equal_checkpoint(model_path, model_copy_path),
                            "Loaded and saved models are not the same.")
        finally:
            # ignore_errors: the directory may not exist if the first save failed;
            # do not mask the original error with a FileNotFoundError.
            shutil.rmtree(hparams.out_dir, ignore_errors=True)

コード例 #8

0

ファイルを表示

    def _load_pre_net(self, hparams):
        """Load the model found at ``ModelTrainer.get_model_path(hparams)`` into ``self.pre_net``.

        Only the first element returned by ``ModelHandlerPyTorch.load_model``
        is kept; the remaining ones are discarded.
        """
        # Function-scope imports, as in the original code.
        from idiaptts.src.neural_networks.pytorch.ModelHandlerPyTorch import ModelHandlerPyTorch
        from idiaptts.src.model_trainers.ModelTrainer import ModelTrainer

        checkpoint_path = ModelTrainer.get_model_path(hparams)
        loaded = ModelHandlerPyTorch.load_model(checkpoint_path, hparams, verbose=True)
        self.pre_net, *_ = loaded

コード例 #9

0

ファイルを表示

    def decollate_network_output(output,
                                 _,
                                 seq_lengths=None,
                                 permutation=None,
                                 batch_first=True):
        """Split output into LF0, V/UV and command signals. Return command signals as hidden state.

        The first two channels of the last axis are returned as output, all
        remaining channels as the second return value. The second positional
        argument is ignored.
        """
        command_channels = output[:, :, 2:]
        lf0_vuv_channels = output[:, :, :2]
        split_args = (None, seq_lengths, permutation, batch_first)

        # Split pre-net output (command signals).
        intern_amps, _unused = ModelTrainer.split_batch(command_channels, *split_args)
        # Split final LF0, V/UV.
        output, _unused = ModelTrainer.split_batch(lf0_vuv_channels, *split_args)

        return output, intern_amps

コード例 #10

0

ファイルを表示

ファイル: test_ModelTrainer.py プロジェクト: sshuster/IdiapTTS

    def _get_trainer(self, hparams):
        """Build a ``ModelTrainer`` on the integration fixtures with generators, datasets and loss set.

        :param hparams: Hyper-parameter container; ``num_questions`` and ``num_coded_sps`` are read.
        :return:        The configured trainer.
        """
        dir_world_features = "integration/fixtures/WORLD"
        dir_question_labels = "integration/fixtures/questions"

        trainer = ModelTrainer(self.id_list, hparams)

        # Input generator: question labels.
        trainer.InputGen = QuestionLabelGen(dir_question_labels, hparams.num_questions)
        trainer.InputGen.get_normalisation_params(dir_question_labels)

        # Output generator: WORLD features with deltas.
        trainer.OutputGen = WorldFeatLabelGen(dir_world_features,
                                              num_coded_sps=hparams.num_coded_sps,
                                              add_deltas=True)
        trainer.OutputGen.get_normalisation_params(dir_world_features)

        def make_dataset(ids):
            # Train and validation sets differ only in their id list.
            return LabelGensDataset(ids,
                                    trainer.InputGen,
                                    trainer.OutputGen,
                                    hparams,
                                    match_lengths=True)

        trainer.dataset_train = make_dataset(trainer.id_list_train)
        trainer.dataset_val = make_dataset(trainer.id_list_val)

        trainer.loss_function = torch.nn.MSELoss(reduction='none')

        return trainer

コード例 #11

0

ファイルを表示

ファイル: AtomVUVDistPosModelTrainer.py プロジェクト: sshuster/IdiapTTS

    def synthesize(self, id_list, synth_output, hparams):
        """
        Synthesise LF0 from atoms. The run_atom_synth function either loads the original acoustic features or uses an
        acoustic model to predict them.

        For each sample the V/UV values are read from ``synth_output[id_name][:, 0, 1]`` and written to
        column -2 of the (length-adjusted) labels, then ``ModelTrainer.synthesize`` is called on the result of
        ``run_atom_synth``.
        """
        full_output = self.run_atom_synth(id_list, synth_output, hparams)

        for id_name, labels in full_output.items():
            lf0 = labels[:, -3]
            # NOTE(review): the interpolated lf0 is not used after this line - confirm whether it
            # was meant to be written back to labels[:, -3].
            lf0, _ = interpolate_lin(lf0)
            vuv = synth_output[id_name][:, 0, 1]
            # Adjust the label length to the V/UV length, half of the difference on each side.
            len_diff = len(labels) - len(vuv)
            labels = WorldFeatLabelGen.trim_end_sample(labels, int(len_diff / 2), reverse=True)
            labels = WorldFeatLabelGen.trim_end_sample(labels, len_diff - int(len_diff / 2))
            # NOTE(review): `labels` was rebound above and is never stored back into full_output.
            # Whether this write reaches full_output[id_name] depends on trim_end_sample returning
            # a view rather than a copy - verify against its implementation.
            labels[:, -2] = vuv

        # Run the vocoder.
        ModelTrainer.synthesize(self, id_list, full_output, hparams)

コード例 #12

0

ファイルを表示

ファイル: AcousticModelTrainer.py プロジェクト: sshuster/IdiapTTS

    def synthesize(self, id_list, synth_output, hparams):
        """
        Optionally replace parts of the network output by the extracted features, then run the
        normal synthesis pipeline.

        Which parts are replaced is controlled by the ``synth_load_org_sp``, ``synth_load_org_lf0``,
        ``synth_load_org_vuv`` and ``synth_load_org_bap`` flags in hparams. ``synth_output`` is
        modified in place.
        """
        load_any_original = (hparams.synth_load_org_sp
                             or hparams.synth_load_org_lf0
                             or hparams.synth_load_org_vuv
                             or hparams.synth_load_org_bap)

        if load_any_original:
            for id_name in id_list:
                # Prefer an explicitly configured world_dir, otherwise fall back to the default location.
                world_dir = getattr(hparams, "world_dir", None)
                if world_dir is None:
                    world_dir = os.path.join(self.OutputGen.dir_labels,
                                             self.dir_extracted_acoustic_features)

                labels = WorldFeatLabelGen.load_sample(id_name,
                                                       world_dir,
                                                       num_coded_sps=hparams.num_coded_sps)

                predicted = synth_output[id_name]
                len_diff = len(labels) - len(predicted)
                if len_diff > 0:
                    # Extracted features are longer: trim half at the front, the rest at the end.
                    front = int(len_diff / 2)
                    labels = WorldFeatLabelGen.trim_end_sample(labels, front, reverse=True)
                    labels = WorldFeatLabelGen.trim_end_sample(labels, len_diff - front)

                num_frames = len(labels)

                if hparams.synth_load_org_sp:
                    num_sps = self.OutputGen.num_coded_sps
                    predicted[:num_frames, :num_sps] = labels[:, :num_sps]

                if hparams.synth_load_org_lf0:
                    predicted[:num_frames, -3] = labels[:, -3]

                if hparams.synth_load_org_vuv:
                    predicted[:num_frames, -2] = labels[:, -2]

                if hparams.synth_load_org_bap:
                    predicted[:num_frames, -1] = labels[:, -1]

        # Run the vocoder.
        ModelTrainer.synthesize(self, id_list, synth_output, hparams)

コード例 #13

0

ファイルを表示

    def synthesize(self, id_list, synth_output, hparams):
        """Combine predicted LF0 and V/UV with extracted features and run the base synthesis.

        LF0 (column 0) and V/UV (column 1) are taken from each network output; V/UV is
        thresholded at 0.5 in place. The remaining features are loaded with
        ``WorldFeatLabelGen.load_sample``. Results are stored under ``<id>_E2E_Phrase``.

        :raises NotImplementedError: If ``hparams.synth_acoustic_model_path`` is not None.
        """
        full_output = dict()
        for id_name, labels in synth_output.items():
            # Take lf0 and vuv from network output.
            lf0 = labels[:, 0]
            vuv = labels[:, 1]

            # Binarise the V/UV prediction.
            vuv[vuv < 0.5] = 0.0
            vuv[vuv >= 0.5] = 1.0

            # Predicting the remaining features with an acoustic model is not supported.
            if hparams.synth_acoustic_model_path is not None:
                raise NotImplementedError()

            # Load the data extracted from the audio.
            world_dir = getattr(hparams, "world_dir", None)
            if world_dir is None:
                world_dir = os.path.realpath(
                    os.path.join(hparams.out_dir, self.dir_extracted_acoustic_features))
            full_sample: np.ndarray = WorldFeatLabelGen.load_sample(id_name,
                                                                    world_dir,
                                                                    add_deltas=False,
                                                                    num_coded_sps=hparams.num_coded_sps,
                                                                    num_bap=hparams.num_bap)

            # Match the length of the network output: end first, then front.
            len_diff = len(full_sample) - len(lf0)
            trim_front = len_diff // 2
            trim_end = len_diff - trim_front
            full_sample = WorldFeatLabelGen.trim_end_sample(full_sample, trim_end)
            full_sample = WorldFeatLabelGen.trim_end_sample(full_sample, trim_front, reverse=True)

            # Overwrite lf0 and vuv by network output.
            lf0_column = hparams.num_coded_sps
            full_sample[:, lf0_column] = lf0
            full_sample[:, lf0_column + 1] = vuv

            # Fill a dictionary with the samples.
            full_output[id_name + "_E2E_Phrase"] = full_sample

        # Run the vocoder.
        ModelTrainer.synthesize(self, id_list, full_output, hparams)

コード例 #14

0

ファイルを表示

ファイル: AtomModelTrainer.py プロジェクト: sshuster/IdiapTTS

    def create_hparams(hparams_string=None, verbose=False):
        """Create the base hyper-parameters and add ``thetas``, ``k``, ``min_atom_amp``, ``num_questions``.

        :param hparams_string: Forwarded to ``ModelTrainer.create_hparams``.
        :param verbose:        If True, log the debug string of the final container.
        :return:               The extended hyper-parameter container.
        """
        atom_defaults = dict(thetas=None,
                             k=None,
                             min_atom_amp=0.3,
                             num_questions=None)

        # The base call is kept quiet; logging happens once, after all additions.
        hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)
        hparams.add_hparams(**atom_defaults)

        if verbose:
            logging.info(hparams.get_debug_string())

        return hparams

コード例 #15

0

ファイルを表示

ファイル: DurationModelTrainer.py プロジェクト: sshuster/IdiapTTS

    def create_hparams(hparams_string=None, verbose=False):
        """Create the base hyper-parameters and add ``min_phoneme_length`` and ``phoneme_label_type``.

        :param hparams_string: Forwarded to ``ModelTrainer.create_hparams``.
        :param verbose:        If True, log the debug string of the final container.
        :return:               The extended hyper-parameter container.
        """
        duration_defaults = dict(
            min_phoneme_length=50000,
            # Specifies the format in which the .lab files are stored.
            # Refer to PhonemeLabelGen.load_sample for a list of possible types.
            phoneme_label_type="HTK full",
        )

        hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)
        hparams.add_hparams(**duration_defaults)

        if verbose:
            logging.info(hparams.get_debug_string())

        return hparams

コード例 #16

0

ファイルを表示

    def test_get_item(self):
        """Flat indexing of the model must address the same layers as ``layer_groups``."""
        num_emb, emb_dim = 3, 12
        in_dim, out_dim = 42, 12

        hparams = ModelTrainer.create_hparams()
        hparams.add_hparam("f_get_emb_index", [lambda x: 0])
        hparams.model_type = "RNNDYN-{}x{}_EMB_(0, 3, 5, 7)-5_RELU_128-3_BiLSTM_32-1_FC_12".format(
            num_emb, emb_dim)
        model = ModelFactory.create(hparams.model_type, (in_dim, ), out_dim, hparams)

        # (group index, index within group, flat index)
        expected_positions = ((0, 1, 1), (1, 0, 3), (2, 0, 6))
        for group_idx, layer_idx, flat_idx in expected_positions:
            self.assertEqual(model.layer_groups[group_idx][layer_idx], model[flat_idx])

コード例 #17

0

ファイルを表示

ファイル: WaveNetVocoderTrainer.py プロジェクト: sshuster/IdiapTTS

    def decollate_network_output(output,
                                 hidden,
                                 seq_lengths=None,
                                 permutation=None,
                                 batch_first=True):
        """Reorder the axes of the network output and split it with ``ModelTrainer.split_batch``.

        Per the original comment, the r9y9 WaveNet output is B x C x T. It is
        rearranged to B x T x C when ``batch_first`` is True and to T x B x C
        otherwise.
        """
        # (0, 2, 1): B x C x T -> B x T x C.
        # (2, 0, 1): B x C x T -> T x B x C, i.e. (0, 2, 1) followed by (1, 0, 2).
        axes = (0, 2, 1) if batch_first else (2, 0, 1)
        reordered = np.transpose(output, axes)

        return ModelTrainer.split_batch(reordered,
                                        hidden,
                                        seq_length_output=seq_lengths,
                                        permutation=permutation,
                                        batch_first=batch_first)

コード例 #18

0

ファイルを表示

ファイル: test_ModelTrainer.py プロジェクト: sshuster/IdiapTTS

    def _get_hparams(self):
        """Return default hyper-parameters adapted for the tests of this class."""
        hparams = ModelTrainer.create_hparams()

        # General parameters.
        hparams.add_hparam("num_questions", 409)
        hparams.test_set_perc = 0.05
        hparams.val_set_perc = 0.05
        hparams.optimiser_args["lr"] = 0.02
        hparams.seed = None  # Remove the default seed.
        test_dir = os.path.dirname(os.path.realpath(__file__))
        hparams.out_dir = os.path.join(test_dir, type(self).__name__)
        hparams.num_coded_sps = 20

        # Training parameters.
        hparams.epochs = 0
        hparams.model_name = "test_model.nn"

        return hparams

コード例 #19

0

ファイルを表示

    def test_embeddings(self):
        """Check weight shapes and forward output shapes of a model with a grouped embedding spec."""
        num_emb, emb_dim = 3, 12
        in_dim = 42  # Contains the embedding index.
        out_dim = 12

        hparams = ModelTrainer.create_hparams()
        hparams.variable_sequence_length_train = True
        hparams.add_hparam("f_get_emb_index", [lambda x: 0])
        hparams.model_type = "RNNDYN-{}x{}_EMB_(0, 3, 5, 7)-5_RELU_128-3_BiLSTM_32-1_FC_12".format(
            num_emb, emb_dim)
        model = ModelFactory.create(hparams.model_type, (in_dim, ), out_dim, hparams)

        # Embedding table.
        self.assertEqual(1, len(model.emb_groups))
        self.assertEqual(torch.Size([num_emb, emb_dim]), model.emb_groups[0].weight.shape)

        # Feed-forward layers.
        self.assertEqual(torch.Size([128, in_dim - 1 + emb_dim]), model[0].weight.shape)
        self.assertEqual(torch.Size([128, 128]), model[2].weight.shape)
        self.assertEqual(torch.Size([128, 128 + emb_dim]), model[3].weight.shape)

        # Recurrent layers.
        self.assertEqual(torch.Size([32 * 4, 128 + emb_dim]), model[5].weight_ih_l0.shape)
        self.assertEqual(torch.Size([32 * 4, 32 * 2 + emb_dim]), model[7].weight_ih_l0_reverse.shape)

        # Forward pass with a batch of two sequences, then with a single sequence.
        for lengths in ((100, 75), (100, )):
            seq_length = torch.tensor(lengths, dtype=torch.long)
            batch_size = len(lengths)
            test_input = torch.ones([seq_length[0], batch_size, in_dim])
            model.init_hidden(batch_size)
            output = model(test_input, None, seq_length, seq_length[0])
            self.assertEqual(torch.Size([seq_length[0], batch_size, out_dim]),
                             output[0].shape)

コード例 #20

0

ファイルを表示

ファイル: AcousticModelTrainer.py プロジェクト: sshuster/IdiapTTS

    def create_hparams(hparams_string=None, verbose=False):
        """Create model hyper parameter container. Parse non default from given string.

        :param hparams_string: Forwarded to ``ModelTrainer.create_hparams``.
        :param verbose:        If True, log the debug string of the final container.
        :return:               The extended hyper-parameter container.
        """
        acoustic_defaults = dict(
            num_questions=None,
            question_file=None,  # Used to add labels in plot.
            num_coded_sps=60,
            sp_type="mcep",
            add_deltas=True,
            # Flags read by synthesize to replace network output by extracted features.
            synth_load_org_sp=False,
            synth_load_org_lf0=False,
            synth_load_org_vuv=False,
            synth_load_org_bap=False,
        )

        hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)
        hparams.add_hparams(**acoustic_defaults)

        if verbose:
            logging.info(hparams.get_debug_string())

        return hparams

コード例 #21

0

ファイルを表示

    def gen_figure_phrase(self, hparams, ids_input):
        """Plot the stored phrase curve against the original LF0 for each given id.

        For every id the post-processed model output is computed, the original
        LF0 and V/UV are loaded from the WORLD features, the phrase curve is
        read from disk, an F0 RMSE between original and phrase curve is logged,
        and a figure is saved to
        ``<hparams.out_dir>/<id>.<model_name>.PHRASE<hparams.gen_figure_ext>``.

        :param hparams:   Hyper-parameter container.
        :param ids_input: Ids in any form accepted by ``ModelTrainer._input_to_str_list``.
        """
        id_list = ModelTrainer._input_to_str_list(ids_input)
        model_output, model_output_post = self._forward_batched(
            hparams,
            id_list,
            hparams.batch_size_gen_figure,
            synth=False,
            benchmark=False,
            gen_figure=False)

        for id_name, outputs_post in model_output_post.items():

            if outputs_post.ndim < 2:
                outputs_post = np.expand_dims(outputs_post, axis=1)

            lf0 = outputs_post[:, 0]
            output_lf0, _ = interpolate_lin(lf0)
            output_vuv = outputs_post[:, 1]
            output_vuv[output_vuv < 0.5] = 0.0
            output_vuv[output_vuv >= 0.5] = 1.0
            # The np.bool alias was removed in NumPy 1.24; the builtin bool is equivalent.
            output_vuv = output_vuv.astype(bool)

            # Load original lf0 and vuv.
            world_dir = hparams.world_dir if hasattr(hparams, "world_dir") and hparams.world_dir is not None\
                                          else os.path.join(hparams.out_dir, self.dir_extracted_acoustic_features)
            org_labels = WorldFeatLabelGen.load_sample(
                id_name,
                world_dir,
                num_coded_sps=hparams.num_coded_sps,
                num_bap=hparams.num_bap)[:len(output_lf0)]
            _, original_lf0, original_vuv, _ = WorldFeatLabelGen.convert_to_world_features(
                org_labels,
                num_coded_sps=hparams.num_coded_sps,
                num_bap=hparams.num_bap)
            original_lf0, _ = interpolate_lin(original_lf0)
            original_vuv = original_vuv.astype(bool)

            # Phrase curve as a column vector, cut to the length of the original lf0.
            phrase_curve = np.fromfile(os.path.join(
                self.flat_trainer.atom_trainer.OutputGen.dir_labels,
                id_name + self.OutputGen.ext_phrase),
                                       dtype=np.float32).reshape(
                                           -1, 1)[:len(original_lf0)]

            # RMSE in the exp domain, computed over frames marked voiced in the original.
            f0_mse = (np.exp(original_lf0.squeeze(-1)) -
                      np.exp(phrase_curve.squeeze(-1)))**2
            f0_rmse = math.sqrt(
                (f0_mse * original_vuv[:len(output_lf0)]).sum() /
                original_vuv[:len(output_lf0)].sum())
            self.logger.info("RMSE of {} phrase curve: {} Hz.".format(
                id_name, f0_rmse))

            # NOTE(review): the two trims below remove 2 * int(len_diff / 2) + 1 frames
            # in total, which differs from len_diff - confirm this is intended.
            len_diff = len(original_lf0) - len(lf0)
            original_lf0 = WorldFeatLabelGen.trim_end_sample(
                original_lf0, int(len_diff / 2.0))
            original_lf0 = WorldFeatLabelGen.trim_end_sample(
                original_lf0, int(len_diff / 2.0) + 1, reverse=True)

            # Get a data plotter.
            net_name = os.path.basename(hparams.model_name)
            filename = str(
                os.path.join(hparams.out_dir, id_name + '.' + net_name))
            plotter = DataPlotter()

            grid_idx = 0
            graphs_lf0 = list()
            graphs_lf0.append((original_lf0, "Original"))
            graphs_lf0.append((phrase_curve, "Predicted"))
            plotter.set_data_list(grid_idx=grid_idx, data_list=graphs_lf0)
            # Shade the frames which are unvoiced in the reference.
            plotter.set_area_list(grid_idx=grid_idx,
                                  area_list=[(np.invert(original_vuv), '0.8',
                                              1.0, 'Reference unvoiced')])
            plotter.set_label(grid_idx=grid_idx,
                              xlabel='frames [' + str(hparams.frame_size_ms) +
                              ' ms]',
                              ylabel='LF0')
            plotter.set_lim(grid_idx=grid_idx, ymin=4.2, ymax=5.4)

            plotter.gen_plot()
            plotter.save_to_file(filename + ".PHRASE" + hparams.gen_figure_ext)

コード例 #22

0

ファイルを表示

ファイル: WaveNetVocoderTrainer.py プロジェクト: sshuster/IdiapTTS

    def create_hparams(hparams_string=None, verbose=False):
        """Create model hyper-parameters. Parse non-default from given string.

        :param hparams_string: Forwarded to ``ModelTrainer.create_hparams``.
        :param verbose:        If True, log the debug string of the final container.
        :return:               The extended hyper-parameter container.
        """
        hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)
        hparams.synth_vocoder = "raw"

        general_defaults = dict(
            batch_first=True,
            frame_rate_output_Hz=16000,
            mu=255,
            bit_depth=16,
            # Beginning and end of audio below the threshold are trimmed.
            silence_threshold_quantized=None,
            teacher_forcing_in_test=True,
            ema_decay=0.9999,

            # Model parameters.
            input_type="mulaw-quantize",
            # Only used in MoL prediction (input_type="raw").
            hinge_regularizer=True,
            # Only used for mixture of logistic distributions.
            log_scale_min=float(np.log(1e-14)),
            # 256 for input type mulaw-quantize, otherwise 65536
            quantize_channels=256,
        )
        hparams.add_hparams(**general_defaults)

        if hparams.input_type == "mulaw-quantize":
            out_channels = hparams.quantize_channels
        else:
            out_channels = 10 * 3  # num_mixtures * 3 (pi, mean, log_scale)
        hparams.add_hparam("out_channels", out_channels)

        network_defaults = dict(
            layers=24,  # 20
            stacks=4,  # 2
            residual_channels=512,
            gate_channels=512,
            skip_out_channels=256,
            dropout=1 - 0.95,
            kernel_size=3,
            weight_normalization=True,
            use_cond=True,  # Determines if conditioning is used.
            cin_channels=63,
            upsample_conditional_features=False,
            upsample_scales=[5, 4, 2],
        )
        hparams.add_hparams(**network_defaults)

        # With upsampling, the multiplier is the product of all upsample scales.
        if hparams.upsample_conditional_features:
            hparams.len_in_out_multiplier = reduce(mul, hparams.upsample_scales, 1)
        else:
            hparams.len_in_out_multiplier = 1

        feature_defaults = dict(freq_axis_kernel_size=3,
                                gin_channels=-1,
                                n_speakers=1,
                                use_speaker_embedding=False,
                                sp_type="mcep",
                                load_sp=True,
                                load_lf0=True,
                                load_vuv=True,
                                load_bap=True)
        hparams.add_hparams(**feature_defaults)

        if verbose:
            logging.info(hparams.get_debug_string())

        return hparams