Example #1
    def test_embeddings_everywhere(self):
        hparams = ModelTrainer.create_hparams()
        num_emb = 3
        emb_dim = 12
        in_dim = 42
        out_dim = 12
        hparams.add_hparam("f_get_emb_index", [lambda x: 0])
        hparams.model_type = "RNNDYN-{}x{}_EMB_(-1)-3_RELU_128-2_BiLSTM_32-1_FC_12".format(
            num_emb, emb_dim)
        model = ModelFactory.create(hparams.model_type, (in_dim, ), out_dim,
                                    hparams)

        self.assertEqual(1, len(model.emb_groups))
        self.assertEqual(torch.Size([num_emb, emb_dim]),
                         model.emb_groups[0].weight.shape)
        self.assertEqual(torch.Size([128, in_dim - 1 + emb_dim]),
                         model[0].weight.shape)
        self.assertEqual(torch.Size([128, 128 + emb_dim]),
                         model[1].weight.shape)

        self.assertEqual(torch.Size([32 * 4, 128 + emb_dim]),
                         model[3].weight_ih_l0.shape)
        self.assertEqual(torch.Size([32 * 4, 32 * 2 + emb_dim]),
                         model[4].weight_ih_l0_reverse.shape)
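The shape checks above pin down how the `EMB_(-1)` part of the model-type string appears to be resolved: one embedding table of num_emb x emb_dim is created, the last input feature is consumed as the embedding index (hence in_dim - 1 for the first layer), and the looked-up vector is concatenated to the input of every subsequent layer. A minimal pure-PyTorch sketch of that wiring, not the project's RNNDYN implementation, just the pattern the asserts describe:

import torch
import torch.nn as nn

num_emb, emb_dim, in_dim = 3, 12, 42

emb = nn.Embedding(num_emb, emb_dim)        # weight: [3, 12]
fc1 = nn.Linear(in_dim - 1 + emb_dim, 128)  # weight: [128, 53]
fc2 = nn.Linear(128 + emb_dim, 128)         # weight: [128, 140]

x = torch.ones(8, in_dim)                   # last column carries the embedding index
idx = x[:, -1].long()                       # role of f_get_emb_index in the test
e = emb(idx)
h = torch.relu(fc1(torch.cat((x[:, :-1], e), dim=1)))
h = torch.relu(fc2(torch.cat((h, e), dim=1)))  # embedding re-appended at every layer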
Example #2
    def create_hparams(hparams_string=None, verbose=False):
        hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)

        hparams.add_hparams(
            thetas=None,  # One initial theta value per filter.
            k=2,  # Order of the impulse response of the atoms.
            min_atom_amp=0.25,  # Post-processing removes atoms with an absolute amplitude smaller than this.
            complex_poles=True,  # Complex poles possible.
            phase_init=0.0,  # Initial phase of the filters.
            vuv_loss_weight=1.0,  # Weight of the VUV RMSE.
            L1_loss_weight=1.0,  # Weight of the L1 loss on the spiking inputs.
            weight_unvoiced=0.5,  # Weight on unvoiced frames.
            num_questions=None,  # Dimension of the input questions.
            dist_window_size=51,  # Size of the distribution around spikes when training the AtomModel.
            phrase_bias_init=0.0,  # Initial bias of the neural filter; should be the estimated mean of the speaker's LF0.
            atom_model_path=None,  # Path to load a pre-trained atom model from.
            hparams_atom=None,  # Hyper-parameter container used in the AtomModelTrainer.
            flat_model_path=None,  # Path to load a pre-trained atom neural filter model from (without phrase curve).
            hparams_flat=None)  # Hyper-parameter container used in the AtomNeuralFilterModelTrainer.

        if verbose:
            logging.info(hparams.get_debug_string())

        return hparams
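add_hparams registers each keyword as an attribute with the given default (the other examples read them back as plain attributes), so a caller typically overrides the None placeholders before training. A hedged usage sketch with made-up values:

hparams = create_hparams()
hparams.num_questions = 425          # Dimension of the question input; None by default.
hparams.thetas = [0.01, 0.02, 0.04]  # One initial theta value per filter, as the comment above states.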
Example #3
    def test_save_load_equality(self):
        hparams = ModelTrainer.create_hparams()
        hparams.out_dir = os.path.join(
            self.out_dir,
            "test_save_load_equality")  # Add function name to path.
        model_path = os.path.join(hparams.out_dir, "test_model.nn")

        # Create a new model and save it.
        dim_in, dim_out = 10, 4
        total_epochs = 10
        model_handler = ModelHandlerPyTorch()
        model_handler.model = torch.nn.Sequential(
            torch.nn.Linear(dim_in, dim_out))
        model_handler.save_checkpoint(model_path, total_epochs)

        # Create a new model handler and test load save.
        hparams.model_type = None
        model_handler = ModelHandlerPyTorch()
        saved_total_epochs = model_handler.load_checkpoint(model_path, hparams)
        self.assertEqual(total_epochs,
                         saved_total_epochs,
                         msg="Saved and loaded total epochs do not match")
        model_copy_path = os.path.join(hparams.out_dir, "test_model_copy.nn")
        model_handler.save_checkpoint(model_copy_path, total_epochs)

        # self.assertTrue(filecmp.cmp(model_path, model_copy_path, False))  # This does not work.
        self.assertTrue(equal_checkpoint(model_path, model_copy_path),
                        "Loaded and saved models are not the same.")

        shutil.rmtree(hparams.out_dir)
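The commented-out filecmp check does not work, presumably because checkpoint files can differ at the byte level even when the parameters match, so the test compares the reloaded models via the project's equal_checkpoint helper instead. A rough stand-in for that idea in plain PyTorch (models_equal here is hypothetical, not the helper used above):

import torch

def models_equal(model_a, model_b):
    """Compare two torch.nn.Module parameter sets tensor by tensor."""
    sd_a, sd_b = model_a.state_dict(), model_b.state_dict()
    return sd_a.keys() == sd_b.keys() and all(
        torch.equal(sd_a[k], sd_b[k]) for k in sd_a)

models_equal(torch.nn.Linear(10, 4), torch.nn.Linear(10, 4))  # False: random inits differ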
Example #4
    def create_hparams(hparams_string=None, verbose=False):
        hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)

        hparams.add_hparams(thetas=None,
                            k=None,
                            min_atom_amp=0.3,
                            num_questions=None)

        if verbose:
            logging.info(hparams.get_debug_string())

        return hparams
Example #5
    def create_hparams(hparams_string=None, verbose=False):
        hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)
        hparams.add_hparams(
            # exclude_begin_and_end_silence=False,
            min_phoneme_length=50000,
            phoneme_label_type="HTK full")  # Format in which the .lab files are stored.
        # Refer to PhonemeLabelGen.load_sample for a list of possible types.

        if verbose:
            logging.info(hparams.get_debug_string())

        return hparams
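HTK label files give times in 100 ns units, so min_phoneme_length=50000 presumably corresponds to 5 ms (the unit is an assumption inferred from phoneme_label_type="HTK full"):

# Assumption: min_phoneme_length is given in HTK time units of 100 ns.
min_phoneme_length_ms = 50000 * 100e-9 * 1e3  # = 5.0 ms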
Example #6
    def test_get_item(self):
        hparams = ModelTrainer.create_hparams()
        num_emb = 3
        emb_dim = 12
        in_dim = 42
        out_dim = 12
        hparams.add_hparam("f_get_emb_index", [lambda x: 0])
        hparams.model_type = "RNNDYN-{}x{}_EMB_(0, 3, 5, 7)-5_RELU_128-3_BiLSTM_32-1_FC_12".format(
            num_emb, emb_dim)
        model = ModelFactory.create(hparams.model_type, (in_dim, ), out_dim,
                                    hparams)

        self.assertEqual(model.layer_groups[0][1], model[1])
        self.assertEqual(model.layer_groups[1][0], model[3])
        self.assertEqual(model.layer_groups[2][0], model[6])
Example #7
    def test_embeddings(self):
        hparams = ModelTrainer.create_hparams()
        num_emb = 3
        emb_dim = 12
        in_dim = 42  # Contains the embedding index.
        out_dim = 12
        hparams.variable_sequence_length_train = True
        hparams.add_hparam("f_get_emb_index", [lambda x: 0])
        hparams.model_type = "RNNDYN-{}x{}_EMB_(0, 3, 5, 7)-5_RELU_128-3_BiLSTM_32-1_FC_12".format(
            num_emb, emb_dim)
        # hparams.model_type = "RNNDYN-{}x{}_EMB_(-1)-5_RELU_128-2_BiLSTM_32-1_FC_12".format(num_emb, emb_dim)
        model = ModelFactory.create(hparams.model_type, (in_dim, ), out_dim,
                                    hparams)

        self.assertEqual(1, len(model.emb_groups))
        self.assertEqual(torch.Size([num_emb, emb_dim]),
                         model.emb_groups[0].weight.shape)
        self.assertEqual(torch.Size([128, in_dim - 1 + emb_dim]),
                         model[0].weight.shape)
        self.assertEqual(torch.Size([128, 128]), model[2].weight.shape)
        self.assertEqual(torch.Size([128, 128 + emb_dim]),
                         model[3].weight.shape)

        self.assertEqual(torch.Size([32 * 4, 128 + emb_dim]),
                         model[5].weight_ih_l0.shape)
        self.assertEqual(torch.Size([32 * 4, 32 * 2 + emb_dim]),
                         model[7].weight_ih_l0_reverse.shape)

        seq_length = torch.tensor((100, 75), dtype=torch.long)
        batch_size = 2
        test_input = torch.ones([seq_length[0], batch_size, in_dim])
        model.init_hidden(batch_size)
        output = model(test_input, None, seq_length, seq_length[0])
        self.assertEqual(torch.Size([seq_length[0], batch_size, out_dim]),
                         output[0].shape)

        seq_length = torch.tensor((100, ), dtype=torch.long)
        batch_size = 1
        test_input = torch.ones([seq_length[0], batch_size, in_dim])
        model.init_hidden(batch_size)
        output = model(test_input, None, seq_length, seq_length[0])
        self.assertEqual(torch.Size([seq_length[0], batch_size, out_dim]),
                         output[0].shape)
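The forward call takes a time-major batch plus a tensor of true lengths and the maximum length, which fits the variable_sequence_length_train flag set above. A hedged sketch of preparing such a padded batch with plain PyTorch utilities, independent of the project code:

import torch
from torch.nn.utils.rnn import pad_sequence

in_dim = 42
seqs = [torch.ones(100, in_dim), torch.ones(75, in_dim)]  # two utterances
seq_length = torch.tensor([len(s) for s in seqs], dtype=torch.long)
batch = pad_sequence(seqs)    # time-major padding: [max_len, batch_size, in_dim]
max_length = seq_length.max()
# model(batch, None, seq_length, max_length) would then mirror the call in the test.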
Example #8
    def create_hparams(hparams_string=None, verbose=False):
        """Create model hyper parameter container. Parse non default from given string."""
        hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)

        hparams.add_hparams(
            num_questions=None,
            question_file=None,  # Used to add labels in plot.
            num_coded_sps=60,
            sp_type="mcep",
            add_deltas=True,
            synth_load_org_sp=False,
            synth_load_org_lf0=False,
            synth_load_org_vuv=False,
            synth_load_org_bap=False)

        if verbose:
            logging.info(hparams.get_debug_string())

        return hparams
Example #9
    def _get_hparams(self):
        hparams = ModelTrainer.create_hparams()
        # General parameters
        hparams.add_hparam("num_questions", 409)
        hparams.test_set_perc = 0.05
        hparams.val_set_perc = 0.05
        hparams.optimiser_args["lr"] = 0.02
        hparams.seed = None  # Remove the default seed.
        hparams.out_dir = os.path.join(
            os.path.dirname(os.path.realpath(__file__)),
            type(self).__name__)
        hparams.num_coded_sps = 20

        # Training parameters.
        hparams.epochs = 0
        hparams.model_name = "test_model.nn"

        return hparams
Example #10
    def create_hparams(hparams_string=None, verbose=False):
        """Create model hyper-parameters. Parse non-default from given string."""
        hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)
        hparams.synth_vocoder = "raw"

        hparams.add_hparams(
            batch_first=True,
            frame_rate_output_Hz=16000,
            mu=255,
            bit_depth=16,
            silence_threshold_quantized=None,  # Beginning and end of audio below the threshold are trimmed.
            teacher_forcing_in_test=True,
            ema_decay=0.9999,

            # Model parameters.
            input_type="mulaw-quantize",
            hinge_regularizer=True,  # Only used in MoL prediction (input_type="raw").
            log_scale_min=float(np.log(1e-14)),  # Only used for mixture of logistic distributions.
            quantize_channels=256)  # 256 for input type mulaw-quantize, otherwise 65536.

        if hparams.input_type == "mulaw-quantize":
            hparams.add_hparam("out_channels", hparams.quantize_channels)
        else:
            hparams.add_hparam("out_channels", 10 * 3)  # num_mixtures * 3 (pi, mean, log_scale).

        hparams.add_hparams(
            layers=24,  # 20
            stacks=4,  # 2
            residual_channels=512,
            gate_channels=512,
            skip_out_channels=256,
            dropout=1 - 0.95,
            kernel_size=3,
            weight_normalization=True,
            use_cond=True,  # Determines if conditioning is used.
            cin_channels=63,
            upsample_conditional_features=False,
            upsample_scales=[5, 4, 2])
        if hparams.upsample_conditional_features:
            hparams.len_in_out_multiplier = reduce(mul, hparams.upsample_scales, 1)
        else:
            hparams.len_in_out_multiplier = 1

        hparams.add_hparams(freq_axis_kernel_size=3,
                            gin_channels=-1,
                            n_speakers=1,
                            use_speaker_embedding=False,
                            sp_type="mcep",
                            load_sp=True,
                            load_lf0=True,
                            load_vuv=True,
                            load_bap=True)

        if verbose:
            logging.info(hparams.get_debug_string())

        return hparams
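mu=255 together with quantize_channels=256 points to standard mu-law companding of the waveform, and reduce(mul, [5, 4, 2], 1) = 40 means 40 output samples per conditioning frame when upsample_conditional_features is enabled. An illustrative mu-law quantizer (textbook formula, not the project's code):

import numpy as np

def mulaw_quantize(x, mu=255):
    """Map audio in [-1, 1] to integer classes in [0, mu] via mu-law companding."""
    y = np.sign(x) * np.log1p(mu * np.abs(x)) / np.log1p(mu)  # compand to [-1, 1]
    return ((y + 1) / 2 * mu + 0.5).astype(np.int64)          # quantize to {0, ..., 255}

print(mulaw_quantize(np.array([-1.0, 0.0, 1.0])))             # -> [  0 128 255]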