Example 1
 def _get_encoder_vae_pool_last_config(self, in_dim, out_dim):
     return enc_dec_dyn.Config.ModuleConfig(
         name="EncoderVAE",
         config=rnn_dyn.Config(
             in_dim=in_dim,
             layer_configs=[
                 rnn_dyn.Config.LayerConfig(layer_type="Conv1d",
                                            out_dim=2,
                                            num_layers=1,
                                            kernel_size=3,
                                            stride=2,
                                            padding=1),
                 rnn_dyn.Config.LayerConfig(layer_type="Conv1d",
                                            out_dim=4,
                                            num_layers=2,
                                            kernel_size=3,
                                            stride=2,
                                            padding=1),
                 rnn_dyn.Config.LayerConfig(layer_type="GRU", out_dim=8),
                 rnn_dyn.Config.LayerConfig(layer_type="PoolLast",
                                            batch_first=True),
                 rnn_dyn.Config.LayerConfig(layer_type="VAE",
                                            out_dim=out_dim)
             ]),
         input_names=["acoustic_features"],
         output_names=["emb_z", "emb_mu", "emb_logvar"],
         process_group=0)
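The module above downsamples the acoustic features with two strided Conv1d groups, summarises them with a GRU, pools the last valid frame, and maps it through a VAE bottleneck whose three outputs correspond to the listed output names: the latent sample, its mean, and its log-variance. A minimal sketch of the reparameterisation such a VAE layer presumably performs (an assumption; the layer implementation is not shown here):

import torch

def reparameterise(mu, logvar):
    # Standard VAE trick: z = mu + sigma * eps with eps ~ N(0, I).
    std = torch.exp(0.5 * logvar)
    return mu + std * torch.randn_like(std)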
Example 2
 def _get_fixed_attention_decoder_config(self, audio_encoder_dim, in_dim,
                                         out_dim, n_frames_per_step,
                                         p_teacher_forcing):
     return enc_dec_dyn.Config.DecoderConfig(
         attention_args={
             enc_dec_dyn.ATTENTION_GROUND_TRUTH: "attention_matrix"
         },
         attention_config=enc_dec_dyn.FIXED_ATTENTION,
         teacher_forcing_input_names=["acoustic_features"],
         config=rnn_dyn.Config(
             in_dim=in_dim,
             layer_configs=[
                 rnn_dyn.Config.LayerConfig(layer_type="FC",
                                            out_dim=8,
                                            nonlin="RELU"),
                 rnn_dyn.Config.LayerConfig(layer_type="LSTM", out_dim=4)
             ]),
         input_names=["phoneme_embeddings", "emb_z"],
         name="Decoder",
         n_frames_per_step=n_frames_per_step,
         p_teacher_forcing=p_teacher_forcing,
         pre_net_config=rnn_dyn.Config(in_dim=out_dim,
                                       layer_configs=[
                                           rnn_dyn.Config.LayerConfig(
                                               layer_type="linear",
                                               out_dim=audio_encoder_dim,
                                               nonlin="relu",
                                               num_layers=2)
                                       ]),
         process_group=1,
         projection_configs=[
             enc_dec_dyn.Config.ProjectionConfig(
                 config=rnn_dyn.Config(
                     in_dim=4,
                     layer_configs=[
                         rnn_dyn.Config.LayerConfig(layer_type="FC",
                                                    out_dim=out_dim *
                                                    n_frames_per_step)
                     ]),
                 name="AcousticFeaturesProjector",
                 output_names=["pred_intermediate_acoustic_features"],
                 out_dim=out_dim,
                 is_autoregressive_input=True)
         ])
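Here the decoder's projection emits out_dim * n_frames_per_step values per step, so several acoustic frames are predicted at once; teacher forcing feeds the ground-truth acoustic_features back with probability p_teacher_forcing, and the fixed attention is read from the precomputed attention_matrix input. A small sketch of the implied frame bookkeeping with hypothetical numbers (the actual reshape happens inside the library and is an assumption here):

import torch

out_dim, n_frames_per_step = 80, 2                          # hypothetical values
step_output = torch.zeros(1, out_dim * n_frames_per_step)   # one decoder step
frames = step_output.view(1, n_frames_per_step, out_dim)    # split back into frames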
Example 3
    def test_nonlins(self):
        hparams = ModularTrainer.create_hparams()
        in_dim = 42
        out_dim = 12
        # hparams.model_type = "RNNDYN-1_FC_16-1_LIN_18-1_linear_20-1_RELU_22-1_TANH_24-1_FC_{}".format(out_dim)
        model_config = rnn_dyn.Config(
            in_dim=in_dim,
            batch_first=True,
            layer_configs=[
                rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=16),
                rnn_dyn.Config.LayerConfig(layer_type="LIN", out_dim=18),
                rnn_dyn.Config.LayerConfig(layer_type="linear", out_dim=20),
                rnn_dyn.Config.LayerConfig(layer_type="Linear",
                                           num_layers=2,
                                           out_dim=22,
                                           nonlin="ReLU"),
                rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=22),
                rnn_dyn.Config.LayerConfig(layer_type="SELU", inplace=True),
                rnn_dyn.Config.LayerConfig(layer_type="Linear",
                                           out_dim=out_dim),
                rnn_dyn.Config.LayerConfig(layer_type="Conv1d",
                                           kernel_size=5,
                                           nonlin="ReLU",
                                           out_dim=out_dim)
            ],
            hparams=hparams)
        model = model_config.create_model()
        # print(list(model.modules()))
        # model = ModelFactory.create(hparams.model_type, (in_dim,), out_dim, hparams)

        for layer_idx in range(3):
            num_sublayers = len(model[layer_idx].module)
            if num_sublayers > 1:
                self.assertEqual(
                    1, num_sublayers,
                    "Layer {} should not have a non linearity but has {}.".
                    format(layer_idx, type(model[layer_idx].module[1])))
        seq_layer = model[3].module
        self.assertEqual(
            torch.nn.ReLU, type(seq_layer[1]),
            "Layer {} should have a non-linearity {} but has {}.".format(
                3, torch.nn.ReLU, type(seq_layer[1])))
        self.assertEqual(
            torch.nn.ReLU, type(seq_layer[3]),
            "Layer {} should have a non-linearity {} but has {}.".format(
                3, torch.nn.ReLU, type(seq_layer[3])))
        layer = model[5].module[0]
        self.assertEqual(
            torch.nn.SELU, type(layer),
            "Layer {} should be {} but is {}.".format(5, torch.nn.SELU,
                                                      type(layer)))
        seq_layer = model[7].module
        self.assertEqual(
            torch.nn.ReLU, type(seq_layer[1]),
            "Layer {} should have a non-linearity {} but has {}.".format(
                7, torch.nn.ReLU, type(seq_layer[1])))
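The test exercises the accepted layer-type aliases: "FC", "LIN", "linear" and "Linear" all create a plain torch.nn.Linear, a layer group only gains activation sublayers when nonlin is given, and a bare activation name such as "SELU" creates the activation on its own. A minimal sketch of inspecting the created sublayers (hypothetical dimensions):

cfg = rnn_dyn.Config(
    in_dim=10,
    layer_configs=[
        rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=5),
        rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=5, nonlin="ReLU")
    ])
model = cfg.create_model()
print([type(m) for m in model[1].module])  # expect [torch.nn.Linear, torch.nn.ReLU]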
Example 4
 def _get_encoder_embedding_config(self, out_dim):
     return enc_dec_dyn.Config.ModuleConfig(
         name="Embedding",
         config=rnn_dyn.Config(in_dim=1,
                               layer_configs=[
                                   rnn_dyn.Config.LayerConfig(
                                       layer_type='Embedding',
                                       num_embeddings=2,
                                       embedding_dim=out_dim)
                               ]),
         input_names=['emb_idx'],
         output_names=["emb_z"],
         process_group=0)
Example 5
    def test_save_load_equality(self):
        hparams = ModularTrainer.create_hparams()
        hparams.optimiser_type = "Adam"
        hparams.optimiser_args["lr"] = 0.1
        # Add function name to path.
        out_dir = os.path.join(self.out_dir, "test_save_load_equality")
        model_path = os.path.join(out_dir, "test_model")

        # Create a new model, run the optimiser once to obtain a state, and save everything.
        in_dim, out_dim = 10, 4
        total_epochs = 10
        model_handler = ModularModelHandlerPyTorch()
        model_handler.model = rnn_dyn.Config(in_dim=in_dim, layer_configs=[
            rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=out_dim)
        ]).create_model()
        model_handler.set_optimiser(hparams)

        seq_length = torch.tensor((10, 7), dtype=torch.long)
        batch_size = 2
        test_input = torch.ones([seq_length[0], batch_size, in_dim])
        model_handler.model.init_hidden(batch_size)
        output = model_handler.model(test_input, seq_lengths_input=seq_length,
                                     max_length_inputs=seq_length.max())[0]
        output.mean().backward()

        model_handler.optimiser.step()
        model_handler.save_checkpoint(epoch=total_epochs, model_path=model_path)

        # Create a new model handler and test the save/load round trip.
        model_handler_copy = ModularModelHandlerPyTorch()
        model_handler_copy.load_checkpoint(
            hparams,
            model_path=model_path,
            load_optimiser=True,
            epoch=total_epochs,
            verbose=False)

        zip_params = zip(model_handler.model.parameters(),
                         model_handler_copy.model.parameters())
        self.assertTrue(all([(x == x_copy).all() for x, x_copy in zip_params]),
                        "Loaded and saved models are not the same.")
        current_opt_state = model_handler.optimiser.state_dict()["state"]
        copy_opt_state = model_handler_copy.optimiser.state_dict()["state"]
        self.assertTrue(equal_iterable(current_opt_state, copy_opt_state),
                        "Loaded and saved optimisers are not the same.")

        shutil.rmtree(out_dir)
Example 6
 def _get_postnet_config(self, out_dim):
     return enc_dec_dyn.Config.ModuleConfig(
         name="Postnet",
         config=rnn_dyn.Config(
             in_dim=out_dim,
             layer_configs=[
                 rnn_dyn.Config.LayerConfig(layer_type="Conv1d",
                                            out_dim=4,
                                            kernel_size=3),
                 rnn_dyn.Config.LayerConfig(layer_type="BatchNorm1d"),
                 rnn_dyn.Config.LayerConfig(layer_type="ReLU"),
                 rnn_dyn.Config.LayerConfig(layer_type="Linear",
                                            out_dim=out_dim)
             ]),
         input_names=["pred_intermediate_acoustic_features"],
         output_names=["pred_acoustic_features"],
         process_group=2)
Example 7
 def _get_parallel_decoder_config(self, in_dim, out_dim):
     return enc_dec_dyn.Config.ModuleConfig(
         config=rnn_dyn.Config(
             in_dim=in_dim,
             layer_configs=[
                 rnn_dyn.Config.LayerConfig(layer_type="FC",
                                            out_dim=8,
                                            nonlin="RELU",
                                            dropout=0.1),
                 rnn_dyn.Config.LayerConfig(layer_type="LSTM",
                                            out_dim=out_dim,
                                            dropout=0.1)
             ]),
         input_names=["upsampled_phoneme_embeddings", "emb_z"],
         name="ParallelDecoder",
         process_group=2,
         output_names=["pred_acoustic_features"])
Example 8
    def test_save_load(self):
        num_emb = 3
        emb_dim = 12
        in_dim = 42  # Contains the embedding index.
        out_dim = 12
        model_config = rnn_dyn.Config(
            in_dim=in_dim,
            layer_configs=[
                rnn_dyn.Config.LayerConfig(layer_type="FC",
                                           out_dim=128,
                                           num_layers=2,
                                           nonlin="relu"),
                rnn_dyn.Config.LayerConfig(layer_type="FC",
                                           out_dim=128,
                                           num_layers=3,
                                           nonlin="tanh"),
                rnn_dyn.Config.LayerConfig(layer_type="LSTM",
                                           out_dim=32,
                                           num_layers=3,
                                           bidirectional=True),
                rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=out_dim)
            ],
            emb_configs=[
                rnn_dyn.Config.EmbeddingConfig(
                    embedding_dim=emb_dim,
                    name="emb1",
                    num_embedding=num_emb,
                    affected_layer_group_indices=(0, 2, 3))
            ])
        model = model_config.create_model()

        other_model = model_config.create_model()
        self.assertTrue((list(model.parameters())[0] != list(
            other_model.parameters())[0]).any())

        config_json = model.get_config_as_json()
        params = model.state_dict()
        recreated_config = jsonpickle.decode(config_json)
        recreated_model = recreated_config.create_model()
        recreated_model.load_state_dict(params)

        self.assertTrue((list(model.parameters())[0] == list(
            recreated_model.parameters())[0]).all())
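Since the architecture is fully described by the config object, a model can be persisted as a jsonpickle string next to an ordinary state dict. A minimal sketch of writing both to disk and restoring them, reusing the calls shown above (file names are hypothetical):

import torch

with open("model_config.json", "w") as f:
    f.write(model.get_config_as_json())
torch.save(model.state_dict(), "model_params.pt")

with open("model_config.json", "r") as f:
    restored_model = jsonpickle.decode(f.read()).create_model()
restored_model.load_state_dict(torch.load("model_params.pt"))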
Example 9
 def _get_encoder_config(self, out_dim):
     return enc_dec_dyn.Config.ModuleConfig(
         name="Encoder",
         config=rnn_dyn.Config(
             in_dim=1,
             layer_configs=[
                 rnn_dyn.Config.LayerConfig(layer_type="Embedding",
                                            num_embeddings=2,
                                            embedding_dim=4),
                 rnn_dyn.Config.LayerConfig(layer_type="Conv1d",
                                            out_dim=6,
                                            kernel_size=3,
                                            nonlin="ReLU",
                                            padding=1),
                 rnn_dyn.Config.LayerConfig(layer_type="BatchNorm1d"),
                 rnn_dyn.Config.LayerConfig(layer_type="Linear",
                                            out_dim=out_dim)
             ],
         ),
         input_names=["phonemes"],
         output_names=["phoneme_embeddings"],
         process_group=0)
Example 10
    def test_embeddings(self):
        hparams = ModularTrainer.create_hparams()
        num_emb = 3
        emb_dim = 12
        in_dim = 42  # Contains the embedding index.
        out_dim = 12
        model_config = rnn_dyn.Config(
            in_dim=in_dim,
            layer_configs=[
                rnn_dyn.Config.LayerConfig(layer_type="FC",
                                           out_dim=128,
                                           num_layers=2,
                                           nonlin="relu"),
                rnn_dyn.Config.LayerConfig(layer_type="FC",
                                           out_dim=128,
                                           num_layers=3,
                                           nonlin="tanh"),
                rnn_dyn.Config.LayerConfig(layer_type="LSTM",
                                           out_dim=32,
                                           num_layers=3,
                                           bidirectional=True),
                rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=out_dim)
            ],
            emb_configs=[
                rnn_dyn.Config.EmbeddingConfig(
                    embedding_dim=emb_dim,
                    name="emb1",
                    num_embedding=num_emb,
                    affected_layer_group_indices=(0, 2, 3))
            ])
        model = model_config.create_model()
        hparams.add_hparam("f_get_emb_index", [self._f_get_emb_index])

        self.assertEqual(1, len(model.emb_groups))
        self.assertEqual(torch.Size([num_emb, emb_dim]),
                         model.emb_groups["emb1"].weight.shape)
        self.assertEqual(torch.Size([128, in_dim + emb_dim]),
                         model[0][0].weight.shape)
        self.assertEqual(torch.Size([128, 128]), model[0][2].weight.shape)
        self.assertEqual(torch.Size([128, 128]), model[1][0].weight.shape)
        self.assertEqual(torch.nn.Tanh, type(model[1][1]))

        self.assertEqual(torch.Size([32 * 4, 128 + emb_dim]),
                         model[2].weight_ih_l0.shape)
        self.assertEqual(torch.Size([32 * 4, 32 * 2]),
                         model[2].weight_ih_l2_reverse.shape)

        seq_length = torch.tensor((100, 75), dtype=torch.long)
        batch_size = 2
        test_input = torch.ones([batch_size, seq_length[0], in_dim])
        test_input_emb = torch.ones([batch_size, seq_length[0], 1])
        model.init_hidden(batch_size)
        output = model(test_input,
                       test_input_emb,
                       seq_lengths_input=seq_length,
                       max_length_inputs=seq_length[0])
        self.assertEqual(torch.Size([batch_size, seq_length[0], out_dim]),
                         output[0].shape)

        seq_length = torch.tensor((100, ), dtype=torch.long)
        batch_size = 1
        test_input = torch.ones([batch_size, seq_length[0], in_dim])
        test_input_emb = torch.ones([batch_size, seq_length[0], 1])
        model.init_hidden(batch_size)
        output = model(test_input,
                       test_input_emb,
                       seq_lengths_input=seq_length,
                       max_length_inputs=seq_length[0])
        self.assertEqual(torch.Size([batch_size, seq_length[0], out_dim]),
                         output[0].shape)
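The shape assertions follow from the embedding and LSTM bookkeeping: layer groups listed in affected_layer_group_indices get the embedding concatenated to their input, an LSTM stacks its four gate matrices along the first weight dimension, and each upper layer of a bidirectional LSTM sees both directions of the layer below. Worked out with the test's own numbers:

in_dim, emb_dim, hidden = 42, 12, 32
first_fc_in = in_dim + emb_dim    # layer group 0 is embedding-conditioned: 54 inputs
lstm_gate_rows = 4 * hidden       # four stacked LSTM gates: 128 rows
lstm_upper_in = 2 * hidden        # both directions of the layer below: 64 columns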