def _get_encoder_vae_pool_last_config(self, in_dim, out_dim):
    """Build the EncoderVAE module config.

    The stack downsamples acoustic features with two strided Conv1d groups,
    runs a GRU, pools the last frame, and finishes with a VAE layer that
    emits (z, mu, logvar).
    """
    layers = [
        rnn_dyn.Config.LayerConfig(layer_type="Conv1d", out_dim=2,
                                   num_layers=1, kernel_size=3, stride=2,
                                   padding=1),
        rnn_dyn.Config.LayerConfig(layer_type="Conv1d", out_dim=4,
                                   num_layers=2, kernel_size=3, stride=2,
                                   padding=1),
        rnn_dyn.Config.LayerConfig(layer_type="GRU", out_dim=8),
        rnn_dyn.Config.LayerConfig(layer_type="PoolLast", batch_first=True),
        rnn_dyn.Config.LayerConfig(layer_type="VAE", out_dim=out_dim),
    ]
    return enc_dec_dyn.Config.ModuleConfig(
        name="EncoderVAE",
        config=rnn_dyn.Config(in_dim=in_dim, layer_configs=layers),
        input_names=["acoustic_features"],
        output_names=["emb_z", "emb_mu", "emb_logvar"],
        process_group=0)
def _get_fixed_attention_decoder_config(self, audio_encoder_dim, in_dim,
                                        out_dim, n_frames_per_step,
                                        p_teacher_forcing):
    """Build a decoder config with fixed (ground-truth) attention.

    Uses a two-layer pre-net, an FC+LSTM core, and a single projection
    that maps the LSTM output to ``out_dim * n_frames_per_step`` acoustic
    feature values per step.
    """
    core = rnn_dyn.Config(
        in_dim=in_dim,
        layer_configs=[
            rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=8,
                                       nonlin="RELU"),
            rnn_dyn.Config.LayerConfig(layer_type="LSTM", out_dim=4),
        ])
    pre_net = rnn_dyn.Config(
        in_dim=out_dim,
        layer_configs=[
            rnn_dyn.Config.LayerConfig(layer_type="linear",
                                       out_dim=audio_encoder_dim,
                                       nonlin="relu",
                                       num_layers=2),
        ])
    projector = enc_dec_dyn.Config.ProjectionConfig(
        config=rnn_dyn.Config(
            in_dim=4,
            layer_configs=[
                rnn_dyn.Config.LayerConfig(
                    layer_type="FC",
                    out_dim=out_dim * n_frames_per_step),
            ]),
        name="AcousticFeaturesProjector",
        output_names=["pred_intermediate_acoustic_features"],
        out_dim=out_dim,
        is_autoregressive_input=True)
    return enc_dec_dyn.Config.DecoderConfig(
        attention_args={enc_dec_dyn.ATTENTION_GROUND_TRUTH: "attention_matrix"},
        attention_config=enc_dec_dyn.FIXED_ATTENTION,
        teacher_forcing_input_names=["acoustic_features"],
        config=core,
        input_names=["phoneme_embeddings", "emb_z"],
        name="Decoder",
        n_frames_per_step=n_frames_per_step,
        p_teacher_forcing=p_teacher_forcing,
        pre_net_config=pre_net,
        process_group=1,
        projection_configs=[projector])
def test_nonlins(self):
    """Check that layer configs attach the requested non-linearities and
    omit them where none was requested.

    Layers 0-2 are configured without a ``nonlin`` and must contain a
    single sub-layer each; layers 3, 5 and 7 must contain the activation
    modules they were configured with.
    """
    hparams = ModularTrainer.create_hparams()
    in_dim = 42
    out_dim = 12
    model_config = rnn_dyn.Config(
        in_dim=in_dim,
        batch_first=True,
        layer_configs=[
            rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=16),
            rnn_dyn.Config.LayerConfig(layer_type="LIN", out_dim=18),
            rnn_dyn.Config.LayerConfig(layer_type="linear", out_dim=20),
            rnn_dyn.Config.LayerConfig(layer_type="Linear", num_layers=2,
                                       out_dim=22, nonlin="ReLU"),
            rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=22),
            rnn_dyn.Config.LayerConfig(layer_type="SELU", inplace=True),
            rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=out_dim),
            rnn_dyn.Config.LayerConfig(layer_type="Conv1d", kernel_size=5,
                                       nonlin="ReLU", out_dim=out_dim)
        ],
        hparams=hparams)
    model = model_config.create_model()

    # Layers 0-2 requested no non-linearity: exactly one sub-layer each.
    # The guard avoids indexing module[1] when the layer is already valid.
    for layer_idx in range(3):
        num_sublayers = len(model[layer_idx].module)
        if num_sublayers > 1:
            self.assertEqual(
                1, num_sublayers,
                "Layer {} should not have a non linearity but has {}.".
                format(layer_idx, type(model[layer_idx].module[1])))

    # Layer 3 (num_layers=2, nonlin="ReLU"): ReLU after each Linear.
    seq_layer = model[3].module
    self.assertEqual(
        torch.nn.ReLU, type(seq_layer[1]),
        "Layer {} should have a non-linearity {} but has {}.".format(
            3, torch.nn.ReLU, type(seq_layer[1])))
    # Fixed: the failure message previously reported seq_layer[1] here.
    self.assertEqual(
        torch.nn.ReLU, type(seq_layer[3]),
        "Layer {} should have a non-linearity {} but has {}.".format(
            3, torch.nn.ReLU, type(seq_layer[3])))

    # Layer 5 is a stand-alone SELU activation layer.
    layer = model[5].module[0]
    self.assertEqual(
        torch.nn.SELU, type(layer),
        "Layer {} should be {} but is {}.".format(5, torch.nn.SELU,
                                                  type(layer)))

    # Layer 7 is a Conv1d configured with nonlin="ReLU".
    # Fixed: the failure message previously reported layer index 3.
    seq_layer = model[7].module
    self.assertEqual(
        torch.nn.ReLU, type(seq_layer[1]),
        "Layer {} should have a non-linearity {} but has {}.".format(
            7, torch.nn.ReLU, type(seq_layer[1])))
def _get_encoder_embedding_config(self, out_dim):
    """Build a module config that maps a binary index input ('emb_idx')
    to a learned embedding vector published as 'emb_z'."""
    embedding_layer = rnn_dyn.Config.LayerConfig(
        layer_type='Embedding', num_embeddings=2, embedding_dim=out_dim)
    return enc_dec_dyn.Config.ModuleConfig(
        name="Embedding",
        config=rnn_dyn.Config(in_dim=1, layer_configs=[embedding_layer]),
        input_names=['emb_idx'],
        output_names=["emb_z"],
        process_group=0)
def test_save_load_equality(self):
    """Save a model after one optimiser step, reload it into a fresh
    handler, and verify parameters and optimiser state are identical."""
    hparams = ModularTrainer.create_hparams()
    hparams.optimiser_type = "Adam"
    hparams.optimiser_args["lr"] = 0.1

    # Add function name to path.
    out_dir = os.path.join(self.out_dir, "test_save_load_equality")
    model_path = os.path.join(out_dir, "test_model")

    in_dim, out_dim = 10, 4
    total_epochs = 10

    # Build a model and run one backward/step so the optimiser carries
    # state, then checkpoint model and optimiser together.
    handler = ModularModelHandlerPyTorch()
    handler.model = rnn_dyn.Config(
        in_dim=in_dim,
        layer_configs=[
            rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=out_dim)
        ]).create_model()
    handler.set_optimiser(hparams)

    seq_length = torch.tensor((10, 7), dtype=torch.long)
    batch_size = 2
    test_input = torch.ones([seq_length[0], batch_size, in_dim])
    handler.model.init_hidden(batch_size)
    output = handler.model(test_input,
                           seq_lengths_input=seq_length,
                           max_length_inputs=seq_length.max())[0]
    output.mean().backward()
    handler.optimiser.step()
    handler.save_checkpoint(epoch=total_epochs, model_path=model_path)

    # Load the checkpoint into a second handler and compare both sides.
    reloaded = ModularModelHandlerPyTorch()
    reloaded.load_checkpoint(hparams,
                             model_path=model_path,
                             load_optimiser=True,
                             epoch=total_epochs,
                             verbose=False)

    param_pairs = zip(handler.model.parameters(),
                      reloaded.model.parameters())
    self.assertTrue(all((p == q).all() for p, q in param_pairs),
                    "Loaded and saved models are not the same.")

    self.assertTrue(
        equal_iterable(handler.optimiser.state_dict()["state"],
                       reloaded.optimiser.state_dict()["state"]),
        "Loaded and saved optimisers are not the same.")

    shutil.rmtree(out_dir)
def _get_postnet_config(self, out_dim):
    """Build the postnet config: Conv1d -> BatchNorm1d -> ReLU -> Linear,
    refining intermediate acoustic features back to out_dim."""
    layers = [
        rnn_dyn.Config.LayerConfig(layer_type="Conv1d", out_dim=4,
                                   kernel_size=3),
        rnn_dyn.Config.LayerConfig(layer_type="BatchNorm1d"),
        rnn_dyn.Config.LayerConfig(layer_type="ReLU"),
        rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=out_dim),
    ]
    return enc_dec_dyn.Config.ModuleConfig(
        name="Postnet",
        config=rnn_dyn.Config(in_dim=out_dim, layer_configs=layers),
        input_names=["pred_intermediate_acoustic_features"],
        output_names=["pred_acoustic_features"],
        process_group=2)
def _get_parallel_decoder_config(self, in_dim, out_dim):
    """Build a non-autoregressive decoder config (FC + LSTM, both with
    dropout) reading upsampled phoneme embeddings plus 'emb_z'."""
    layers = [
        rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=8,
                                   nonlin="RELU", dropout=0.1),
        rnn_dyn.Config.LayerConfig(layer_type="LSTM", out_dim=out_dim,
                                   dropout=0.1),
    ]
    return enc_dec_dyn.Config.ModuleConfig(
        config=rnn_dyn.Config(in_dim=in_dim, layer_configs=layers),
        input_names=["upsampled_phoneme_embeddings", "emb_z"],
        name="ParallelDecoder",
        process_group=2,
        output_names=["pred_acoustic_features"])
def test_save_load(self):
    """Round-trip a model: serialise its config to JSON, rebuild the model
    from the decoded config, and load the original weights into it."""
    num_emb = 3
    emb_dim = 12
    in_dim = 42  # Contains the embedding index.
    out_dim = 12

    layer_configs = [
        rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=128,
                                   num_layers=2, nonlin="relu"),
        rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=128,
                                   num_layers=3, nonlin="tanh"),
        rnn_dyn.Config.LayerConfig(layer_type="LSTM", out_dim=32,
                                   num_layers=3, bidirectional=True),
        rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=out_dim),
    ]
    emb_configs = [
        rnn_dyn.Config.EmbeddingConfig(
            embedding_dim=emb_dim,
            name="emb1",
            num_embedding=num_emb,
            affected_layer_group_indices=(0, 2, 3)),
    ]
    model_config = rnn_dyn.Config(in_dim=in_dim,
                                  layer_configs=layer_configs,
                                  emb_configs=emb_configs)

    model = model_config.create_model()
    other_model = model_config.create_model()
    # Two independently created models must differ (random initialisation).
    first_params = list(model.parameters())[0]
    self.assertTrue(
        (first_params != list(other_model.parameters())[0]).any())

    # Serialise config and weights, then rebuild and reload.
    config_json = model.get_config_as_json()
    params = model.state_dict()
    recreated_model = jsonpickle.decode(config_json).create_model()
    recreated_model.load_state_dict(params)
    self.assertTrue(
        (first_params == list(recreated_model.parameters())[0]).all())
def _get_encoder_config(self, out_dim):
    """Build the phoneme encoder config: Embedding, Conv1d with ReLU,
    BatchNorm1d, and a Linear projection to out_dim."""
    layers = [
        rnn_dyn.Config.LayerConfig(layer_type="Embedding",
                                   num_embeddings=2, embedding_dim=4),
        rnn_dyn.Config.LayerConfig(layer_type="Conv1d", out_dim=6,
                                   kernel_size=3, nonlin="ReLU", padding=1),
        rnn_dyn.Config.LayerConfig(layer_type="BatchNorm1d"),
        rnn_dyn.Config.LayerConfig(layer_type="Linear", out_dim=out_dim),
    ]
    return enc_dec_dyn.Config.ModuleConfig(
        name="Encoder",
        config=rnn_dyn.Config(in_dim=1, layer_configs=layers),
        input_names=["phonemes"],
        output_names=["phoneme_embeddings"],
        process_group=0)
def test_embeddings(self):
    """Verify that an embedding group is created, that its vectors are
    concatenated into the inputs of the affected layer groups, and that
    forward passes work for batch sizes 2 and 1."""
    hparams = ModularTrainer.create_hparams()
    num_emb = 3
    emb_dim = 12
    in_dim = 42  # Contains the embedding index.
    out_dim = 12
    model_config = rnn_dyn.Config(
        in_dim=in_dim,
        layer_configs=[
            rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=128,
                                       num_layers=2, nonlin="relu"),
            rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=128,
                                       num_layers=3, nonlin="tanh"),
            rnn_dyn.Config.LayerConfig(layer_type="LSTM", out_dim=32,
                                       num_layers=3, bidirectional=True),
            rnn_dyn.Config.LayerConfig(layer_type="FC", out_dim=out_dim)
        ],
        emb_configs=[
            rnn_dyn.Config.EmbeddingConfig(
                embedding_dim=emb_dim,
                name="emb1",
                num_embedding=num_emb,
                affected_layer_group_indices=(0, 2, 3))
        ])
    model = model_config.create_model()
    hparams.add_hparam("f_get_emb_index", [self._f_get_emb_index])

    # Exactly one embedding group with the requested table shape.
    self.assertEqual(1, len(model.emb_groups))
    self.assertEqual(torch.Size([num_emb, emb_dim]),
                     model.emb_groups["emb1"].weight.shape)

    # Affected groups (0, 2, 3) see their input widened by emb_dim;
    # group 1 does not.
    self.assertEqual(torch.Size([128, in_dim + emb_dim]),
                     model[0][0].weight.shape)
    self.assertEqual(torch.Size([128, 128]), model[0][2].weight.shape)
    self.assertEqual(torch.Size([128, 128]), model[1][0].weight.shape)
    self.assertEqual(torch.nn.Tanh, type(model[1][1]))
    self.assertEqual(torch.Size([32 * 4, 128 + emb_dim]),
                     model[2].weight_ih_l0.shape)
    self.assertEqual(torch.Size([32 * 4, 32 * 2]),
                     model[2].weight_ih_l2_reverse.shape)

    # Forward pass: first a two-sample batch, then a single-sample batch.
    for batch_size, lengths in ((2, (100, 75)), (1, (100,))):
        seq_length = torch.tensor(lengths, dtype=torch.long)
        features = torch.ones([batch_size, seq_length[0], in_dim])
        emb_indices = torch.ones([batch_size, seq_length[0], 1])
        model.init_hidden(batch_size)
        output = model(features, emb_indices,
                       seq_lengths_input=seq_length,
                       max_length_inputs=seq_length[0])
        self.assertEqual(torch.Size([batch_size, seq_length[0], out_dim]),
                         output[0].shape)