def test_embeddings_everywhere(self):
    hparams = ModelTrainer.create_hparams()
    num_emb = 3
    emb_dim = 12
    in_dim = 42
    out_dim = 12
    hparams.add_hparam("f_get_emb_index", [lambda x: 0])
    hparams.model_type = "RNNDYN-{}x{}_EMB_(-1)-3_RELU_128-2_BiLSTM_32-1_FC_12".format(
        num_emb, emb_dim)

    model = ModelFactory.create(hparams.model_type, (in_dim,), out_dim, hparams)

    self.assertEqual(1, len(model.emb_groups))
    self.assertEqual(torch.Size([num_emb, emb_dim]),
                     model.emb_groups[0].weight.shape)
    # The embedding is appended to the input of every layer.
    self.assertEqual(torch.Size([128, in_dim - 1 + emb_dim]),
                     model[0].weight.shape)
    self.assertEqual(torch.Size([128, 128 + emb_dim]), model[1].weight.shape)
    self.assertEqual(torch.Size([32 * 4, 128 + emb_dim]),
                     model[3].weight_ih_l0.shape)
    self.assertEqual(torch.Size([32 * 4, 32 * 2 + emb_dim]),
                     model[4].weight_ih_l0_reverse.shape)
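# One possible reading of the model_type string above, inferred from the
# assertions in this test rather than from library documentation:
#   "RNNDYN-3x12_EMB_(-1)-3_RELU_128-2_BiLSTM_32-1_FC_12"
#     3x12_EMB_(-1) -> one embedding group with 3 embeddings of 12 dimensions,
#                      appended to the input of every layer ("-1");
#     3_RELU_128    -> three fully connected ReLU layers of width 128;
#     2_BiLSTM_32   -> two bidirectional LSTM layers with 32 units per
#                      direction (hence the 32 * 4 rows of weight_ih_l0, one
#                      block per LSTM gate, and 32 * 2 output features);
#     1_FC_12       -> one linear output layer of width 12.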
def create_hparams(hparams_string=None, verbose=False):
    hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)
    hparams.add_hparams(
        thetas=None,  # One initial theta value per filter.
        k=2,  # Order of the impulse response of the atoms.
        min_atom_amp=0.25,  # Post-processing removes atoms with an absolute amplitude smaller than this.
        complex_poles=True,  # Complex poles possible.
        phase_init=0.0,  # Initial phase of the filters.
        vuv_loss_weight=1.0,  # Weight of the VUV RMSE.
        L1_loss_weight=1.0,  # Weight of the L1 loss on the spiking inputs.
        weight_unvoiced=0.5,  # Weight on unvoiced frames.
        num_questions=None,  # Dimension of the input questions.
        dist_window_size=51,  # Size of distribution around spikes when training the AtomModel.
        phrase_bias_init=0.0,  # Initial bias of the neural filter; should be the estimated mean of the speaker's LF0.
        atom_model_path=None,  # Path to load a pre-trained atom model from.
        hparams_atom=None,  # Hyper-parameter container used in the AtomModelTrainer.
        flat_model_path=None,  # Path to load a pre-trained atom neural filter model from (without phrase curve).
        hparams_flat=None,  # Hyper-parameter container used in the AtomNeuralFilterModelTrainer.
    )

    if verbose:
        logging.info(hparams.get_debug_string())

    return hparams
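import numpy as np

# Illustration of the min_atom_amp post-processing described above (a sketch,
# not the library's code): atoms whose absolute amplitude falls below the
# threshold are removed.
atom_amps = np.array([0.6, -0.1, 0.3, -0.2])
min_atom_amp = 0.25
kept_amps = atom_amps[np.abs(atom_amps) >= min_atom_amp]  # -> [0.6, 0.3]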
def test_save_load_equality(self):
    hparams = ModelTrainer.create_hparams()
    hparams.out_dir = os.path.join(self.out_dir,
                                   "test_save_load_equality")  # Add function name to path.
    model_path = os.path.join(hparams.out_dir, "test_model.nn")

    # Create a new model and save it.
    dim_in, dim_out = 10, 4
    total_epochs = 10
    model_handler = ModelHandlerPyTorch()
    model_handler.model = torch.nn.Sequential(torch.nn.Linear(dim_in, dim_out))
    model_handler.save_checkpoint(model_path, total_epochs)

    # Create a new model handler and test load and save.
    hparams.model_type = None
    model_handler = ModelHandlerPyTorch()
    saved_total_epochs = model_handler.load_checkpoint(model_path, hparams)
    self.assertEqual(total_epochs, saved_total_epochs,
                     msg="Saved and loaded total epochs do not match.")

    model_copy_path = os.path.join(hparams.out_dir, "test_model_copy.nn")
    model_handler.save_checkpoint(model_copy_path, total_epochs)

    # filecmp.cmp(model_path, model_copy_path, False) does not work here,
    # so compare the checkpoints' contents instead.
    self.assertTrue(equal_checkpoint(model_path, model_copy_path),
                    "Loaded and saved models are not the same.")

    shutil.rmtree(hparams.out_dir)
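import torch

# A minimal sketch of what the equal_checkpoint helper used above might do
# (assumptions: checkpoints are dicts written by torch.save that hold a
# state_dict under "model_state_dict"; both the helper name and the key are
# illustrative, and the real implementation may compare additional fields).
def equal_checkpoint_sketch(path_a, path_b):
    checkpoint_a = torch.load(path_a, map_location="cpu")
    checkpoint_b = torch.load(path_b, map_location="cpu")
    state_a = checkpoint_a.get("model_state_dict", checkpoint_a)
    state_b = checkpoint_b.get("model_state_dict", checkpoint_b)
    if state_a.keys() != state_b.keys():
        return False
    return all(torch.equal(state_a[key], state_b[key]) for key in state_a)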
def create_hparams(hparams_string=None, verbose=False):
    hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)
    hparams.add_hparams(thetas=None,
                        k=None,
                        min_atom_amp=0.3,
                        num_questions=None)

    if verbose:
        logging.info(hparams.get_debug_string())

    return hparams
def create_hparams(hparams_string=None, verbose=False):
    hparams = ModelTrainer.create_hparams(hparams_string, verbose=False)
    hparams.add_hparams(
        # exclude_begin_and_end_silence=False,
        min_phoneme_length=50000,
        # Specifies the format in which the .lab files are stored; refer to
        # PhonemeLabelGen.load_sample for a list of possible types.
        phoneme_label_type="HTK full")

    if verbose:
        logging.info(hparams.get_debug_string())

    return hparams
def test_get_item(self):
    hparams = ModelTrainer.create_hparams()
    num_emb = 3
    emb_dim = 12
    in_dim = 42
    out_dim = 12
    hparams.add_hparam("f_get_emb_index", [lambda x: 0])
    hparams.model_type = "RNNDYN-{}x{}_EMB_(0, 3, 5, 7)-5_RELU_128-3_BiLSTM_32-1_FC_12".format(
        num_emb, emb_dim)
    model = ModelFactory.create(hparams.model_type, (in_dim,), out_dim, hparams)

    # Flat indexing into the model must match indexing into its layer groups.
    self.assertEqual(model.layer_groups[0][1], model[1])
    self.assertEqual(model.layer_groups[1][0], model[3])
    self.assertEqual(model.layer_groups[2][0], model[6])
def test_embeddings(self):
    hparams = ModelTrainer.create_hparams()
    num_emb = 3
    emb_dim = 12
    in_dim = 42  # Contains the embedding index.
    out_dim = 12
    hparams.variable_sequence_length_train = True
    hparams.add_hparam("f_get_emb_index", [lambda x: 0])
    hparams.model_type = "RNNDYN-{}x{}_EMB_(0, 3, 5, 7)-5_RELU_128-3_BiLSTM_32-1_FC_12".format(
        num_emb, emb_dim)
    # hparams.model_type = "RNNDYN-{}x{}_EMB_(-1)-5_RELU_128-2_BiLSTM_32-1_FC_12".format(num_emb, emb_dim)
    model = ModelFactory.create(hparams.model_type, (in_dim,), out_dim, hparams)

    self.assertEqual(1, len(model.emb_groups))
    self.assertEqual(torch.Size([num_emb, emb_dim]),
                     model.emb_groups[0].weight.shape)
    # The embedding is only appended to the input of layers 0, 3, 5 and 7.
    self.assertEqual(torch.Size([128, in_dim - 1 + emb_dim]),
                     model[0].weight.shape)
    self.assertEqual(torch.Size([128, 128]), model[2].weight.shape)
    self.assertEqual(torch.Size([128, 128 + emb_dim]), model[3].weight.shape)
    self.assertEqual(torch.Size([32 * 4, 128 + emb_dim]),
                     model[5].weight_ih_l0.shape)
    self.assertEqual(torch.Size([32 * 4, 32 * 2 + emb_dim]),
                     model[7].weight_ih_l0_reverse.shape)

    # Batched forward pass with variable sequence lengths.
    seq_length = torch.tensor((100, 75), dtype=torch.long)
    batch_size = 2
    test_input = torch.ones([seq_length[0], batch_size, in_dim])
    model.init_hidden(batch_size)
    output = model(test_input, None, seq_length, seq_length[0])
    self.assertEqual(torch.Size([seq_length[0], batch_size, out_dim]),
                     output[0].shape)

    # Forward pass of a single sequence.
    seq_length = torch.tensor((100,), dtype=torch.long)
    batch_size = 1
    test_input = torch.ones([seq_length[0], batch_size, in_dim])
    model.init_hidden(batch_size)
    output = model(test_input, None, seq_length, seq_length[0])
    self.assertEqual(torch.Size([seq_length[0], batch_size, out_dim]),
                     output[0].shape)
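import torch

# Why the first layer expects in_dim - 1 + emb_dim features: a sketch under
# the assumption that one input column carries the embedding index and is
# replaced by its emb_dim-dimensional embedding vector. The index extraction
# below is hypothetical and stands in for hparams.f_get_emb_index.
num_emb, emb_dim, in_dim = 3, 12, 42
embedding = torch.nn.Embedding(num_emb, emb_dim)
x = torch.ones(100, 2, in_dim)  # (time, batch, features)
emb_index = x[..., -1].long().clamp(max=num_emb - 1)
features = torch.cat((x[..., :-1], embedding(emb_index)), dim=-1)
assert features.shape[-1] == in_dim - 1 + emb_dim  # 42 - 1 + 12 == 53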
def create_hparams(hparams_string=None, verbose=False): """Create model hyper parameter container. Parse non default from given string.""" hparams = ModelTrainer.create_hparams(hparams_string, verbose=False) hparams.add_hparams( num_questions=None, question_file=None, # Used to add labels in plot. num_coded_sps=60, sp_type="mcep", add_deltas=True, synth_load_org_sp=False, synth_load_org_lf0=False, synth_load_org_vuv=False, synth_load_org_bap=False) if verbose: logging.info(hparams.get_debug_string()) return hparams
def _get_hparams(self):
    hparams = ModelTrainer.create_hparams()
    # General parameters.
    hparams.add_hparam("num_questions", 409)
    hparams.test_set_perc = 0.05
    hparams.val_set_perc = 0.05
    hparams.optimiser_args["lr"] = 0.02
    hparams.seed = None  # Remove the default seed.
    hparams.out_dir = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                                   type(self).__name__)
    hparams.num_coded_sps = 20

    # Training parameters.
    hparams.epochs = 0
    hparams.model_name = "test_model.nn"

    return hparams
def create_hparams(hparams_string=None, verbose=False): """Create model hyper-parameters. Parse non-default from given string.""" hparams = ModelTrainer.create_hparams(hparams_string, verbose=False) hparams.synth_vocoder = "raw" hparams.add_hparams( batch_first=True, frame_rate_output_Hz=16000, mu=255, bit_depth=16, silence_threshold_quantized= None, # Beginning and end of audio below the threshold are trimmed. teacher_forcing_in_test=True, ema_decay=0.9999, # Model parameters. input_type="mulaw-quantize", hinge_regularizer= True, # Only used in MoL prediction (input_type="raw"). log_scale_min=float(np.log( 1e-14)), # Only used for mixture of logistic distributions. quantize_channels=256 ) # 256 for input type mulaw-quantize, otherwise 65536 if hparams.input_type == "mulaw-quantize": hparams.add_hparam("out_channels", hparams.quantize_channels) else: hparams.add_hparam("out_channels", 10 * 3) # num_mixtures * 3 (pi, mean, log_scale) hparams.add_hparams( layers=24, # 20 stacks=4, # 2 residual_channels=512, gate_channels=512, skip_out_channels=256, dropout=1 - 0.95, kernel_size=3, weight_normalization=True, use_cond=True, # Determines if conditioning is used. cin_channels=63, upsample_conditional_features=False, upsample_scales=[5, 4, 2]) if hparams.upsample_conditional_features: hparams.len_in_out_multiplier = reduce(mul, hparams.upsample_scales, 1) else: hparams.len_in_out_multiplier = 1 hparams.add_hparams(freq_axis_kernel_size=3, gin_channels=-1, n_speakers=1, use_speaker_embedding=False, sp_type="mcep", load_sp=True, load_lf0=True, load_vuv=True, load_bap=True) if verbose: logging.info(hparams.get_debug_string()) return hparams