def setUp(self):
    # Seed the Random Number Generators
    seed = 1234
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)
    np.random.seed(seed * 13 // 7)

    # Load training data & vocabulary
    train_data_src = submission.read_corpus(
        './sanity_check_en_es_data/train_sanity_check.es', 'src')
    train_data_tgt = submission.read_corpus(
        './sanity_check_en_es_data/train_sanity_check.en', 'tgt')
    train_data = list(zip(train_data_src, train_data_tgt))

    for src_sents, tgt_sents in submission.batch_iter(
            train_data, batch_size=BATCH_SIZE, shuffle=True):
        self.src_sents = src_sents
        self.tgt_sents = tgt_sents
        break
    self.vocab = Vocab.load(
        './sanity_check_en_es_data/vocab_sanity_check.json')

    # Create NMT Model
    self.model = submission.NMT(embed_size=EMBED_SIZE,
                                hidden_size=HIDDEN_SIZE,
                                dropout_rate=DROPOUT_RATE,
                                vocab=self.vocab)
def test_0(self): """1d-0-basic: Sanity check for Encode. Compares student output to that of model with dummy data.""" # Seed the Random Number Generators seed = 1234 torch.manual_seed(seed) torch.cuda.manual_seed(seed) np.random.seed(seed * 13 // 7) # Load training data & vocabulary train_data_src = submission.read_corpus( './sanity_check_en_es_data/train_sanity_check.es', 'src') train_data_tgt = submission.read_corpus( './sanity_check_en_es_data/train_sanity_check.en', 'tgt') train_data = list(zip(train_data_src, train_data_tgt)) for src_sents, tgt_sents in submission.batch_iter( train_data, batch_size=BATCH_SIZE, shuffle=True): src_sents = src_sents tgt_sents = tgt_sents break vocab = Vocab.load('./sanity_check_en_es_data/vocab_sanity_check.json') # Create NMT Model model = submission.NMT(embed_size=EMBED_SIZE, hidden_size=HIDDEN_SIZE, dropout_rate=DROPOUT_RATE, vocab=vocab) # Configure for Testing reinitialize_layers(model) source_lengths = [len(s) for s in src_sents] source_padded = model.vocab.src.to_input_tensor(src_sents, device=model.device) # Load Outputs enc_hiddens_target = torch.load( './sanity_check_en_es_data/enc_hiddens.pkl') dec_init_state_target = torch.load( './sanity_check_en_es_data/dec_init_state.pkl') # Test with torch.no_grad(): enc_hiddens_pred, dec_init_state_pred = model.encode( source_padded, source_lengths) self.assertTrue( np.allclose(enc_hiddens_target.numpy(), enc_hiddens_pred.numpy()) ), "enc_hiddens is incorrect: it should be:\n {} but is:\n{}".format( enc_hiddens_target, enc_hiddens_pred) print("enc_hiddens Sanity Checks Passed!") self.assertTrue( np.allclose(dec_init_state_target[0].numpy(), dec_init_state_pred[0].numpy()) ), "dec_init_state[0] is incorrect: it should be:\n {} but is:\n{}".format( dec_init_state_target[0], dec_init_state_pred[0]) print("dec_init_state[0] Sanity Checks Passed!") self.assertTrue( np.allclose(dec_init_state_target[1].numpy(), dec_init_state_pred[1].numpy()) ), "dec_init_state[1] is incorrect: it should be:\n {} but is:\n{}".format( dec_init_state_target[1], dec_init_state_pred[1]) print("dec_init_state[1] Sanity Checks Passed!")
def test_1(self): """1e-1-hidden: Combined Outputs Check""" # Set Seeds random.seed(35436) np.random.seed(4355) torch.manual_seed(42) if torch.cuda.is_available(): torch.cuda.manual_seed(42) # Create Inputs input = setup() self.vocab = input[-1] # Initialize student model self.model = submission.NMT(embed_size=LARGE_EMBED_SIZE, hidden_size=LARGE_HIDDEN_SIZE, dropout_rate=NONZERO_DROPOUT_RATE, vocab=self.vocab) # Initialize soln model random.seed(35436) np.random.seed(4355) torch.manual_seed(42) if torch.cuda.is_available(): torch.cuda.manual_seed(42) self.soln_model = self.run_with_solution_if_possible( submission, lambda sub_or_sol: sub_or_sol).NMT( embed_size=LARGE_EMBED_SIZE, hidden_size=LARGE_HIDDEN_SIZE, dropout_rate=NONZERO_DROPOUT_RATE, vocab=self.vocab) # To prevent dropout self.model.train(False) self.soln_model.train(False) self.source_lengths = [len(s) for s in input[0]] self.source_padded = self.soln_model.vocab.src.to_input_tensor( input[0], device=self.soln_model.device) self.target_padded = self.soln_model.vocab.tgt.to_input_tensor( input[1], device=self.soln_model.device) # Tensor: (tgt_len, b) self.target = input[1] self.combined_outputs = test_combined_outputs( self.source_padded, self.source_lengths, self.target_padded, self.model, self.soln_model, self.vocab) self.assertTrue(self.combined_outputs)
def setUp(self):
    # Set Seeds
    random.seed(35436)
    np.random.seed(4355)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)

    # Create Inputs
    input = setup()
    self.vocab = input[-1]

    # Initialize student model
    self.model = submission.NMT(embed_size=LARGE_EMBED_SIZE,
                                hidden_size=LARGE_HIDDEN_SIZE,
                                dropout_rate=NONZERO_DROPOUT_RATE,
                                vocab=self.vocab)

    # Initialize soln model
    random.seed(35436)
    np.random.seed(4355)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    self.soln_model = self.run_with_solution_if_possible(
        submission, lambda sub_or_sol: sub_or_sol).NMT(
            embed_size=LARGE_EMBED_SIZE,
            hidden_size=LARGE_HIDDEN_SIZE,
            dropout_rate=NONZERO_DROPOUT_RATE,
            vocab=self.vocab)

    self.source_lengths = [len(s) for s in input[0]]
    self.source_padded = self.soln_model.vocab.src.to_input_tensor(
        input[0], device=self.soln_model.device)

    self.enc_hidden, self.decode_hidden, self.decode_cell = test_encoding_hiddens(
        self.source_padded, self.source_lengths, self.model,
        self.soln_model, self.vocab)
def test_0(self): """1f-0-basic: Sanity check for Step. Compares student output to that of model with dummy data.""" # Seed the Random Number Generators seed = 1234 torch.manual_seed(seed) torch.cuda.manual_seed(seed) np.random.seed(seed * 13 // 7) # Load training data & vocabulary train_data_src = submission.read_corpus( './sanity_check_en_es_data/train_sanity_check.es', 'src') train_data_tgt = submission.read_corpus( './sanity_check_en_es_data/train_sanity_check.en', 'tgt') train_data = list(zip(train_data_src, train_data_tgt)) for src_sents, tgt_sents in submission.batch_iter( train_data, batch_size=BATCH_SIZE, shuffle=True): self.src_sents = src_sents self.tgt_sents = tgt_sents break self.vocab = Vocab.load( './sanity_check_en_es_data/vocab_sanity_check.json') # Create NMT Model self.model = submission.NMT(embed_size=EMBED_SIZE, hidden_size=HIDDEN_SIZE, dropout_rate=DROPOUT_RATE, vocab=self.vocab) reinitialize_layers(self.model) # Inputs Ybar_t = torch.load('./sanity_check_en_es_data/Ybar_t.pkl') dec_init_state = torch.load( './sanity_check_en_es_data/dec_init_state.pkl') enc_hiddens = torch.load('./sanity_check_en_es_data/enc_hiddens.pkl') enc_masks = torch.load('./sanity_check_en_es_data/enc_masks.pkl') enc_hiddens_proj = torch.load( './sanity_check_en_es_data/enc_hiddens_proj.pkl') # Output dec_state_target = torch.load( './sanity_check_en_es_data/dec_state.pkl') o_t_target = torch.load('./sanity_check_en_es_data/o_t.pkl') e_t_target = torch.load('./sanity_check_en_es_data/e_t.pkl') # Run Tests with torch.no_grad(): dec_state_pred, o_t_pred, e_t_pred = self.model.step( Ybar_t, dec_init_state, enc_hiddens, enc_hiddens_proj, enc_masks) self.assertTrue( np.allclose(dec_state_target[0].numpy(), dec_state_pred[0].numpy()), "decoder_state[0] should be:\n {} but is:\n{}".format( dec_state_target[0], dec_state_pred[0])) print("dec_state[0] Sanity Checks Passed!") self.assertTrue( np.allclose(dec_state_target[1].numpy(), dec_state_pred[1].numpy()), "decoder_state[1] should be:\n {} but is:\n{}".format( dec_state_target[1], dec_state_pred[1])) print("dec_state[1] Sanity Checks Passed!") self.assertTrue( np.allclose(o_t_target.numpy(), o_t_pred.numpy()), "combined_output should be:\n {} but is:\n{}".format( o_t_target, o_t_pred)) print("combined_output Sanity Checks Passed!") self.assertTrue( np.allclose(e_t_target.numpy(), e_t_pred.numpy()), "e_t should be:\n {} but is:\n{}".format(e_t_target, e_t_pred))
def setUp(self):
    # Set Seeds
    random.seed(35436)
    np.random.seed(4355)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)

    # Create Inputs
    input = setup()
    self.vocab = input[-1]

    # Initialize student model
    self.model = submission.NMT(embed_size=LARGE_EMBED_SIZE,
                                hidden_size=LARGE_HIDDEN_SIZE,
                                dropout_rate=NONZERO_DROPOUT_RATE,
                                vocab=self.vocab)

    # Initialize soln model
    random.seed(35436)
    np.random.seed(4355)
    torch.manual_seed(42)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(42)
    self.soln_model = self.run_with_solution_if_possible(
        submission, lambda sub_or_sol: sub_or_sol).NMT(
            embed_size=LARGE_EMBED_SIZE,
            hidden_size=LARGE_HIDDEN_SIZE,
            dropout_rate=NONZERO_DROPOUT_RATE,
            vocab=self.vocab)

    # Initialize alternative soln model (change concat order in step function)
    # random.seed(35436)
    # np.random.seed(4355)
    # torch.manual_seed(42)
    # if torch.cuda.is_available():
    #     torch.cuda.manual_seed(42)
    # self.alt_soln_model = self.run_with_solution_if_possible(submission, lambda sub_or_sol: sub_or_sol).NMT_alt(
    #     embed_size=LARGE_EMBED_SIZE,
    #     hidden_size=LARGE_HIDDEN_SIZE,
    #     dropout_rate=NONZERO_DROPOUT_RATE,
    #     vocab=self.vocab
    # )

    # To prevent dropout
    self.model.train(False)
    self.soln_model.train(False)
    # self.alt_soln_model.train(False)

    # Generate Inputs
    random.seed(35436)
    np.random.seed(4355)
    torch.manual_seed(42)
    Ybar_t = torch.randn(LARGE_BATCH_SIZE,
                         LARGE_EMBED_SIZE + LARGE_HIDDEN_SIZE,
                         dtype=torch.float)
    dec_init_state = (torch.randn(LARGE_BATCH_SIZE, LARGE_HIDDEN_SIZE,
                                  dtype=torch.float),
                      torch.randn(LARGE_BATCH_SIZE, LARGE_HIDDEN_SIZE,
                                  dtype=torch.float))
    enc_hiddens = torch.randn(LARGE_BATCH_SIZE, 20, LARGE_HIDDEN_SIZE * 2,
                              dtype=torch.float)
    enc_hiddens_proj = torch.randn(LARGE_BATCH_SIZE, 20, LARGE_HIDDEN_SIZE,
                                   dtype=torch.float)
    enc_masks = (torch.randn(LARGE_BATCH_SIZE, 20, dtype=torch.float) >= 0.5)

    self.dec_hidden_result, self.dec_state_result, self.o_t_result, self.e_t_result = \
        test_q1f(Ybar_t, dec_init_state, enc_hiddens, enc_hiddens_proj,
                 enc_masks, self.model, self.soln_model)
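# Note: enc_masks above is a random boolean tensor standing in for the usual
# pad-based mask. In the assignment's step(), positions where enc_masks is True
# are typically filled with -inf in the attention scores before the softmax,
# roughly as in the sketch below (an illustration, not the reference code):
#
#   if enc_masks is not None:
#       e_t.data.masked_fill_(enc_masks.bool(), -float('inf'))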
def setUp(self):
    self.vocab = DummyVocab()
    self.student_result = submission.NMT(12, 17, self.vocab,
                                         dropout_rate=0.34)