def question_1a_sanity_check():
    """Sanity check for the words2charindices function.

    Runs two checks against a fresh ``VocabEntry``:

    1. a small hand-written batch compared with inline gold indices;
    2. a larger batch compared with gold indices unpickled from
       ``./sanity_check_en_es_data/1a_tgt.pkl``.

    Raises:
        AssertionError: if either result differs from its gold value.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1a: words2charindices()")
    print("-" * 80)
    vocab = VocabEntry()

    print('Running test on small list of sentences')
    sentences = [["a", "b", "c?"], ["~d~", "c", "b", "a"]]
    small_ind = vocab.words2charindices(sentences)
    small_ind_gold = [[[1, 30, 2], [1, 31, 2], [1, 32, 70, 2]],
                      [[1, 85, 33, 85, 2], [1, 32, 2], [1, 31, 2], [1, 30, 2]]]
    assert (small_ind == small_ind_gold), \
        "small test resulted in indices list {:}, expected {:}".format(small_ind, small_ind_gold)

    print('Running test on large list of sentences')
    tgt_sents = [
        ['<s>', "Let's", 'start', 'by', 'thinking', 'about', 'the', 'member', 'countries', 'of', 'the',
         'OECD,', 'or', 'the', 'Organization', 'of', 'Economic', 'Cooperation', 'and', 'Development.',
         '</s>'],
        ['<s>', 'In', 'the', 'case', 'of', 'gun', 'control,', 'we', 'really', 'underestimated', 'our',
         'opponents.', '</s>'],
        ['<s>', 'Let', 'me', 'share', 'with', 'those', 'of', 'you', 'here', 'in', 'the', 'first', 'row.',
         '</s>'],
        ['<s>', 'It', 'suggests', 'that', 'we', 'care', 'about', 'the', 'fight,', 'about', 'the',
         'challenge.', '</s>'],
        ['<s>', 'A', 'lot', 'of', 'numbers', 'there.', 'A', 'lot', 'of', 'numbers.', '</s>'],
    ]
    tgt_ind = vocab.words2charindices(tgt_sents)
    # The context manager closes the fixture file even if unpickling fails;
    # the previous bare open() left the handle to the garbage collector.
    with open('./sanity_check_en_es_data/1a_tgt.pkl', 'rb') as gold_file:
        tgt_ind_gold = pickle.load(gold_file)
    assert (tgt_ind == tgt_ind_gold), \
        "target vocab test resulted in indices list {:}, expected {:}".format(tgt_ind, tgt_ind_gold)

    print("All Sanity Checks Passed for Question 1a: words2charindices()!")
    print("-" * 80)
def question_1g_sanity_check(model):
    """Sanity check for the size of the tensor built by to_input_tensor_char().

    Converts a fixed four-sentence batch with ``vocab.to_input_tensor_char``
    on ``model.device`` and asserts that the result has size ``(6, 4, 21)``.

    Args:
        model: any object exposing a ``device`` attribute; it is only used to
            choose the device passed to ``to_input_tensor_char``.

    Raises:
        AssertionError: if the returned tensor does not have the expected size.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]
    padded_sentences = vocab.to_input_tensor_char(sentences, model.device)

    # The longest sentence above has 6 words and the batch holds 4 sentences;
    # 21 is the per-word length this check has always expected (presumably the
    # maximum word length used by the padding helper -- confirm).
    # A torch.Size is enough here: no need to allocate a random tensor just
    # to read its size back.
    expected_size = torch.Size((6, 4, 21))
    print(expected_size)
    print(padded_sentences.size())
    assert padded_sentences.size() == expected_size, \
        "to_input_tensor_char size is incorrect: it should be:\n {} but is:\n{}".format(
            expected_size, padded_sentences.size())

    print("Sanity Check Passed for Question 1g: Padding!")
    print("-" * 80)
def question_1c_sanity_check():
    """Sanity check for the to_input_tensor_char() function.

    First confirms that ``pad_sents_char`` reproduces the gold padding loaded
    from ``./sanity_check_en_es_data/gold_padded_sentences.pkl``, then checks
    that ``vocab.to_input_tensor_char`` returns a tensor of size
    ``(max_sentence_length, batch_size, max_word_length)``, with the three
    dimensions read off the gold padding.

    NOTE(review): a second function with this same name is defined later in
    the file; at import time the later definition replaces this one.

    Raises:
        AssertionError: if the padding or the tensor size is wrong.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1c: To input tensor")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]
    word_ids = vocab.words2charindices(sentences)

    padded_sentences = pad_sents_char(word_ids, 0)
    gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    assert padded_sentences == gold_padded_sentences, \
        "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(
            gold_padded_sentences, padded_sentences)

    # Derive the expected tensor dimensions from the nested gold lists.
    batch_size = len(gold_padded_sentences)
    max_sentence_length = len(gold_padded_sentences[0])
    max_word_length = len(gold_padded_sentences[0][0])
    expected_size = (max_sentence_length, batch_size, max_word_length)

    padded_sentences_tensor = vocab.to_input_tensor_char(sentences, device=torch.device('cpu'))
    # Report both sizes on failure instead of a bare AssertionError.
    assert padded_sentences_tensor.size() == expected_size, \
        "to_input_tensor_char size is incorrect: it should be:\n {} but is:\n{}".format(
            expected_size, tuple(padded_sentences_tensor.size()))

    print("Sanity Check Passed for Question 1c: To input tensor")
    print("-" * 80)
def question_1b_sanity_check():
    """Sanity check for the pad_sents_char() function.

    Pads the character indices of a fixed four-sentence batch, compares the
    result with the gold padding stored in
    ``./sanity_check_en_es_data/gold_padded_sentences.pkl``, and prints the
    batch length together with the shape obtained after swapping the first
    two axes of the padded data.

    Raises:
        AssertionError: if the padding differs from the gold padding.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1b: Padding")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    char_ids = vocab.words2charindices(sentences)
    padded = pad_sents_char(char_ids, 0)
    gold = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    assert padded == gold, \
        "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(gold, padded)

    # Build the integer tensor first, then swap its first two axes.
    as_tensor = torch.tensor(padded, dtype=torch.int)
    swapped = torch.transpose(as_tensor, 0, 1)
    print(len(padded), swapped.shape)

    print("Sanity Check Passed for Question 1b: Padding!")
    print(rule)
def question_1h_sanity_check(model):
    """Sanity check for the highway network.

    Runs a ``Highway(21, 21, 21, 0.5)`` forward pass on a random
    ``(1, 1, 21)`` input as a smoke test, then prints the reference size
    ``(6, 4, 21)`` next to the size of the character tensor built for a fixed
    four-sentence batch.

    The previous version printed ``a.size()`` without ever binding ``a`` in
    this function, which raises ``NameError`` unless a global ``a`` happens to
    exist. The reference size printed here is the one the 1g sanity check
    compares against.

    Args:
        model: any object exposing a ``device`` attribute; it is only used to
            choose the device passed to ``to_input_tensor_char``.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1h: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]
    padded_sentences = vocab.to_input_tensor_char(sentences, model.device)

    # Test with batch size 1: only checks that the forward pass runs; the
    # output is not inspected.
    x = torch.rand(1, 1, 21)
    hw = Highway(21, 21, 21, 0.5)
    hw.forward(x)

    # Test with batch size 4: print the reference size and the actual size
    # side by side (informational only -- nothing is asserted here).
    expected_size = torch.Size((6, 4, 21))
    print(expected_size)
    print(padded_sentences.size())

    print("Sanity Check Passed for Question 1h: Padding!")
    print("-" * 80)
def test_question_1e_sanity_check(self):
    """Sanity check for the words2charindices function.

    Compares a small hand-written batch with inline gold indices, then a
    larger batch with gold indices unpickled from
    ``./sanity_check_en_es_data/1e_tgt.pkl``.

    Raises:
        AssertionError: if either result differs from its gold value.
    """
    vocab = VocabEntry()

    sentences = [["a", "b", "c?"], ["~d~", "c", "b", "a"]]
    small_ind = vocab.words2charindices(sentences)
    small_ind_gold = [[[1, 30, 2], [1, 31, 2], [1, 32, 70, 2]],
                      [[1, 85, 33, 85, 2], [1, 32, 2], [1, 31, 2], [1, 30, 2]]]
    assert (small_ind == small_ind_gold), \
        "small test resulted in indices list {:}, expected {:}".format(small_ind, small_ind_gold)

    print('Running test on large list of sentences')
    tgt_sents = [
        ['<s>', "Let's", 'start', 'by', 'thinking', 'about', 'the', 'member', 'countries', 'of', 'the',
         'OECD,', 'or', 'the', 'Organization', 'of', 'Economic', 'Cooperation', 'and', 'Development.',
         '</s>'],
        ['<s>', 'In', 'the', 'case', 'of', 'gun', 'control,', 'we', 'really', 'underestimated', 'our',
         'opponents.', '</s>'],
        ['<s>', 'Let', 'me', 'share', 'with', 'those', 'of', 'you', 'here', 'in', 'the', 'first', 'row.',
         '</s>'],
        ['<s>', 'It', 'suggests', 'that', 'we', 'care', 'about', 'the', 'fight,', 'about', 'the',
         'challenge.', '</s>'],
        ['<s>', 'A', 'lot', 'of', 'numbers', 'there.', 'A', 'lot', 'of', 'numbers.', '</s>'],
    ]
    tgt_ind = vocab.words2charindices(tgt_sents)
    # The context manager closes the fixture file even if unpickling fails;
    # the previous bare open() left the handle to the garbage collector.
    with open('./sanity_check_en_es_data/1e_tgt.pkl', 'rb') as gold_file:
        tgt_ind_gold = pickle.load(gold_file)
    assert (tgt_ind == tgt_ind_gold), \
        "target vocab test resulted in indices list {:}, expected {:}".format(tgt_ind, tgt_ind_gold)
def test_question_1f_sanity_check(self):
    """Sanity check for the pad_sents_char() function.

    Pads a fixed four-sentence batch and compares it with the gold padding
    from ``./sanity_check_en_es_data/gold_padded_sentences.pkl`` in three
    steps: equal length, a per-sentence comparison that reports the first
    mismatching pair, and a final whole-structure equality check.

    Raises:
        AssertionError: on the first of those checks that fails.
    """
    vocab = VocabEntry()
    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    char_ids = vocab.words2charindices(sentences)
    padded = pad_sents_char(char_ids, 0)
    gold = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')

    assert len(gold) == len(padded)

    # Locate the first sentence whose padding disagrees with the gold one.
    first_mismatch = next(
        ((have, want) for want, have in zip(gold, padded) if have != want),
        None,
    )
    if first_mismatch is not None:
        raise AssertionError('got {}: expected: {}'.format(*first_mismatch))

    assert padded == gold, \
        "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(gold, padded)
def question_1c_sanity_check():
    """Print the shape of the character tensor built for a small batch.

    Runs ``words2charindices``, ``pad_sents_char`` and
    ``to_input_tensor_char`` on a fixed four-sentence batch and prints the
    shape of the resulting tensor. Nothing is asserted.

    NOTE(review): another function with this same name is defined earlier in
    the file; being later, this definition is the one that remains bound.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1c: Input tensor")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]

    # The padded result is not used afterwards; both calls are kept so that
    # any exception they raise still surfaces here.
    char_ids = vocab.words2charindices(sentences)
    pad_sents_char(char_ids, 0)

    char_tensor = vocab.to_input_tensor_char(sentences, "cpu")
    print(char_tensor.shape)
def question_1f_sanity_check():
    """Sanity check for the pad_sents_char() function.

    Compares the padding of a fixed four-sentence batch with the gold padding
    from ``./sanity_check_en_es_data/gold_padded_sentences.pkl``, then checks
    that a single 33-element word comes back with length 21.

    Raises:
        AssertionError: if the padding differs from the gold padding or the
            over-long word is not returned with length 21.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1f: Padding")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    char_ids = vocab.words2charindices(sentences)
    padded = pad_sents_char(char_ids, 0)
    gold = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    assert padded == gold, \
        "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(gold, padded)

    # One sentence holding one word of 33 identical indices.
    overlong_batch = [[[4] * 33]]
    clipped = pad_sents_char(overlong_batch, 0)
    assert len(clipped[0][0]) == 21

    print("Sanity Check Passed for Question 1f: Padding!")
    print(rule)
def question_1g_sanity_check():
    """Sanity check for the to_input_tensor_char() function.

    Builds a reference tensor by padding the character indices with
    ``pad_sents_char`` and swapping the first two axes, then asserts that
    ``vocab.to_input_tensor_char`` returns an equal tensor.

    The reference is created in the dtype of the tensor under test. The
    previous ``torch.Tensor(ndarray)`` call always produced float32, which
    made the outcome depend on the dtype returned by
    ``to_input_tensor_char`` rather than on its values.

    NOTE(review): a function with this name that takes a ``model`` argument
    is also defined earlier in the file; the later definition wins at import.

    Raises:
        AssertionError: if the two tensors differ in size or elements.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]
    word_ids = vocab.words2charindices(sentences)
    padded_sentences = pad_sents_char(word_ids, 0)

    b = vocab.to_input_tensor_char(sentences, device="cpu")

    # Swap the first two axes of the padded nested list, then materialise it
    # with the same dtype as the tensor being checked.
    expected_array = np.asarray(padded_sentences).transpose((1, 0, 2))
    a = torch.tensor(expected_array, dtype=b.dtype)
    assert a.equal(b), "Wrong!"

    print("Sanity Check Passed for Question 1g")
    print("-" * 80)
def test2():
    """Print the element at index 1 of the padded character indices.

    Converts ``sentences`` to character indices with a fresh ``VocabEntry``,
    pads them with ``pad_sents_char`` and prints entry 1 of the result.
    """
    # NOTE(review): `sentences` is not bound inside this function; presumably
    # it is a module-level name defined elsewhere in the file -- confirm.
    char_ids = VocabEntry().words2charindices(sentences)
    padded = pad_sents_char(char_ids, 0)
    print(padded[1])
def test1():
    """Print a fresh ``VocabEntry`` and the character indices of ``sentences``."""
    vocabulary = VocabEntry()
    print('vocab', vocabulary)
    # NOTE(review): `sentences` is not bound inside this function; presumably
    # it is a module-level name defined elsewhere in the file -- confirm.
    print('Sentences in Chars', vocabulary.words2charindices(sentences))