Example #1
0
def question_1g_sanity_check():
    """Check the shape of the tensor built by ``to_input_tensor_char()``.

    Four tokenised sentences are converted on the CPU and the resulting
    tensor must have shape (6, 4, 21).

    Raises:
        AssertionError: if the tensor shape differs from (6, 4, 21).
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1g: to_input_tensor_char")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]

    X = vocab.to_input_tensor_char(sentences, "cpu")

    # (max_sentence_length, batch_size, max_word_length)
    expected_shape = (6, 4, 21)
    assert X.shape == expected_shape, (
        f"Size is incorrect: it should be (6, 4, 21) but it is {X.shape}")

    print("Sanity Check Passed for Question 1g: to_input_tensor_char!")
    print(rule)
Example #2
0
def question_1c_sanity_check():
    """Sanity-check ``pad_sents_char()`` and ``to_input_tensor_char()``.

    The padded character indices must equal the gold fixture loaded from
    ``./sanity_check_en_es_data/gold_padded_sentences.pkl``. The tensor built
    by ``to_input_tensor_char()`` must then have size
    (max_sentence_length, batch_size, max_word_length), where the three
    numbers are read off the nesting of the gold fixture.

    Raises:
        AssertionError: if the padding or the tensor size is wrong.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1c: To input tensor")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    char_ids = vocab.words2charindices(sentences)

    padded_sentences = pad_sents_char(char_ids, 0)
    gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    assert padded_sentences == gold_padded_sentences, (
        "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(
            gold_padded_sentences, padded_sentences))

    # Outer list -> batch, next level -> words in a sentence, innermost -> characters.
    expected_size = (
        len(gold_padded_sentences[0]),
        len(gold_padded_sentences),
        len(gold_padded_sentences[0][0]),
    )

    cpu = torch.device('cpu')
    padded_sentences_tensor = vocab.to_input_tensor_char(sentences, device=cpu)

    assert padded_sentences_tensor.size() == expected_size

    print("Sanity Check Passed for Question 1c: To input tensor")
    print(rule)
Example #3
0
def question_1g_sanity_check(model):
    """Sanity check for the size of the tensor from to_input_tensor_char().

    Builds the character tensor for four sample sentences on ``model.device``,
    prints the expected and the actual size, and asserts that they match.

    Args:
        model: object exposing a ``device`` attribute; the tensor is created
            on that device.

    Raises:
        AssertionError: if the tensor size is not (6, 4, 21).
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]

    padded_sentences = vocab.to_input_tensor_char(sentences, model.device)

    # 6 = longest sentence, 4 = number of sentences; 21 is presumably the
    # fixed maximum word length used by VocabEntry -- TODO confirm.
    # A plain torch.Size is enough: no need to allocate a random tensor or to
    # load the gold fixture from disk just to compare sizes.
    expected_size = torch.Size([6, 4, 21])
    print(expected_size)
    print(padded_sentences.size())
    assert padded_sentences.size() == expected_size, (
        "to_input_tensor size incorrect! is incorrect: it should be:\n {} but is:\n{}".format(
            expected_size, padded_sentences.size()))

    print("Sanity Check Passed for Question 1g: Padding!")
    print("-" * 80)
Example #4
0
def question_1e_sanity_check():
    """Verify the output shape of ``to_input_tensor_char()``.

    Raises:
        AssertionError: if the output size is not [8, 4, 12].
    """
    separator = "-" * 80
    print(separator)
    print("Running Sanity Check for Question 1e: To Input Tensor Char")
    print(separator)
    vocab_entry = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ["Human", ":", "What", "do", "we", "want", "?"],
        ["Computer", ":", "Natural", "language", "processing", "!"],
        ["Human", ":", "When", "do", "we", "want", "it", "?"],
        ["Computer", ":", "When", "do", "we", "want", "what", "?"],
    ]
    output = vocab_entry.to_input_tensor_char(sentences, "cpu")

    # [sentence_length, BATCH_SIZE, word_length]
    output_expected_size = [8, 4, 12]
    output_actual_size = list(output.size())
    assert output_actual_size == output_expected_size, (
        "output shape is incorrect: it should be:\n {} but is:\n{}".format(
            output_expected_size, output_actual_size))

    print("Sanity Check Passed for Question 1e: To Input Tensor Char!")
    print(separator)
Example #5
0
def question_1g_sanity_check():
    """Sanity check for the to_input_tensor_char() function.

    Builds the input tensor on the CPU, prints it, and asserts that its shape
    is [longest sentence length, number of sentences, 21].

    Raises:
        AssertionError: if the tensor shape is not the expected one.
    """
    bar = "-" * 80
    print(bar)
    print("Running Sanity Check for Question 1g: Creating Input Tensor")
    print(bar)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]

    input_tensor = vocab.to_input_tensor_char(sentences, torch.device("cpu"))
    print(input_tensor)

    # [max_sentence_length, batch_size, max_word_length]
    correct_shape = [max(map(len, sentences)), len(sentences), 21]
    actual_shape = list(input_tensor.size())
    assert actual_shape == correct_shape, (
        "Input Tensor Creation is incorrect: it should be \n{} but is:{}".format(
            correct_shape, input_tensor.size()))
    print("Sanity Check Passed for Question 1g: Creating Input Tensor!")
    print(bar)
Example #6
0
def question_1h_sanity_check(model):
    """Smoke test for the Highway module and to_input_tensor_char().

    Builds the character tensor for four sample sentences on ``model.device``,
    runs one ``Highway`` forward pass on a random (1, 1, 21) input, and prints
    the expected and the actual tensor size. Nothing is asserted; the check
    only verifies that the calls run without raising.

    Args:
        model: object exposing a ``device`` attribute; the tensor is created
            on that device.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1h: Padding")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]

    padded_sentences = vocab.to_input_tensor_char(sentences, model.device)

    # Test with batch size 1
    x = torch.rand(1, 1, 21)
    hw = Highway(21, 21, 21, 0.5)
    hw.forward(x)
    # TODO: also exercise Highway with batch size 4.

    # The original printed the size of an undefined name `a` (NameError).
    # Print the reference size instead; 6 = longest sentence, 4 = number of
    # sentences, 21 is presumably the fixed max word length -- TODO confirm.
    expected_size = torch.Size([6, 4, 21])
    print(expected_size)
    print(padded_sentences.size())
    # NOTE: the two sizes are only printed for inspection, not asserted.

    print("Sanity Check Passed for Question 1h: Padding!")
    print("-" * 80)
def question_1g_test():
    """Custom simple test for the to_input_tensor_char() function.

    Runs on CUDA when available, otherwise on the CPU, and asserts that the
    returned tensor has shape (6, 4, 21).

    Raises:
        AssertionError: if the tensor shape differs from the gold shape.
    """
    divider = "-" * 80
    print(divider)
    print("Running Sanity Check for Question 1g: Padding")
    print(divider)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    # (max sentence length, batch size, max word length)
    gold_shape = torch.Size([6, 4, 21])

    if torch.cuda.is_available():
        device = torch.device('cuda')
    else:
        device = torch.device('cpu')
    input_tensor = vocab.to_input_tensor_char(sentences, device)

    assert input_tensor.shape == gold_shape, (
        "Ouput tensor shape is incorrect: it should be:\n {} but is:\n{}".format(
            gold_shape, input_tensor.shape))

    print("Sanity Check Passed for Question 1g: Padding!")
    print(divider)
Example #8
0
def question_1e_sanity_check():
    """Check that to_input_tensor_char() returns a tensor of size [8, 4, 12].

    Raises:
        AssertionError: if the output size differs from the expected one.
    """
    hr = "-" * 80
    print(hr)
    print("Running Sanity Check for Question 1e: To Input Tensor Char")
    print(hr)
    vocab_entry = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human', ':', 'What', 'do', 'we', 'want', '?'],
        ['Computer', ':', 'Natural', 'language', 'processing', '!'],
        ['Human', ':', 'When', 'do', 'we', 'want', 'it', '?'],
        ['Computer', ':', 'When', 'do', 'we', 'want', 'what', '?'],
    ]
    # Expected dimensions, in order: sentence length, batch size, word length.
    output_expected_size = [8, 4, 12]

    output = vocab_entry.to_input_tensor_char(sentences, 'cpu')
    observed_size = list(output.size())
    assert observed_size == output_expected_size, (
        "output shape is incorrect: it should be:\n {} but is:\n{}".format(
            output_expected_size, observed_size))

    print("Sanity Check Passed for Question 1e: To Input Tensor Char!")
    print(hr)
Example #9
0
def test_file1_method1():
    """Check each dimension of the tensor returned by to_input_tensor_char().

    For a batch of two sentences, the longest having three words, the tensor
    must have dimensions 3, 2 and 21, checked one axis at a time.

    Raises:
        AssertionError: if any of the three dimensions is wrong.
    """
    sentence = [['ciao', 'come', 'staiii'], ['sto', 'bene']]
    # (max_sent_len, batch_size, max_word_length)
    expected_dims = (3, 2, 21)

    vocab = VocabEntry()
    tens = vocab.to_input_tensor_char(sentence, torch.device('cpu'))

    assert tens.shape[0] == expected_dims[0]
    assert tens.shape[1] == expected_dims[1]
    assert tens.shape[2] == expected_dims[2], ''
def question_1g_sanity_check():
    """Smoke test for to_input_tensor_char().

    Calls the function on four sample sentences on the CPU. The result is not
    inspected and nothing is asserted, so "Passed" only means the call did
    not raise.
    """
    line = "-" * 80
    print(line)
    print("Running Sanity Check for Question 1g: Reshape")
    print(line)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    vocab.to_input_tensor_char(sentences, "cpu")

    print("Sanity Check Passed for Question 1g: Reshape!")
    print(line)
Example #11
0
def question_1c_sanity_check():
    """ Sanity check for to_input_tensor_char() function.

    Smoke test only: builds the character tensor for four sample sentences on
    the CPU. Nothing is asserted about the result.
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1c")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'],
                 ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'],
                 ['Computer:', 'When', 'do', 'we', 'want', 'what?']]
    # The original passed the `torch.device` class itself; a device argument
    # must be an instance (or a device string), so build the CPU device.
    sent_padded = vocab.to_input_tensor_char(sentences, torch.device('cpu'))
Example #12
0
def question_1g_sanity_check():
    """Smoke test for to_input_tensor_char().

    Builds the tensor on ``torch.device('cpu', 0)`` and prints its shape.
    Nothing is asserted.
    """
    rule = "-" * 80
    print(rule)
    print("Running Sanity Check for Question 1g: question_1g_sanity_check")
    print(rule)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]
    cpu_device = torch.device('cpu', 0)
    tensor_shape = vocab.to_input_tensor_char(sentences, cpu_device).shape
    print("Sanity Check Passed for Question 1g:shape={}".format(str(tensor_shape)))
    print(rule)
Example #13
0
def question_1g_sanity_check():
    """ Sanity check for to_input_tensor_char() function.

    Builds the character tensor for four sample sentences and asserts that
    its shape is (6, 4, 21). Uses the first CUDA device when CUDA is
    available and falls back to the CPU otherwise, so the check can also run
    on machines without a GPU.

    Raises:
        AssertionError: if the tensor shape is not (6, 4, 21).
    """
    print ("-"*80)
    print("Running Sanity Check for Question 1g: Padding")
    print ("-"*80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'], ['Computer:', 'Natural', 'language', 'processing!'], ['Human:', 'When', 'do', 'we', 'want', 'it?'], ['Computer:', 'When', 'do', 'we', 'want', 'what?']]
    # Hard-coding cuda:0 raises on CPU-only hosts; only request it when present.
    device = torch.device('cuda', 0) if torch.cuda.is_available() else torch.device('cpu')
    input_tensor = vocab.to_input_tensor_char(sentences, device)
    assert input_tensor.shape == (6, 4, 21)
    print("Sanity Check Passed for Question 1g: Padding!")
    print("-"*80)
Example #14
0
def question_1c_sanity_check():
    """Smoke test for Question 1c: print the shape from to_input_tensor_char().

    Also runs words2charindices() and pad_sents_char() on the same sentences;
    their results are not inspected. Nothing is asserted.
    """
    line = "-" * 80
    print(line)
    print("Running Sanity Check for Question 1c: Input tensor")
    print(line)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [
        ['Human:', 'What', 'do', 'we', 'want?'],
        ['Computer:', 'Natural', 'language', 'processing!'],
        ['Human:', 'When', 'do', 'we', 'want', 'it?'],
        ['Computer:', 'When', 'do', 'we', 'want', 'what?'],
    ]

    # Exercised only to confirm the calls run; the padded result is discarded.
    pad_sents_char(vocab.words2charindices(sentences), 0)

    tensor_shape = vocab.to_input_tensor_char(sentences, "cpu").shape
    print(tensor_shape)
Example #15
0
def question_1g_sanity_check():
    """ Sanity check for to_input_tensor_char() function.

    Compares the size of the tensor built by to_input_tensor_char() against
    the size of the gold fixture loaded from
    ``./sanity_check_en_es_data/gold_padded_sentences.pkl`` after it is turned
    into a tensor and its first two axes are swapped. Only sizes are
    compared, not values.

    Raises:
        AssertionError: if the two sizes differ.
    """
    print ("-"*80)
    print("Running Sanity Check for Question 1g: Building the input tensor")
    print ("-"*80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'], ['Computer:', 'Natural', 'language', 'processing!'], ['Human:', 'When', 'do', 'we', 'want', 'it?'], ['Computer:', 'When', 'do', 'we', 'want', 'what?']]
    device = torch.device('cpu')
    padded_tensor = vocab.to_input_tensor_char(sentences, device)
    gold_padded_sentences = torch.load('./sanity_check_en_es_data/gold_padded_sentences.pkl')
    gold_padded_tensor = torch.tensor(gold_padded_sentences, device = device).permute(1, 0, 2)
    # The message used to reference an undefined `padded_sentences`, turning a
    # failed check into a NameError. Report the two sizes actually compared.
    assert padded_tensor.size() == gold_padded_tensor.size(), "Sentence padding is incorrect: it should be:\n {} but is:\n{}".format(gold_padded_tensor.size(), padded_tensor.size())

    print("Sanity Check Passed for Question 1g: Building the input tensor!")
    print("-"*80)
def question_1g_sanity_check():
    """
    Sanity check for to_input_tensor_char() function

    Pads the character indices with pad_sents_char(), swaps the first two
    axes of the padded batch, and requires the result to equal the tensor
    returned by to_input_tensor_char() element for element.

    :return: None
    :raises AssertionError: if the two tensors differ in size or values
    """
    print("-" * 80)
    print("Running Sanity Check for Question 1g")
    print("-" * 80)
    vocab = VocabEntry()

    print("Running test on a list of sentences")
    sentences = [['Human:', 'What', 'do', 'we', 'want?'], ['Computer:', 'Natural', 'language', 'processing!'],
                 ['Human:', 'When', 'do', 'we', 'want', 'it?'], ['Computer:', 'When', 'do', 'we', 'want', 'what?']]
    word_ids = vocab.words2charindices(sentences)
    padded_sentences = pad_sents_char(word_ids, 0)

    actual = vocab.to_input_tensor_char(sentences, device="cpu")

    # torch.Tensor(...) always produces float32, which cannot be compared with
    # an integer index tensor via equal(). Build the reference in the dtype of
    # the tensor under test so that only size and values are compared.
    expected = torch.tensor(padded_sentences, dtype=actual.dtype).permute(1, 0, 2)

    assert expected.equal(actual), "Wrong!"

    print("Sanity Check Passed for Question 1g")
    print("-" * 80)
Example #17
0
def test1c():
    """Print the tensor that to_input_tensor_char() builds with device=None.

    NOTE(review): ``sentences`` is not defined inside this function;
    presumably it is a module-level name -- confirm.
    """
    vocab = VocabEntry()
    print(vocab.to_input_tensor_char(sentences, None))