Beispiel #1
0
 def test_all_fields_padded_to_max_length(self):
     """Check each row of the "words" array for a three-field ListField.

     The list is built from the fixture fields ``field1``..``field3``,
     indexed with ``self.vocab`` and converted using its own padding
     lengths. Every expected row has five entries; the rows that end in
     ``0`` are presumably the padded ones (fixture contents are not
     visible here).
     """
     fields = ListField([self.field1, self.field2, self.field3])
     fields.index(self.vocab)
     arrays = fields.as_array(fields.get_padding_lengths())

     expected_rows = [
         [2, 3, 4, 5, 0],
         [2, 3, 4, 1, 5],
         [2, 3, 1, 5, 0],
     ]
     for position, expected in enumerate(expected_rows):
         numpy.testing.assert_array_almost_equal(arrays["words"][position],
                                                 numpy.array(expected))
Beispiel #2
0
 def test_list_field_can_handle_empty_text_fields(self):
     """Check the "words" array when the last list entry is the empty text field.

     The third expected row is all zeros, matching the slot occupied by
     ``self.empty_text_field``.
     """
     fields = ListField([self.field1, self.field2, self.empty_text_field])
     fields.index(self.vocab)
     lengths = fields.get_padding_lengths()
     arrays = fields.as_array(lengths)

     expected_words = numpy.array([
         [2, 3, 4, 5, 0],
         [2, 3, 4, 1, 5],
         [0, 0, 0, 0, 0],
     ])
     numpy.testing.assert_array_equal(arrays["words"], expected_words)
Beispiel #3
0
    def test_as_array_can_handle_multiple_token_indexers_and_empty_fields(self):
        """Check "words" and "characters" arrays when an empty field leads the list.

        The three fixture text fields are switched to
        ``self.words_and_characters_indexers`` first. The list under test is
        ``[field1.empty_field(), field1, field2]``, so slot 0 is expected to
        be all zeros in both output arrays.
        """
        # pylint: disable=protected-access
        for text_field in (self.field1, self.field2, self.field3):
            text_field._token_indexers = self.words_and_characters_indexers

        fields = ListField([self.field1.empty_field(), self.field1, self.field2])
        fields.index(self.vocab)
        arrays = fields.as_array(fields.get_padding_lengths())
        word_ids = arrays["words"]
        char_ids = arrays["characters"]

        expected_words = numpy.array([
            [0, 0, 0, 0, 0],
            [2, 3, 4, 5, 0],
            [2, 3, 4, 1, 5],
        ])
        numpy.testing.assert_array_almost_equal(word_ids, expected_words)

        # Slot 0 holds the empty field: a 5 x 9 block of zeros.
        numpy.testing.assert_array_almost_equal(char_ids[0], numpy.zeros([5, 9]))

        # The first three character rows are identical for slots 1 and 2.
        leading_rows = [
            [5, 1, 1, 2, 0, 0, 0, 0, 0],
            [1, 2, 0, 0, 0, 0, 0, 0, 0],
            [1, 0, 0, 0, 0, 0, 0, 0, 0],
        ]
        closing_row = [2, 3, 4, 5, 3, 4, 6, 3, 0]
        zero_row = [0] * 9

        expected_slot_1 = numpy.array(leading_rows + [closing_row, zero_row])
        numpy.testing.assert_array_almost_equal(char_ids[1], expected_slot_1)

        expected_slot_2 = numpy.array(
            leading_rows + [[1, 1, 1, 1, 3, 1, 3, 4, 5], closing_row]
        )
        numpy.testing.assert_array_almost_equal(char_ids[2], expected_slot_2)
Beispiel #4
0
    def test_pad_can_handle_multiple_token_indexers(self):
        """Check "words" and "characters" arrays for three sentences of 4, 5 and 4 tokens.

        Each sentence becomes a ``TextField`` using
        ``self.words_and_characters_indexer``. The expected word matrix is
        3 x 5 and each expected character matrix is 5 x 9; entries beyond a
        sentence's or token's own length are zero.
        """
        sentences = [
            ["this", "is", "a", "sentence"],
            ["this", "is", "a", "different", "sentence"],
            ["this", "is", "another", "sentence"],
        ]
        fields = ListField([TextField(tokens, self.words_and_characters_indexer)
                            for tokens in sentences])
        fields.index(self.vocab)
        arrays = fields.as_array(fields.get_padding_lengths())
        word_ids = arrays["words"]
        char_ids = arrays["characters"]

        expected_words = numpy.array([
            [2, 3, 4, 5, 0],
            [2, 3, 4, 1, 5],
            [2, 3, 1, 5, 0],
        ])
        numpy.testing.assert_array_almost_equal(word_ids, expected_words)

        # Row names follow the token each row appears to line up with
        # (row i of a sentence's matrix <-> token i of that sentence).
        this_row = [5, 1, 1, 2, 0, 0, 0, 0, 0]
        is_row = [1, 2, 0, 0, 0, 0, 0, 0, 0]
        a_row = [1, 0, 0, 0, 0, 0, 0, 0, 0]
        different_row = [1, 1, 1, 1, 3, 1, 3, 4, 5]
        another_row = [1, 4, 1, 5, 1, 3, 1, 0, 0]
        sentence_row = [2, 3, 4, 5, 3, 4, 6, 3, 0]
        blank_row = [0] * 9

        expected_characters = [
            [this_row, is_row, a_row, sentence_row, blank_row],
            [this_row, is_row, a_row, different_row, sentence_row],
            [this_row, is_row, another_row, sentence_row, blank_row],
        ]
        for position, expected in enumerate(expected_characters):
            numpy.testing.assert_array_almost_equal(char_ids[position],
                                                    numpy.array(expected))
Beispiel #5
0
 def test_list_field_can_handle_empty_sequence_label_fields(self):
     """Check the array for two sequence-label fields followed by an empty one.

     The first two expected rows are identical (the same fixture field is
     used twice) and the row for ``self.empty_sequence_label_field`` is
     all zeros.
     """
     members = [
         self.sequence_label_field,
         self.sequence_label_field,
         self.empty_sequence_label_field,
     ]
     fields = ListField(members)
     fields.index(self.vocab)
     lengths = fields.get_padding_lengths()
     result = fields.as_array(lengths)

     expected = numpy.array([
         [1, 1, 0, 1],
         [1, 1, 0, 1],
         [0, 0, 0, 0],
     ])
     numpy.testing.assert_array_equal(result, expected)
Beispiel #6
0
 def test_nested_list_fields_are_padded_correctly(self):
     """Check padding lengths and the array for a ListField of ListFields.

     The outer list holds an empty copy of the five-label list, the
     five-label list itself and a six-label list. The reported padding
     lengths must be 3 outer fields by 6 inner fields, and unfilled
     label slots are expected to come out as ``-1``.
     """
     # Iterating a string yields the same single-character labels.
     five_labels = ListField([LabelField(label) for label in "abcde"])
     six_labels = ListField([LabelField(label) for label in "fghijk"])

     outer = ListField([five_labels.empty_field(), five_labels, six_labels])
     outer.index(self.vocab)

     lengths = outer.get_padding_lengths()
     assert lengths == {'num_fields': 3, 'list_num_fields': 6}

     expected = [
         [[-1], [-1], [-1], [-1], [-1], [-1]],
         [[0], [1], [2], [3], [4], [-1]],
         [[5], [6], [7], [8], [9], [10]],
     ]
     numpy.testing.assert_almost_equal(outer.as_array(lengths), expected)
Beispiel #7
0
 def test_fields_can_pad_to_greater_than_max_length(self):
     """Check the "words" array when padding lengths are raised by hand.

     After reading the list's own padding lengths, ``list_num_tokens`` is
     overridden to 7 and ``num_fields`` to 5. The result is expected to
     have five rows of seven entries, with the last two rows all zeros.
     """
     fields = ListField([self.field1, self.field2, self.field3])
     fields.index(self.vocab)

     lengths = fields.get_padding_lengths()
     lengths["list_num_tokens"] = 7
     lengths["num_fields"] = 5
     arrays = fields.as_array(lengths)

     all_zero = [0] * 7
     expected_rows = [
         [2, 3, 4, 5, 0, 0, 0],
         [2, 3, 4, 1, 5, 0, 0],
         [2, 3, 1, 5, 0, 0, 0],
         all_zero,
         all_zero,
     ]
     for position, expected in enumerate(expected_rows):
         numpy.testing.assert_array_almost_equal(arrays["words"][position],
                                                 numpy.array(expected))
Beispiel #8
0
    def test_all_fields_padded_to_max_length(self):
        """Check that sentences of 4, 5 and 4 tokens all yield five-entry "words" rows.

        Each sentence is wrapped in a ``TextField`` using
        ``self.word_indexer``. The two four-token sentences are expected to
        end in a single ``0``.
        """
        sentences = [
            ["this", "is", "a", "sentence"],
            ["this", "is", "a", "different", "sentence"],
            ["this", "is", "another", "sentence"],
        ]
        fields = ListField([TextField(tokens, self.word_indexer) for tokens in sentences])
        fields.index(self.vocab)
        arrays = fields.as_array(fields.get_padding_lengths())

        expected_rows = [
            [2, 3, 4, 5, 0],
            [2, 3, 4, 1, 5],
            [2, 3, 1, 5, 0],
        ]
        for position, expected in enumerate(expected_rows):
            numpy.testing.assert_array_almost_equal(arrays["words"][position],
                                                    numpy.array(expected))
Beispiel #9
0
    def test_padding_handles_list_fields(self):
        """Check the array for a ListField of differently shaped ArrayFields.

        The list holds a 2 x 3 array of ones, a 1 x 5 array of ones and an
        empty field derived from the first. The expected result is a
        3 x 2 x 5 array whose only non-zero entries are the original ones.
        """
        two_by_three = ArrayField(numpy.ones([2, 3]))
        one_by_five = ArrayField(numpy.ones([1, 5]))
        blank = two_by_three.empty_field()
        fields = ListField([two_by_three, one_by_five, blank])

        result = fields.as_array(fields.get_padding_lengths())

        # Start from zeros and fill in only the cells the inputs occupy.
        expected = numpy.zeros([3, 2, 5])
        expected[0, :, :3] = 1.
        expected[1, 0, :] = 1.
        numpy.testing.assert_array_equal(result, expected)
Beispiel #10
0
    def test_fields_can_pad_to_greater_than_max_length(self):
        """Check the "words" array when padding lengths are raised by hand.

        Three sentences of 4, 5 and 4 tokens are wrapped in ``TextField``
        objects using ``self.word_indexer``. ``num_tokens`` is then
        overridden to 7 and ``num_fields`` to 5, so the result is expected
        to have five rows of seven entries, with the last two rows all zeros.
        """
        sentences = [
            ["this", "is", "a", "sentence"],
            ["this", "is", "a", "different", "sentence"],
            ["this", "is", "another", "sentence"],
        ]
        fields = ListField([TextField(tokens, self.word_indexer) for tokens in sentences])
        fields.index(self.vocab)

        lengths = fields.get_padding_lengths()
        lengths["num_tokens"] = 7
        lengths["num_fields"] = 5
        arrays = fields.as_array(lengths)

        all_zero = [0] * 7
        expected_rows = [
            [2, 3, 4, 5, 0, 0, 0],
            [2, 3, 4, 1, 5, 0, 0],
            [2, 3, 1, 5, 0, 0, 0],
            all_zero,
            all_zero,
        ]
        for position, expected in enumerate(expected_rows):
            numpy.testing.assert_array_almost_equal(arrays["words"][position],
                                                    numpy.array(expected))