def test_list_field_can_handle_empty_index_fields(self):
     """A ListField containing an empty index field still converts to a tensor.

     The list holds ``self.index_field`` twice followed by
     ``self.empty_index_field``; the expected tensor is ``[[1], [1], [-1]]``.
     """
     members = [self.index_field, self.index_field, self.empty_index_field]
     list_field = ListField(members)
     list_field.index(self.vocab)
     padding_lengths = list_field.get_padding_lengths()
     actual = list_field.as_tensor(padding_lengths).detach().cpu().numpy()
     expected = numpy.array([[1], [1], [-1]])
     numpy.testing.assert_array_equal(actual, expected)
 def test_list_field_can_handle_empty_text_fields(self):
     """A ListField containing an empty text field still converts to tensors.

     The ``"words"`` tensor is expected to have an all-zero row for the
     ``self.empty_text_field`` entry.
     """
     members = [self.field1, self.field2, self.empty_text_field]
     list_field = ListField(members)
     list_field.index(self.vocab)
     padding_lengths = list_field.get_padding_lengths()
     words = list_field.as_tensor(padding_lengths)["words"]
     expected = numpy.array([
         [2, 3, 4, 5, 0],
         [2, 3, 4, 1, 5],
         [0, 0, 0, 0, 0],
     ])
     numpy.testing.assert_array_equal(words.detach().cpu().numpy(), expected)
 def test_all_fields_padded_to_max_length(self):
     """Every row of the ``"words"`` tensor has the same, padded length.

     Each of the three fields is compared row by row against an expected
     five-element index vector.
     """
     list_field = ListField([self.field1, self.field2, self.field3])
     list_field.index(self.vocab)
     padding_lengths = list_field.get_padding_lengths()
     tensor_dict = list_field.as_tensor(padding_lengths)
     expected_rows = (
         [2, 3, 4, 5, 0],
         [2, 3, 4, 1, 5],
         [2, 3, 1, 5, 0],
     )
     for position, expected in enumerate(expected_rows):
         actual = tensor_dict["words"][position].detach().cpu().numpy()
         numpy.testing.assert_array_almost_equal(actual, numpy.array(expected))
 def test_nested_list_fields_are_padded_correctly(self):
     """Nested ListFields of label fields are padded to a common inner length.

     The outer list holds an empty copy of the five-label list, the
     five-label list itself, and a six-label list. The padding lengths must
     report 3 outer fields and 6 inner fields, and the expected tensor uses
     ``-1`` in every padded slot.
     """
     five_labels = ListField(
         [LabelField(label) for label in ['a', 'b', 'c', 'd', 'e']])
     six_labels = ListField(
         [LabelField(label) for label in ['f', 'g', 'h', 'i', 'j', 'k']])
     members = [five_labels.empty_field(), five_labels, six_labels]
     list_field = ListField(members)
     list_field.index(self.vocab)

     padding_lengths = list_field.get_padding_lengths()
     assert padding_lengths == {'num_fields': 3, 'list_num_fields': 6}

     actual = list_field.as_tensor(padding_lengths).detach().cpu().numpy()
     expected = [
         [-1, -1, -1, -1, -1, -1],
         [0, 1, 2, 3, 4, -1],
         [5, 6, 7, 8, 9, 10],
     ]
     numpy.testing.assert_almost_equal(actual, expected)
    def test_padding_handles_list_fields_with_padding_values(self):
        """ArrayFields built with ``padding_value=-1`` are padded with -1.

        A 2x3 array of ones, a 1x5 array of ones and an empty field derived
        from the first are combined; the expected 3x2x5 result has ``-1`` in
        every position that does not hold an original value.
        """
        wide_short = ArrayField(numpy.ones([2, 3]), padding_value=-1)
        flat_long = ArrayField(numpy.ones([1, 5]), padding_value=-1)
        members = [wide_short, flat_long, wide_short.empty_field()]
        list_field = ListField(members)

        padding_lengths = list_field.get_padding_lengths()
        actual = list_field.as_tensor(padding_lengths).detach().cpu().numpy()

        all_padding = [-1., -1., -1., -1., -1.]
        expected = numpy.array([
            [[1., 1., 1., -1., -1.], [1., 1., 1., -1., -1.]],
            [[1., 1., 1., 1., 1.], all_padding],
            [all_padding, all_padding],
        ])
        numpy.testing.assert_array_equal(actual, expected)
Exemple #6
0
    def test_doubly_nested_field_works(self):
        """A ListField of ListFields of production rules yields nested lists.

        The outer result must be a list of two inner lists, each of length
        three (the second inner list is padded). Every entry is checked as a
        ``(rule string, is-global flag, tensor-or-None)`` triple.
        """
        s_rule = ProductionRuleField('S -> [NP, VP]', is_global_rule=True)
        np_rule = ProductionRuleField('NP -> test', is_global_rule=True)
        vp_rule = ProductionRuleField('VP -> eat', is_global_rule=False)
        list_field = ListField([ListField([s_rule, np_rule, vp_rule]),
                                ListField([s_rule, np_rule])])
        list_field.index(self.vocab)
        tensors = list_field.as_tensor(list_field.get_padding_lengths())

        # Structural checks first: two inner lists, both padded to length 3.
        assert isinstance(tensors, list)
        assert len(tensors) == 2
        for inner in tensors:
            assert isinstance(inner, list)
            assert len(inner) == 3

        # Each expectation is (rule text, global flag, expected index list).
        # An index list of None means the tensor slot itself must be None.
        expected_s = ('S -> [NP, VP]', True, [self.s_rule_index])
        expected_np = ('NP -> test', True, [self.np_index])
        expected = [
            [expected_s, expected_np, ('VP -> eat', False, None)],
            # The last item of the second list was just padding.
            [expected_s, expected_np, ('', False, None)],
        ]

        for inner, expected_inner in zip(tensors, expected):
            for tensor_tuple, wanted in zip(inner, expected_inner):
                rule_text, is_global, index_list = wanted
                assert tensor_tuple[0] == rule_text
                assert tensor_tuple[1] is is_global
                if index_list is None:
                    assert tensor_tuple[2] is None
                else:
                    assert_almost_equal(
                        tensor_tuple[2].detach().cpu().numpy(), index_list)
    def test_as_tensor_can_handle_multiple_token_indexers(self):
        """Both ``"words"`` and ``"characters"`` tensors are produced and padded.

        The three text fields have their token indexers replaced with
        ``self.words_and_characters_indexers`` before being wrapped in a
        ListField; the word tensor and each per-field character grid are then
        compared against expected index arrays.
        """
        # pylint: disable=protected-access
        for text_field in (self.field1, self.field2, self.field3):
            text_field._token_indexers = self.words_and_characters_indexers

        list_field = ListField([self.field1, self.field2, self.field3])
        list_field.index(self.vocab)
        tensor_dict = list_field.as_tensor(list_field.get_padding_lengths())
        words = tensor_dict["words"].detach().cpu().numpy()
        characters = tensor_dict["characters"].detach().cpu().numpy()

        expected_words = numpy.array([
            [2, 3, 4, 5, 0],
            [2, 3, 4, 1, 5],
            [2, 3, 1, 5, 0],
        ])
        numpy.testing.assert_array_almost_equal(words, expected_words)

        # One expected character grid per field, in list order.
        expected_characters = [
            [[5, 1, 1, 2, 0, 0, 0, 0, 0],
             [1, 2, 0, 0, 0, 0, 0, 0, 0],
             [1, 0, 0, 0, 0, 0, 0, 0, 0],
             [2, 3, 4, 5, 3, 4, 6, 3, 0],
             [0, 0, 0, 0, 0, 0, 0, 0, 0]],
            [[5, 1, 1, 2, 0, 0, 0, 0, 0],
             [1, 2, 0, 0, 0, 0, 0, 0, 0],
             [1, 0, 0, 0, 0, 0, 0, 0, 0],
             [1, 1, 1, 1, 3, 1, 3, 4, 5],
             [2, 3, 4, 5, 3, 4, 6, 3, 0]],
            [[5, 1, 1, 2, 0, 0, 0, 0, 0],
             [1, 2, 0, 0, 0, 0, 0, 0, 0],
             [1, 4, 1, 5, 1, 3, 1, 0, 0],
             [2, 3, 4, 5, 3, 4, 6, 3, 0],
             [0, 0, 0, 0, 0, 0, 0, 0, 0]],
        ]
        for position, grid in enumerate(expected_characters):
            numpy.testing.assert_array_almost_equal(
                characters[position], numpy.array(grid))
 def test_fields_can_pad_to_greater_than_max_length(self):
     """Padding lengths may be raised above what the data itself requires.

     ``list_num_tokens`` is overridden to 7 and ``num_fields`` to 5, so the
     ``"words"`` tensor is expected to contain five rows of seven entries,
     with the last two rows entirely zero.
     """
     list_field = ListField([self.field1, self.field2, self.field3])
     list_field.index(self.vocab)
     padding_lengths = list_field.get_padding_lengths()
     padding_lengths["list_num_tokens"] = 7
     padding_lengths["num_fields"] = 5
     tensor_dict = list_field.as_tensor(padding_lengths)

     expected_rows = (
         [2, 3, 4, 5, 0, 0, 0],
         [2, 3, 4, 1, 5, 0, 0],
         [2, 3, 1, 5, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0],
         [0, 0, 0, 0, 0, 0, 0],
     )
     for position, expected in enumerate(expected_rows):
         actual = tensor_dict["words"][position].detach().cpu().numpy()
         numpy.testing.assert_array_almost_equal(actual, numpy.array(expected))
 def test_get_padding_lengths(self):
     """Padding lengths report 3 fields and 5 tokens for the three fields."""
     members = [self.field1, self.field2, self.field3]
     list_field = ListField(members)
     list_field.index(self.vocab)
     expected = {"num_fields": 3, "list_num_tokens": 5}
     lengths = list_field.get_padding_lengths()
     assert lengths == expected