Example #1
0
class TestTriplesFactory(unittest.TestCase):
    """Tests for relation-ID assignment, label lookup, data frame export and restriction of triples factories."""

    def setUp(self) -> None:
        """Store the training triples factory of the Nations dataset for the tests that need one."""
        self.factory = Nations().training

    @staticmethod
    def _label_array(rows):
        """Convert a nested list of ``[head, relation, tail]`` labels into a NumPy string array.

        The builtin ``str`` is used as dtype; the former ``np.str`` alias is not
        available in current NumPy releases.
        """
        return np.array(rows, dtype=str)

    def test_correct_inverse_creation(self):
        """Test if the triples and the corresponding inverses are created and sorted correctly."""
        t = self._label_array([
            ['e1', 'a.', 'e5'],
            ['e1', 'a', 'e2'],
        ])
        factory = TriplesFactory(triples=t, create_inverse_triples=True)
        # every inverse relation is expected to directly follow its base relation
        reference_relation_to_id = {
            'a': 0,
            f'a{INVERSE_SUFFIX}': 1,
            'a.': 2,
            f'a.{INVERSE_SUFFIX}': 3,
        }
        self.assertEqual(reference_relation_to_id, factory.relation_to_id)

    def test_automatic_inverse_detection(self):
        """Test if the TriplesFactory detects that the triples contain inverses and creates correct ids."""
        t = self._label_array([
            ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
            ['e1', 'a', 'e2'],
            ['e1', 'a.', 'e5'],
            ['e4', f'a{INVERSE_SUFFIX}', 'e5'],
        ])
        # The flag is passed as False on purpose: the factory is expected to turn it on itself.
        factory = TriplesFactory(triples=t, create_inverse_triples=False)
        reference_relation_to_id = {
            'a': 0,
            f'a{INVERSE_SUFFIX}': 1,
            'a.': 2,
            f'a.{INVERSE_SUFFIX}': 3,
        }
        self.assertEqual(reference_relation_to_id, factory.relation_to_id)
        self.assertTrue(factory.create_inverse_triples)

    def test_automatic_incomplete_inverse_detection(self):
        """Test if the TriplesFactory detects that the triples contain incomplete inverses and creates correct ids."""
        # Only 'a.' comes with an inverse-labelled triple; 'a' does not.
        t = self._label_array([
            ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
            ['e1', 'a', 'e2'],
            ['e1', 'a.', 'e5'],
        ])
        factory = TriplesFactory(triples=t, create_inverse_triples=False)
        # The mapping is nevertheless expected to contain an inverse for every relation.
        reference_relation_to_id = {
            'a': 0,
            f'a{INVERSE_SUFFIX}': 1,
            'a.': 2,
            f'a.{INVERSE_SUFFIX}': 3,
        }
        self.assertEqual(reference_relation_to_id, factory.relation_to_id)
        self.assertTrue(factory.create_inverse_triples)

    def test_right_sorting(self):
        """Test if the triples and the corresponding inverses are sorted correctly."""
        t = self._label_array([
            ['e1', 'a', 'e1'],
            ['e1', 'a.', 'e1'],
            ['e1', f'a.{INVERSE_SUFFIX}', 'e1'],
            ['e1', 'a.bc', 'e1'],
            ['e1', f'a.bc{INVERSE_SUFFIX}', 'e1'],
            ['e1', f'a{INVERSE_SUFFIX}', 'e1'],
            ['e1', 'abc', 'e1'],
            ['e1', f'abc{INVERSE_SUFFIX}', 'e1'],
        ])
        factory = TriplesFactory(triples=t, create_inverse_triples=False)
        # every inverse relation is expected to directly follow its base relation
        reference_relation_to_id = {
            'a': 0,
            f'a{INVERSE_SUFFIX}': 1,
            'a.': 2,
            f'a.{INVERSE_SUFFIX}': 3,
            'a.bc': 4,
            f'a.bc{INVERSE_SUFFIX}': 5,
            'abc': 6,
            f'abc{INVERSE_SUFFIX}': 7,
        }
        self.assertEqual(reference_relation_to_id, factory.relation_to_id)

    def test_id_to_label(self):
        """Test that the label-to-ID and ID-to-label mappings are inverse to each other."""
        mapping_pairs = [
            (self.factory.entity_to_id, self.factory.entity_id_to_label),
            (self.factory.relation_to_id, self.factory.relation_id_to_label),
        ]
        for label_to_id, id_to_label in mapping_pairs:
            # label -> ID -> label round trip
            for label, idx in label_to_id.items():
                self.assertEqual(label, id_to_label[idx])
            # ID -> label -> ID round trip
            for idx, label in id_to_label.items():
                self.assertEqual(idx, label_to_id[label])

    def test_tensor_to_df(self):
        """Test tensor_to_df()."""
        # check correct translation: the labels in the data frame must match the labeled triples
        labeled_triples = {tuple(row) for row in self.factory.triples.tolist()}
        tensor = self.factory.mapped_triples
        scores = torch.rand(tensor.shape[0])
        df = self.factory.tensor_to_df(tensor=tensor, scores=scores)
        label_columns = df[['head_label', 'relation_label', 'tail_label']]
        re_labeled_triples = {tuple(row) for row in label_columns.values.tolist()}
        self.assertEqual(labeled_triples, re_labeled_triples)

        # check column order: the extra keyword column comes after the standard triple columns
        self.assertEqual(TRIPLES_DF_COLUMNS + ('scores',), tuple(df.columns))

    def _check_restriction(self, original_triples_factory, entity_restriction, relation_restriction):
        """Apply one restriction to a factory and verify the result.

        :param original_triples_factory: the factory to restrict
        :param entity_restriction: entity labels to keep, or None for no entity restriction
        :param relation_restriction: relation labels to keep, or None for no relation restriction
        """
        # apply restriction
        restricted_triples_factory = original_triples_factory.new_with_restriction(
            entities=entity_restriction,
            relations=relation_restriction,
        )

        # check that the triples factory is returned as is, if and only if no restriction is to apply
        no_restriction_to_apply = entity_restriction is None and relation_restriction is None
        equal_factory_object = restricted_triples_factory is original_triples_factory
        self.assertEqual(no_restriction_to_apply, equal_factory_object)

        # check that inverse_triples is correctly carried over
        self.assertEqual(
            original_triples_factory.create_inverse_triples,
            restricted_triples_factory.create_inverse_triples,
        )

        # verify that the label-to-ID mapping has not been changed
        self.assertEqual(original_triples_factory.entity_to_id, restricted_triples_factory.entity_to_id)
        self.assertEqual(original_triples_factory.relation_to_id, restricted_triples_factory.relation_to_id)

        # verify that triples have been filtered
        if entity_restriction is not None:
            # entities occur in the head (column 0) and the tail (column 2) position
            present_entities = set(restricted_triples_factory.triples[:, 0]).union(
                restricted_triples_factory.triples[:, 2],
            )
            self.assertTrue(set(entity_restriction).issuperset(present_entities))

        if relation_restriction is not None:
            present_relations = set(restricted_triples_factory.triples[:, 1])
            exp_relations = set(relation_restriction)
            if original_triples_factory.create_inverse_triples:
                # with inverse triples, the inverses of the selected relations are allowed as well
                exp_relations = exp_relations.union(
                    map(original_triples_factory.relation_to_inverse.get, exp_relations),
                )
            self.assertTrue(exp_relations.issuperset(present_relations))

    def test_new_with_restriction(self):
        """Test new_with_restriction() with and without entity/relation restrictions and inverse triples."""
        example_relation_restriction = {
            'economicaid',
            'dependent',
        }
        example_entity_restriction = {
            'brazil',
            'burma',
            'china',
        }
        for inverse_triples in (True, False):
            original_triples_factory = Nations(create_inverse_triples=inverse_triples).training
            for entity_restriction in (None, example_entity_restriction):
                for relation_restriction in (None, example_relation_restriction):
                    # subTest reports which combination failed and keeps checking the remaining ones
                    with self.subTest(
                        inverse_triples=inverse_triples,
                        entity_restriction=entity_restriction,
                        relation_restriction=relation_restriction,
                    ):
                        self._check_restriction(
                            original_triples_factory,
                            entity_restriction,
                            relation_restriction,
                        )
Example #2
0
class TestTriplesFactory(unittest.TestCase):
    """Tests for inverse handling, label lookup, restriction, LCWA instances and splitting of triples factories."""

    def setUp(self) -> None:
        """Store the training triples factory of the Nations dataset for the tests that need one."""
        self.factory = Nations().training

    def test_correct_inverse_creation(self):
        """Test if the triples and the corresponding inverses are created."""
        t = np.array(
            [
                ["e1", "a.", "e5"],
                ["e1", "a", "e2"],
            ],
            dtype=str,
        )
        factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True)
        instances = factory.create_slcwa_instances()
        # two input triples, each expected to contribute one additional inverse instance
        assert len(instances) == 4

    def test_automatic_incomplete_inverse_detection(self):
        """Test detecting that the triples contain inverses, warns about them, and filters them out."""
        # comment(mberr): from my pov this behaviour is faulty: the triples factory is expected to say it contains
        # inverse relations, although the triples contained in it are not the same we would have when removing the
        # first triple, and passing create_inverse_triples=True.
        t = np.array(
            [
                ["e3", f"a.{INVERSE_SUFFIX}", "e10"],
                ["e1", "a", "e2"],
                ["e1", "a.", "e5"],
            ],
            dtype=str,
        )
        for create_inverse_triples in (False, True):
            # subTest reports which flag value failed and keeps checking the other one
            with self.subTest(create_inverse_triples=create_inverse_triples):
                # replace the module logger's warning method to observe whether it gets called
                with patch("pykeen.triples.triples_factory.logger.warning") as warning:
                    factory = TriplesFactory.from_labeled_triples(
                        triples=t,
                        create_inverse_triples=create_inverse_triples,
                    )
                    # check for warning
                    warning.assert_called()
                    # check for filtered triples: only the two triples without inverse suffix remain
                    assert factory.num_triples == 2
                    # check for correct inverse triples flag
                    assert factory.create_inverse_triples == create_inverse_triples

    def test_id_to_label(self):
        """Test that the label-to-ID and ID-to-label mappings are inverse to each other."""
        mapping_pairs = [
            (self.factory.entity_to_id, self.factory.entity_id_to_label),
            (self.factory.relation_to_id, self.factory.relation_id_to_label),
        ]
        for label_to_id, id_to_label in mapping_pairs:
            # label -> ID -> label round trip
            for label, idx in label_to_id.items():
                assert id_to_label[idx] == label
            # ID -> label -> ID round trip
            for idx, label in id_to_label.items():
                assert label_to_id[label] == idx

    def test_tensor_to_df(self):
        """Test tensor_to_df()."""
        # check correct translation: the labels in the data frame must match the labeled triples
        labeled_triples = {tuple(row) for row in self.factory.triples.tolist()}
        tensor = self.factory.mapped_triples
        scores = torch.rand(tensor.shape[0])
        df = self.factory.tensor_to_df(tensor=tensor, scores=scores)
        label_columns = df[["head_label", "relation_label", "tail_label"]]
        re_labeled_triples = {tuple(row) for row in label_columns.values.tolist()}
        assert labeled_triples == re_labeled_triples

        # check column order: the extra keyword column comes after the standard triple columns
        assert tuple(df.columns) == TRIPLES_DF_COLUMNS + ("scores", )

    def _test_restriction(
        self,
        original_triples_factory: TriplesFactory,
        entity_restriction: Optional[Collection[str]],
        invert_entity_selection: bool,
        relation_restriction: Optional[Collection[str]],
        invert_relation_selection: bool,
    ):
        """Run the actual test for new_with_restriction.

        :param original_triples_factory: the factory to restrict
        :param entity_restriction: entity labels to select, or None for no entity restriction
        :param invert_entity_selection: whether the entity selection is inverted
        :param relation_restriction: relation labels to select, or None for no relation restriction
        :param invert_relation_selection: whether the relation selection is inverted
        """
        # apply restriction
        restricted_triples_factory = original_triples_factory.new_with_restriction(
            entities=entity_restriction,
            relations=relation_restriction,
            invert_entity_selection=invert_entity_selection,
            invert_relation_selection=invert_relation_selection,
        )

        # check that the triples factory is returned as is, if and only if no restriction is to apply
        no_restriction_to_apply = entity_restriction is None and relation_restriction is None
        equal_factory_object = restricted_triples_factory is original_triples_factory
        assert no_restriction_to_apply == equal_factory_object

        # check that inverse_triples is correctly carried over
        assert original_triples_factory.create_inverse_triples == restricted_triples_factory.create_inverse_triples

        # verify that the label-to-ID mapping has not been changed
        assert original_triples_factory.entity_to_id == restricted_triples_factory.entity_to_id
        assert original_triples_factory.relation_to_id == restricted_triples_factory.relation_to_id

        # verify that triples have been filtered
        if entity_restriction is not None:
            # entities occur in the head (column 0) and the tail (column 2) position
            present_entities = set(restricted_triples_factory.triples[:, 0]).union(
                restricted_triples_factory.triples[:, 2],
            )
            if invert_entity_selection:
                # inverted selection: everything except the listed entities may remain
                all_entities = set(original_triples_factory.entity_id_to_label.values())
                expected_entities = all_entities.difference(entity_restriction)
            else:
                # coerce to a set so that any collection (list, tuple, ...) supports issuperset
                expected_entities = set(entity_restriction)
            assert expected_entities.issuperset(present_entities)

        if relation_restriction is not None:
            present_relations = set(restricted_triples_factory.triples[:, 1])
            if invert_relation_selection:
                # NOTE(review): unlike the entity branch, the restricted relations are not subtracted here, so
                # this only checks that no unknown relation label shows up - confirm whether that is intended.
                expected_relations = set(original_triples_factory.relation_id_to_label.values())
            else:
                expected_relations = set(relation_restriction)
            assert expected_relations.issuperset(present_relations)

    def test_new_with_restriction(self):
        """Test new_with_restriction() for all combinations of (inverted) entity and relation restrictions."""
        example_relation_restriction = {
            "economicaid",
            "dependent",
        }
        example_entity_restriction = {
            "brazil",
            "burma",
            "china",
        }
        # (restriction, invert flag) pairs; the first one applies no restriction and leaves the flag as None
        entity_options = (
            (None, None),
            (example_entity_restriction, False),
            (example_entity_restriction, True),
        )
        relation_options = (
            (None, None),
            (example_relation_restriction, False),
            (example_relation_restriction, True),
        )
        for inverse_triples in (True, False):
            original_triples_factory = Nations(create_inverse_triples=inverse_triples).training
            # Test different combinations of restrictions. The loop targets use names distinct from the example
            # sets above, so those sets are never rebound between passes of the outer loop.
            for (
                (entity_restriction, invert_entity_selection),
                (relation_restriction, invert_relation_selection),
            ) in itt.product(entity_options, relation_options):
                with self.subTest(
                    inverse_triples=inverse_triples,
                    entity_restriction=entity_restriction,
                    invert_entity_selection=invert_entity_selection,
                    relation_restriction=relation_restriction,
                    invert_relation_selection=invert_relation_selection,
                ):
                    self._test_restriction(
                        original_triples_factory=original_triples_factory,
                        entity_restriction=entity_restriction,
                        invert_entity_selection=invert_entity_selection,
                        relation_restriction=relation_restriction,
                        invert_relation_selection=invert_relation_selection,
                    )

    def test_create_lcwa_instances(self):
        """Test create_lcwa_instances."""
        factory = Nations().training
        instances = factory.create_lcwa_instances()
        assert isinstance(instances, LCWAInstances)

        # check compressed triples
        # reconstruct triples from compressed form: row i of ``compressed`` is read together with pair i
        reconstructed_triples = set()
        for row_id, hr in enumerate(instances.pairs):
            h, r = hr.tolist()
            # the column indices of the non-zero entries are taken as tail IDs
            _, tails = instances.compressed[row_id].nonzero()
            reconstructed_triples.update((h, r, t) for t in tails.tolist())
        original_triples = {tuple(hrt) for hrt in factory.mapped_triples.tolist()}
        assert original_triples == reconstructed_triples

        # check data loader
        for batch in torch.utils.data.DataLoader(instances, batch_size=2):
            assert len(batch) == 2
            assert all(torch.is_tensor(x) for x in batch)
            x, y = batch
            # read the size from the batch itself instead of assuming it equals the requested batch size
            batch_size = x.shape[0]
            assert x.shape == (batch_size, 2)
            assert x.dtype == torch.long
            assert y.shape == (batch_size, factory.num_entities)
            assert y.dtype == torch.get_default_dtype()

    def test_split_inverse_triples(self):
        """Test whether inverse triples are only created in the training factory."""
        # set create inverse triple to true
        self.factory.create_inverse_triples = True
        # split factory; the first returned factory is treated as the training split
        train, *others = self.factory.split()
        # check that in *training* inverse triple are to be created
        assert train.create_inverse_triples
        # check that in all other splits no inverse triples are to be created
        assert not any(f.create_inverse_triples for f in others)
Example #3
0
class TestTriplesFactory(unittest.TestCase):
    """Tests for inverse handling, label lookup, data frame export, restriction and LCWA instances."""

    def setUp(self) -> None:
        """Store the training triples factory of the Nations dataset for the tests that need one."""
        self.factory = Nations().training

    def test_correct_inverse_creation(self):
        """Test if the triples and the corresponding inverses are created."""
        # The builtin ``str`` is used as dtype; the former ``np.str`` alias is not available in current NumPy.
        t = np.array(
            [
                ['e1', 'a.', 'e5'],
                ['e1', 'a', 'e2'],
            ],
            dtype=str,
        )
        factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True)
        instances = factory.create_slcwa_instances()
        # two input triples, each expected to contribute one additional inverse instance
        assert len(instances) == 4

    def test_automatic_incomplete_inverse_detection(self):
        """Test detecting that the triples contain inverses, warns about them, and filters them out."""
        # comment(mberr): from my pov this behaviour is faulty: the triples factory is expected to say it contains
        # inverse relations, although the triples contained in it are not the same we would have when removing the
        # first triple, and passing create_inverse_triples=True.
        # The builtin ``str`` is used as dtype; the former ``np.str`` alias is not available in current NumPy.
        t = np.array(
            [
                ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
                ['e1', 'a', 'e2'],
                ['e1', 'a.', 'e5'],
            ],
            dtype=str,
        )
        for create_inverse_triples in (False, True):
            # subTest reports which flag value failed and keeps checking the other one
            with self.subTest(create_inverse_triples=create_inverse_triples):
                # replace the module logger's warning method to observe whether it gets called
                with patch("pykeen.triples.triples_factory.logger.warning") as warning:
                    factory = TriplesFactory.from_labeled_triples(
                        triples=t,
                        create_inverse_triples=create_inverse_triples,
                    )
                    # check for warning
                    warning.assert_called()
                    # check for filtered triples: only the two triples without inverse suffix remain
                    assert factory.num_triples == 2
                    # check for correct inverse triples flag
                    assert factory.create_inverse_triples == create_inverse_triples

    def test_id_to_label(self):
        """Test that the label-to-ID and ID-to-label mappings are inverse to each other."""
        mapping_pairs = [
            (self.factory.entity_to_id, self.factory.entity_id_to_label),
            (self.factory.relation_to_id, self.factory.relation_id_to_label),
        ]
        for label_to_id, id_to_label in mapping_pairs:
            # label -> ID -> label round trip
            for label, idx in label_to_id.items():
                assert id_to_label[idx] == label
            # ID -> label -> ID round trip
            for idx, label in id_to_label.items():
                assert label_to_id[label] == idx

    def test_tensor_to_df(self):
        """Test tensor_to_df()."""
        # check correct translation: the labels in the data frame must match the labeled triples
        labeled_triples = {tuple(row) for row in self.factory.triples.tolist()}
        tensor = self.factory.mapped_triples
        scores = torch.rand(tensor.shape[0])
        df = self.factory.tensor_to_df(tensor=tensor, scores=scores)
        label_columns = df[['head_label', 'relation_label', 'tail_label']]
        re_labeled_triples = {tuple(row) for row in label_columns.values.tolist()}
        assert labeled_triples == re_labeled_triples

        # check column order: the extra keyword column comes after the standard triple columns
        assert tuple(df.columns) == TRIPLES_DF_COLUMNS + ('scores', )

    def test_new_with_restriction(self):
        """Test new_with_restriction() with and without entity/relation restrictions and inverse triples."""
        example_relation_restriction = {
            'economicaid',
            'dependent',
        }
        example_entity_restriction = {
            'brazil',
            'burma',
            'china',
        }
        for inverse_triples in (True, False):
            original_triples_factory = Nations(create_inverse_triples=inverse_triples).training
            for entity_restriction in (None, example_entity_restriction):
                for relation_restriction in (None, example_relation_restriction):
                    # subTest reports which combination failed and keeps checking the remaining ones
                    with self.subTest(
                        inverse_triples=inverse_triples,
                        entity_restriction=entity_restriction,
                        relation_restriction=relation_restriction,
                    ):
                        # apply restriction
                        restricted_triples_factory = original_triples_factory.new_with_restriction(
                            entities=entity_restriction,
                            relations=relation_restriction,
                        )
                        # check that the triples factory is returned as is, if and only if no restriction is
                        # to apply
                        no_restriction_to_apply = entity_restriction is None and relation_restriction is None
                        equal_factory_object = restricted_triples_factory is original_triples_factory
                        assert no_restriction_to_apply == equal_factory_object

                        # check that inverse_triples is correctly carried over
                        assert (
                            original_triples_factory.create_inverse_triples
                            == restricted_triples_factory.create_inverse_triples
                        )

                        # verify that the label-to-ID mapping has not been changed
                        assert original_triples_factory.entity_to_id == restricted_triples_factory.entity_to_id
                        assert original_triples_factory.relation_to_id == restricted_triples_factory.relation_to_id

                        # verify that triples have been filtered
                        if entity_restriction is not None:
                            # entities occur in the head (column 0) and the tail (column 2) position
                            present_entities = set(restricted_triples_factory.triples[:, 0]).union(
                                restricted_triples_factory.triples[:, 2],
                            )
                            assert set(entity_restriction).issuperset(present_entities)

                        if relation_restriction is not None:
                            present_relations = set(restricted_triples_factory.triples[:, 1])
                            exp_relations = set(relation_restriction)
                            assert exp_relations.issuperset(present_relations)

    def test_create_lcwa_instances(self):
        """Test create_lcwa_instances."""
        factory = Nations().training
        instances = factory.create_lcwa_instances()
        assert isinstance(instances, LCWAInstances)

        # check compressed triples
        # reconstruct triples from compressed form: row i of ``compressed`` is read together with pair i
        reconstructed_triples = set()
        for row_id, hr in enumerate(instances.pairs):
            h, r = hr.tolist()
            # the column indices of the non-zero entries are taken as tail IDs
            _, tails = instances.compressed[row_id].nonzero()
            reconstructed_triples.update((h, r, t) for t in tails.tolist())
        original_triples = {tuple(hrt) for hrt in factory.mapped_triples.tolist()}
        assert original_triples == reconstructed_triples

        # check data loader
        for batch in torch.utils.data.DataLoader(instances, batch_size=2):
            assert len(batch) == 2
            assert all(torch.is_tensor(x) for x in batch)
            x, y = batch
            # read the size from the batch itself instead of assuming it equals the requested batch size
            batch_size = x.shape[0]
            assert x.shape == (batch_size, 2)
            assert x.dtype == torch.long
            assert y.shape == (batch_size, factory.num_entities)
            assert y.dtype == torch.get_default_dtype()