Esempio n. 1
0
class TestSplit(unittest.TestCase):
    """Check the sizes of the parts produced by splitting a triples factory."""

    triples_factory: TriplesFactory

    def setUp(self) -> None:
        """Load the Nations training factory and sanity-check its triple count."""
        self.triples_factory = Nations().training
        self.assertEqual(1592, self.triples_factory.num_triples)

    def test_split_naive(self):
        """Split once with a single ratio and compare both part sizes."""
        ratio = 0.8
        train_part, test_part = self.triples_factory.split(ratio)

        # the first part is expected to hold the rounded-down share, the second the rest
        total = self.triples_factory.num_triples
        n_train = int(total * ratio)
        n_test = total - n_train

        self.assertEqual(n_train, train_part.num_triples)
        self.assertEqual(n_test, test_part.num_triples)

    def test_split_multi(self):
        """Split with two ratios and compare the sizes of the three parts."""
        ratios = (0.80, 0.10)
        first, second, third = self.triples_factory.split(ratios)

        # each explicit ratio gives a rounded-down share; the last part takes the remainder
        total = self.triples_factory.num_triples
        expected_sizes = [int(total * share) for share in ratios]
        expected_sizes.append(total - sum(expected_sizes))

        for expected, part in zip(expected_sizes, (first, second, third)):
            self.assertEqual(expected, part.num_triples)
Esempio n. 2
0
    def test_metadata(self):
        """Test metadata passing for triples factories.

        The source path is expected to be kept by factories derived through
        restriction and splitting, and to be dropped when cloning with
        ``keep_metadata=False``. The ``repr`` of each factory is checked as well.
        """
        t = Nations().training
        self.assertEqual(NATIONS_TRAIN_PATH, t.metadata['path'])
        self.assertEqual(
            (
                f'TriplesFactory(num_entities=14, num_relations=55, num_triples=1592,'
                f' inverse_triples=False, path="{NATIONS_TRAIN_PATH}")'
            ),
            repr(t),
        )

        # restriction to a subset of entities
        entities = ['poland', 'ussr']
        x = t.new_with_restriction(entities=entities)
        entities_ids = t.entities_to_ids(entities=entities)
        self.assertEqual(NATIONS_TRAIN_PATH, x.metadata['path'])
        self.assertEqual(
            (
                f'TriplesFactory(num_entities=14, num_relations=55, num_triples=37,'
                f' inverse_triples=False, entity_restriction={repr(entities_ids)}, path="{NATIONS_TRAIN_PATH}")'
            ),
            repr(x),
        )

        # restriction to a subset of relations
        relations = ['negativebehavior']
        v = t.new_with_restriction(relations=relations)
        relations_ids = t.relations_to_ids(relations=relations)
        # check the relation-restricted factory itself (the entity-restricted one was checked above)
        self.assertEqual(NATIONS_TRAIN_PATH, v.metadata['path'])
        self.assertEqual(
            (
                f'TriplesFactory(num_entities=14, num_relations=55, num_triples=29,'
                f' inverse_triples=False, path="{NATIONS_TRAIN_PATH}", relation_restriction={repr(relations_ids)})'
            ),
            repr(v),
        )

        # cloning without metadata must not carry the path over
        w = t.clone_and_exchange_triples(t.triples[0:5], keep_metadata=False)
        self.assertIsInstance(w, TriplesFactory)
        self.assertNotIn('path', w.metadata)
        self.assertEqual(
            'TriplesFactory(num_entities=14, num_relations=55, num_triples=5, inverse_triples=False)',
            repr(w),
        )

        # both parts of a split keep the path
        y, z = t.split()
        self.assertEqual(NATIONS_TRAIN_PATH, y.metadata['path'])
        self.assertEqual(NATIONS_TRAIN_PATH, z.metadata['path'])
Esempio n. 3
0
class TestTriplesFactory(unittest.TestCase):
    """Class for testing triples factories."""

    def setUp(self) -> None:
        """Instantiate test instance."""
        self.factory = Nations().training

    def test_correct_inverse_creation(self):
        """Test if the triples and the corresponding inverses are created."""
        t = [
            ["e1", "a.", "e5"],
            ["e1", "a", "e2"],
        ]
        t = np.array(t, dtype=str)
        factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True)
        instances = factory.create_slcwa_instances()
        # two labeled triples are passed in; with inverses the count is expected to double
        assert len(instances) == 4

    def test_automatic_incomplete_inverse_detection(self):
        """Test detecting that the triples contain inverses, warns about them, and filters them out."""
        # comment(mberr): from my pov this behaviour is faulty: the triples factory is expected to say it contains
        # inverse relations, although the triples contained in it are not the same we would have when removing the
        # first triple, and passing create_inverse_triples=True.
        t = [
            ["e3", f"a.{INVERSE_SUFFIX}", "e10"],
            ["e1", "a", "e2"],
            ["e1", "a.", "e5"],
        ]
        t = np.array(t, dtype=str)
        for create_inverse_triples in (False, True):
            with patch("pykeen.triples.triples_factory.logger.warning") as warning:
                factory = TriplesFactory.from_labeled_triples(
                    triples=t,
                    create_inverse_triples=create_inverse_triples,
                )
                # check for warning
                warning.assert_called()
                # check for filtered triples
                assert factory.num_triples == 2
                # check for correct inverse triples flag
                assert factory.create_inverse_triples == create_inverse_triples

    def test_id_to_label(self):
        """Test ID-to-label conversion."""
        for label_to_id, id_to_label in [
            (self.factory.entity_to_id, self.factory.entity_id_to_label),
            (self.factory.relation_to_id, self.factory.relation_id_to_label),
        ]:
            # both directions have to be mutually inverse
            for label, idx in label_to_id.items():
                assert id_to_label[idx] == label
            for idx, label in id_to_label.items():
                assert label_to_id[label] == idx

    def test_tensor_to_df(self):
        """Test tensor_to_df()."""
        # check correct translation
        labeled_triples = {tuple(row) for row in self.factory.triples.tolist()}
        tensor = self.factory.mapped_triples
        scores = torch.rand(tensor.shape[0])
        df = self.factory.tensor_to_df(tensor=tensor, scores=scores)
        re_labeled_triples = {
            tuple(row)
            for row in df[["head_label", "relation_label", "tail_label"]].values.tolist()
        }
        assert labeled_triples == re_labeled_triples

        # check column order
        assert tuple(df.columns) == TRIPLES_DF_COLUMNS + ("scores",)

    def _test_restriction(
        self,
        original_triples_factory: TriplesFactory,
        entity_restriction: Optional[Collection[str]],
        invert_entity_selection: bool,
        relation_restriction: Optional[Collection[str]],
        invert_relation_selection: bool,
    ):
        """Run the actual test for new_with_restriction.

        :param original_triples_factory: the factory the restriction is applied to
        :param entity_restriction: entity labels to restrict to, or None for no entity restriction
        :param invert_entity_selection: whether the entity selection is inverted
        :param relation_restriction: relation labels to restrict to, or None for no relation restriction
        :param invert_relation_selection: whether the relation selection is inverted
        """
        # apply restriction
        restricted_triples_factory = original_triples_factory.new_with_restriction(
            entities=entity_restriction,
            relations=relation_restriction,
            invert_entity_selection=invert_entity_selection,
            invert_relation_selection=invert_relation_selection,
        )

        # check that the triples factory is returned as is, if and only if no restriction is to apply
        no_restriction_to_apply = entity_restriction is None and relation_restriction is None
        equal_factory_object = restricted_triples_factory is original_triples_factory
        assert no_restriction_to_apply == equal_factory_object

        # check that inverse_triples is correctly carried over
        assert original_triples_factory.create_inverse_triples == restricted_triples_factory.create_inverse_triples

        # verify that the label-to-ID mapping has not been changed
        assert original_triples_factory.entity_to_id == restricted_triples_factory.entity_to_id
        assert original_triples_factory.relation_to_id == restricted_triples_factory.relation_to_id

        # verify that triples have been filtered
        if entity_restriction is not None:
            present_entities = set(restricted_triples_factory.triples[:, 0]).union(
                restricted_triples_factory.triples[:, 2],
            )
            if invert_entity_selection:
                expected_entities = set(
                    original_triples_factory.entity_id_to_label.values(),
                ).difference(entity_restriction)
            else:
                # normalize to a set, so any collection of labels is accepted
                expected_entities = set(entity_restriction)
            assert expected_entities.issuperset(present_entities)

        if relation_restriction is not None:
            present_relations = set(restricted_triples_factory.triples[:, 1])
            if invert_relation_selection:
                # an inverted selection must not contain any of the given relations; comparing against
                # the full relation set (as done before) could never fail
                expected_relations = set(
                    original_triples_factory.relation_id_to_label.values(),
                ).difference(relation_restriction)
            else:
                expected_relations = set(relation_restriction)
            assert expected_relations.issuperset(present_relations)

    def test_new_with_restriction(self):
        """Test new_with_restriction()."""
        relation_restriction = {
            "economicaid",
            "dependent",
        }
        entity_restriction = {
            "brazil",
            "burma",
            "china",
        }
        entity_cases = (
            (None, None),
            (entity_restriction, False),
            (entity_restriction, True),
        )
        relation_cases = (
            (None, None),
            (relation_restriction, False),
            (relation_restriction, True),
        )
        for inverse_triples in (True, False):
            original_triples_factory = Nations(create_inverse_triples=inverse_triples).training
            # Test different combinations of restrictions; the loop targets use their own names so
            # that the restriction sets defined above are never rebound (e.g., to None)
            for (
                (entities, invert_entity_selection),
                (relations, invert_relation_selection),
            ) in itt.product(entity_cases, relation_cases):
                with self.subTest(
                    entity_restriction=entities,
                    invert_entity_selection=invert_entity_selection,
                    relation_restriction=relations,
                    invert_relation_selection=invert_relation_selection,
                ):
                    self._test_restriction(
                        original_triples_factory=original_triples_factory,
                        entity_restriction=entities,
                        invert_entity_selection=invert_entity_selection,
                        relation_restriction=relations,
                        invert_relation_selection=invert_relation_selection,
                    )

    def test_create_lcwa_instances(self):
        """Test create_lcwa_instances."""
        factory = Nations().training
        instances = factory.create_lcwa_instances()
        assert isinstance(instances, LCWAInstances)

        # check compressed triples
        # reconstruct triples from compressed form
        reconstructed_triples = set()
        for hr, row_id in zip(instances.pairs, range(instances.compressed.shape[0])):
            h, r = hr.tolist()
            _, tails = instances.compressed[row_id].nonzero()
            reconstructed_triples.update((h, r, t) for t in tails.tolist())
        original_triples = {tuple(hrt) for hrt in factory.mapped_triples.tolist()}
        assert original_triples == reconstructed_triples

        # check data loader
        for batch in torch.utils.data.DataLoader(instances, batch_size=2):
            assert len(batch) == 2
            assert all(torch.is_tensor(x) for x in batch)
            x, y = batch
            # the last batch may be smaller than the requested batch size
            batch_size = x.shape[0]
            assert x.shape == (batch_size, 2)
            assert x.dtype == torch.long
            assert y.shape == (batch_size, factory.num_entities)
            assert y.dtype == torch.get_default_dtype()

    def test_split_inverse_triples(self):
        """Test whether inverse triples are only created in the training factory."""
        # set create inverse triple to true
        self.factory.create_inverse_triples = True
        # split factory
        train, *others = self.factory.split()
        # check that in *training* inverse triple are to be created
        assert train.create_inverse_triples
        # check that in all other splits no inverse triples are to be created
        assert not any(f.create_inverse_triples for f in others)
class TestSplit(unittest.TestCase):
    """Test splitting."""

    triples_factory: TriplesFactory

    def setUp(self) -> None:
        """Set up the tests."""
        self.triples_factory = Nations().training
        self.assertEqual(1592, self.triples_factory.num_triples)

    def _test_invariants(self, training_triples_factory: TriplesFactory, *other_factories: TriplesFactory) -> None:
        """Test invariants for result of triples factory splitting.

        :param training_triples_factory: the first (training) factory returned by the split
        :param other_factories: all remaining factories returned by the split
        """
        # verify that all entities and relations are present in the training factory
        self.assertEqual(training_triples_factory.num_entities, self.triples_factory.num_entities)
        self.assertEqual(training_triples_factory.num_relations, self.triples_factory.num_relations)

        all_factories = (training_triples_factory, *other_factories)

        # verify that no triple got lost
        self.assertEqual(sum(t.num_triples for t in all_factories), self.triples_factory.num_triples)

        # verify that the label-to-id mappings match, i.e. every part refers to the very same
        # mapping object as the original factory
        self.assertSetEqual(
            {id(factory.entity_to_id) for factory in all_factories},
            {id(self.triples_factory.entity_to_id)},
        )
        self.assertSetEqual(
            {id(factory.relation_to_id) for factory in all_factories},
            {id(self.triples_factory.relation_to_id)},
        )

    def test_split_naive(self):
        """Test splitting a factory in two with a given ratio."""
        ratio = 0.8
        train_triples_factory, test_triples_factory = self.triples_factory.split(ratio)
        self._test_invariants(train_triples_factory, test_triples_factory)

    def test_split_multi(self):
        """Test splitting a factory in three."""
        ratios = 0.80, 0.10
        t0, t1, t2 = self.triples_factory.split(ratios)
        self._test_invariants(t0, t1, t2)

    def test_cleanup_deterministic(self):
        """Test that triples in a test set can get moved properly to the training set."""
        training = np.array([
            [1, 1000, 2],
            [1, 1000, 3],
            [1, 1001, 3],
        ])
        testing = np.array([
            [2, 1001, 3],
            [1, 1002, 4],
        ])
        # [1, 1002, 4] is the only testing triple with an entity (4) and a relation (1002)
        # absent from training, and it is the one expected to be moved
        expected_training = [
            [1, 1000, 2],
            [1, 1000, 3],
            [1, 1001, 3],
            [1, 1002, 4],
        ]
        expected_testing = [
            [2, 1001, 3],
        ]

        new_training, new_testing = _tf_cleanup_deterministic(training, testing)
        self.assertEqual(expected_training, new_training.tolist())
        self.assertEqual(expected_testing, new_testing.tolist())

        # unpack into *both* names, so the assertions below check the result of this call
        # rather than the training array left over from the call above
        new_training, new_testing = _tf_cleanup_all([training, testing])
        self.assertEqual(expected_training, new_training.tolist())
        self.assertEqual(expected_testing, new_testing.tolist())

    def test_cleanup_randomized(self):
        """Test that triples in a test set can get moved properly to the training set."""
        training = np.array([
            [1, 1000, 2],
            [1, 1000, 3],
        ])
        testing = np.array([
            [2, 1000, 3],
            [1, 1000, 4],
            [2, 1000, 4],
            [1, 1001, 3],
        ])
        # two outcomes are accepted: [1, 1001, 3] is moved in both, together with exactly one
        # of the two testing triples containing entity 4
        expected_training_1 = {
            (1, 1000, 2),
            (1, 1000, 3),
            (1, 1000, 4),
            (1, 1001, 3),
        }
        expected_testing_1 = {
            (2, 1000, 3),
            (2, 1000, 4),
        }

        expected_training_2 = {
            (1, 1000, 2),
            (1, 1000, 3),
            (2, 1000, 4),
            (1, 1001, 3),
        }
        expected_testing_2 = {
            (2, 1000, 3),
            (1, 1000, 4),
        }

        # compare as sets of tuples, since the row order is not part of the expectation
        new_training, new_testing = [
            {tuple(row) for row in arr.tolist()}
            for arr in _tf_cleanup_randomized(training, testing)
        ]

        if expected_training_1 == new_training:
            self.assertEqual(expected_testing_1, new_testing)
        elif expected_training_2 == new_training:
            self.assertEqual(expected_testing_2, new_testing)
        else:
            self.fail('training was not correct')
Esempio n. 5
0
class TestSplit(unittest.TestCase):
    """Test splitting."""

    triples_factory: TriplesFactory

    def setUp(self) -> None:
        """Set up the tests."""
        self.triples_factory = Nations().training
        self.assertEqual(1592, self.triples_factory.num_triples)

    def test_split_naive(self):
        """Test splitting a factory in two with a given ratio."""
        ratio = 0.8
        train_triples_factory, test_triples_factory = self.triples_factory.split(ratio)
        # the training part is expected to hold the rounded-down share, the test part the rest
        expected_train_triples = int(self.triples_factory.num_triples * ratio)
        self.assertEqual(expected_train_triples, train_triples_factory.num_triples)
        self.assertEqual(
            self.triples_factory.num_triples - expected_train_triples,
            test_triples_factory.num_triples,
        )

    def test_split_multi(self):
        """Test splitting a factory in three."""
        ratios = r0, r1 = 0.80, 0.10
        t0, t1, t2 = self.triples_factory.split(ratios)
        expected_0_triples = int(self.triples_factory.num_triples * r0)
        expected_1_triples = int(self.triples_factory.num_triples * r1)
        # the last part takes whatever is left over
        expected_2_triples = self.triples_factory.num_triples - expected_0_triples - expected_1_triples
        self.assertEqual(expected_0_triples, t0.num_triples)
        self.assertEqual(expected_1_triples, t1.num_triples)
        self.assertEqual(expected_2_triples, t2.num_triples)

    def test_cleanup_deterministic(self):
        """Test that triples in a test set can get moved properly to the training set."""
        training = np.array([
            [1, 1000, 2],
            [1, 1000, 3],
        ])
        testing = np.array([
            [2, 1001, 3],
            [1, 1002, 4],
        ])
        # [1, 1002, 4] contains entity 4, which is absent from training, and is expected to be moved
        expected_training = [
            [1, 1000, 2],
            [1, 1000, 3],
            [1, 1002, 4],
        ]
        expected_testing = [
            [2, 1001, 3],
        ]

        new_training, new_testing = _tf_cleanup_deterministic(training, testing)
        self.assertEqual(expected_training, new_training.tolist())
        self.assertEqual(expected_testing, new_testing.tolist())

        # unpack into *both* names, so the assertions below check the result of this call
        # rather than the training array left over from the call above
        new_training, new_testing = _tf_cleanup_all([training, testing])
        self.assertEqual(expected_training, new_training.tolist())
        self.assertEqual(expected_testing, new_testing.tolist())

    def test_cleanup_randomized(self):
        """Test that triples in a test set can get moved properly to the training set."""
        training = np.array([
            [1, 1000, 2],
            [1, 1000, 3],
        ])
        testing = np.array([
            [2, 1001, 3],
            [1, 1002, 4],
            [1, 1003, 4],
        ])
        # two outcomes are accepted: exactly one of the two testing triples containing entity 4
        # is moved to training
        expected_training_1 = [
            [1, 1000, 2],
            [1, 1000, 3],
            [1, 1002, 4],
        ]
        expected_testing_1 = [
            [2, 1001, 3],
            [1, 1003, 4],
        ]

        expected_training_2 = [
            [1, 1000, 2],
            [1, 1000, 3],
            [1, 1003, 4],
        ]
        expected_testing_2 = [
            [2, 1001, 3],
            [1, 1002, 4],
        ]

        new_training, new_testing = _tf_cleanup_randomized(training, testing)
        actual_training = new_training.tolist()
        actual_testing = new_testing.tolist()

        if expected_training_1 == actual_training:
            self.assertEqual(expected_testing_1, actual_testing)
        elif expected_training_2 == actual_training:
            self.assertEqual(expected_testing_2, actual_testing)
        else:
            self.fail('training was not correct')
Esempio n. 6
0
class TestSplit(unittest.TestCase):
    """Test splitting."""

    triples_factory: TriplesFactory

    def setUp(self) -> None:
        """Set up the tests."""
        self.triples_factory = Nations().training
        self.assertEqual(1592, self.triples_factory.num_triples)

    def _test_invariants(self, training_triples_factory: TriplesFactory, *other_factories: TriplesFactory) -> None:
        """Test invariants for result of triples factory splitting.

        :param training_triples_factory: the first (training) factory returned by the split
        :param other_factories: all remaining factories returned by the split
        """
        # verify that all entities and relations are present in the training factory
        self.assertEqual(training_triples_factory.num_entities, self.triples_factory.num_entities)
        self.assertEqual(training_triples_factory.num_relations, self.triples_factory.num_relations)

        all_factories = (training_triples_factory, *other_factories)

        # verify that no triple got lost
        self.assertEqual(sum(t.num_triples for t in all_factories), self.triples_factory.num_triples)

        # verify that the label-to-id mappings match, i.e. every part refers to the very same
        # mapping object as the original factory
        self.assertSetEqual(
            {id(factory.entity_to_id) for factory in all_factories},
            {id(self.triples_factory.entity_to_id)},
        )
        self.assertSetEqual(
            {id(factory.relation_to_id) for factory in all_factories},
            {id(self.triples_factory.relation_to_id)},
        )

    def test_split(self):
        """Test splitting a factory."""
        # pairs of (expected number of resulting factories, ratios passed to split)
        cases = [
            (2, 0.8),
            (2, [0.8]),
            (3, [0.80, 0.10]),
            (3, [0.80, 0.10, 0.10]),
        ]
        for method, (n, ratios) in itt.product(SPLIT_METHODS, cases):
            with self.subTest(method=method, ratios=ratios):
                factories = self.triples_factory.split(ratios, method=method)
                self.assertEqual(n, len(factories))
                self._test_invariants(*factories)

    def test_cleanup_deterministic(self):
        """Test that triples in a test set can get moved properly to the training set."""
        training = torch.as_tensor(
            data=[
                [1, 1000, 2],
                [1, 1000, 3],
                [1, 1001, 3],
            ],
            dtype=torch.long,
        )
        testing = torch.as_tensor(
            data=[
                [2, 1001, 3],
                [1, 1002, 4],
            ],
            dtype=torch.long,
        )
        # [1, 1002, 4] is the only testing triple with an entity (4) and a relation (1002)
        # absent from training, and it is the one expected to be moved
        expected_training = torch.as_tensor(
            data=[
                [1, 1000, 2],
                [1, 1000, 3],
                [1, 1001, 3],
                [1, 1002, 4],
            ],
            dtype=torch.long,
        )
        expected_testing = torch.as_tensor(
            data=[
                [2, 1001, 3],
            ],
            dtype=torch.long,
        )

        # torch.equal also compares the shapes, whereas an element-wise ``==`` would broadcast
        new_training, new_testing = _tf_cleanup_deterministic(training, testing)
        self.assertTrue(torch.equal(expected_training, new_training))
        self.assertTrue(torch.equal(expected_testing, new_testing))

        # unpack into *both* names, so the assertions below check the result of this call
        # rather than the training tensor left over from the call above
        new_training, new_testing = _tf_cleanup_all([training, testing])
        self.assertTrue(torch.equal(expected_training, new_training))
        self.assertTrue(torch.equal(expected_testing, new_testing))

    def test_cleanup_randomized(self):
        """Test that triples in a test set can get moved properly to the training set."""
        training = torch.as_tensor(
            data=[
                [1, 1000, 2],
                [1, 1000, 3],
            ],
            dtype=torch.long,
        )
        testing = torch.as_tensor(
            data=[
                [2, 1000, 3],
                [1, 1000, 4],
                [2, 1000, 4],
                [1, 1001, 3],
            ],
            dtype=torch.long,
        )
        # two outcomes are accepted: [1, 1001, 3] is moved in both, together with exactly one
        # of the two testing triples containing entity 4
        expected_training_1 = {
            (1, 1000, 2),
            (1, 1000, 3),
            (1, 1000, 4),
            (1, 1001, 3),
        }
        expected_testing_1 = {
            (2, 1000, 3),
            (2, 1000, 4),
        }

        expected_training_2 = {
            (1, 1000, 2),
            (1, 1000, 3),
            (2, 1000, 4),
            (1, 1001, 3),
        }
        expected_testing_2 = {
            (2, 1000, 3),
            (1, 1000, 4),
        }

        # compare as sets of tuples, since the row order is not part of the expectation
        new_training, new_testing = [
            {tuple(row) for row in arr.tolist()}
            for arr in _tf_cleanup_randomized(training, testing)
        ]

        if expected_training_1 == new_training:
            self.assertEqual(expected_testing_1, new_testing)
        elif expected_training_2 == new_training:
            self.assertEqual(expected_testing_2, new_testing)
        else:
            self.fail('training was not correct')

    def test_get_cover_deterministic(self):
        """Test _get_cover_deterministic."""
        generated_triples = generate_triples()
        cover = _get_cover_deterministic(triples=generated_triples)

        # check type
        assert torch.is_tensor(cover)
        assert cover.dtype == torch.bool
        # check format: one boolean entry per triple
        assert cover.shape == (generated_triples.shape[0],)

        # check coverage: the selected triples have to contain every entity and relation
        self.assertEqual(
            get_entities(generated_triples),
            get_entities(generated_triples[cover]),
            msg='entity coverage is not full',
        )
        self.assertEqual(
            get_relations(generated_triples),
            get_relations(generated_triples[cover]),
            msg='relation coverage is not full',
        )