class TestSplit(unittest.TestCase):
    """Check that splitting a triples factory yields parts of the expected sizes."""

    triples_factory: TriplesFactory

    def setUp(self) -> None:
        """Load the Nations training factory and sanity-check its size."""
        self.triples_factory = Nations().training
        self.assertEqual(1592, self.triples_factory.num_triples)

    def test_split_naive(self):
        """Split into two parts with a single ratio and verify both sizes."""
        ratio = 0.8
        train_part, test_part = self.triples_factory.split(ratio)

        # the training part receives the floor of ratio * total, the test part the rest
        n_train = int(self.triples_factory.num_triples * ratio)
        n_test = self.triples_factory.num_triples - n_train
        self.assertEqual(n_train, train_part.num_triples)
        self.assertEqual(n_test, test_part.num_triples)

    def test_split_multi(self):
        """Split into three parts with two ratios and verify all three sizes."""
        ratios = (0.80, 0.10)
        first, second, third = self.triples_factory.split(ratios)

        # one floor(ratio * total) per explicit ratio; the last part takes the remainder
        expected_sizes = [int(self.triples_factory.num_triples * r) for r in ratios]
        expected_sizes.append(self.triples_factory.num_triples - expected_sizes[0] - expected_sizes[1])

        for expected, part in zip(expected_sizes, (first, second, third)):
            self.assertEqual(expected, part.num_triples)
def test_metadata(self):
    """Test metadata passing for triples factories.

    Verifies that the ``path`` metadata entry and the ``repr`` of a factory are
    as expected for the plain Nations training factory, for entity- and
    relation-restricted derivatives, for a clone created with
    ``keep_metadata=False``, and for the parts returned by ``split()``.
    """
    t = Nations().training
    self.assertEqual(NATIONS_TRAIN_PATH, t.metadata['path'])
    self.assertEqual(
        (
            'TriplesFactory(num_entities=14, num_relations=55, num_triples=1592,'
            f' inverse_triples=False, path="{NATIONS_TRAIN_PATH}")'
        ),
        repr(t),
    )

    # restriction by entities
    entities = ['poland', 'ussr']
    x = t.new_with_restriction(entities=entities)
    entities_ids = t.entities_to_ids(entities=entities)
    self.assertEqual(NATIONS_TRAIN_PATH, x.metadata['path'])
    self.assertEqual(
        (
            'TriplesFactory(num_entities=14, num_relations=55, num_triples=37,'
            f' inverse_triples=False, entity_restriction={repr(entities_ids)}, path="{NATIONS_TRAIN_PATH}")'
        ),
        repr(x),
    )

    # restriction by relations
    relations = ['negativebehavior']
    v = t.new_with_restriction(relations=relations)
    relations_ids = t.relations_to_ids(relations=relations)
    # check the relation-restricted factory itself (not the entity-restricted one)
    self.assertEqual(NATIONS_TRAIN_PATH, v.metadata['path'])
    self.assertEqual(
        (
            'TriplesFactory(num_entities=14, num_relations=55, num_triples=29,'
            f' inverse_triples=False, path="{NATIONS_TRAIN_PATH}", relation_restriction={repr(relations_ids)})'
        ),
        repr(v),
    )

    # cloning with keep_metadata=False must drop the path entry
    w = t.clone_and_exchange_triples(t.triples[0:5], keep_metadata=False)
    self.assertIsInstance(w, TriplesFactory)
    self.assertNotIn('path', w.metadata)
    self.assertEqual(
        'TriplesFactory(num_entities=14, num_relations=55, num_triples=5, inverse_triples=False)',
        repr(w),
    )

    # both parts of a default split keep the path entry
    y, z = t.split()
    self.assertEqual(NATIONS_TRAIN_PATH, y.metadata['path'])
    self.assertEqual(NATIONS_TRAIN_PATH, z.metadata['path'])
class TestTriplesFactory(unittest.TestCase):
    """Class for testing triples factories."""

    def setUp(self) -> None:
        """Instantiate test instance."""
        self.factory = Nations().training

    def test_correct_inverse_creation(self):
        """Test if the triples and the corresponding inverses are created."""
        t = np.array(
            [
                ["e1", "a.", "e5"],
                ["e1", "a", "e2"],
            ],
            dtype=str,
        )
        factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True)
        instances = factory.create_slcwa_instances()
        # two labeled triples, each accompanied by its inverse
        assert len(instances) == 4

    def test_automatic_incomplete_inverse_detection(self):
        """Test detecting that the triples contain inverses, warns about them, and filters them out."""
        # comment(mberr): from my pov this behaviour is faulty: the triples factory is expected to say it contains
        # inverse relations, although the triples contained in it are not the same we would have when removing the
        # first triple, and passing create_inverse_triples=True.
        t = np.array(
            [
                ["e3", f"a.{INVERSE_SUFFIX}", "e10"],
                ["e1", "a", "e2"],
                ["e1", "a.", "e5"],
            ],
            dtype=str,
        )
        for create_inverse_triples in (False, True):
            with patch("pykeen.triples.triples_factory.logger.warning") as warning:
                factory = TriplesFactory.from_labeled_triples(
                    triples=t,
                    create_inverse_triples=create_inverse_triples,
                )
                # check for warning
                warning.assert_called()
                # check for filtered triples
                assert factory.num_triples == 2
                # check for correct inverse triples flag
                assert factory.create_inverse_triples == create_inverse_triples

    def test_id_to_label(self):
        """Test ID-to-label conversion.

        Both the entity and the relation mappings must round-trip in either direction.
        """
        mapping_pairs = [
            (self.factory.entity_to_id, self.factory.entity_id_to_label),
            (self.factory.relation_to_id, self.factory.relation_id_to_label),
        ]
        for label_to_id, id_to_label in mapping_pairs:
            for label in label_to_id:
                assert id_to_label[label_to_id[label]] == label
            for id_ in id_to_label:
                assert label_to_id[id_to_label[id_]] == id_

    def test_tensor_to_df(self):
        """Test tensor_to_df()."""
        # check correct translation
        labeled_triples = {tuple(row) for row in self.factory.triples.tolist()}
        tensor = self.factory.mapped_triples
        scores = torch.rand(tensor.shape[0])
        df = self.factory.tensor_to_df(tensor=tensor, scores=scores)
        label_columns = ["head_label", "relation_label", "tail_label"]
        re_labeled_triples = {tuple(row) for row in df[label_columns].values.tolist()}
        assert labeled_triples == re_labeled_triples

        # check column order
        assert tuple(df.columns) == TRIPLES_DF_COLUMNS + ("scores",)

    def _test_restriction(
        self,
        original_triples_factory: TriplesFactory,
        entity_restriction: Optional[Collection[str]],
        invert_entity_selection: bool,
        relation_restriction: Optional[Collection[str]],
        invert_relation_selection: bool,
    ):
        """Run the actual test for new_with_restriction.

        :param original_triples_factory: the factory to restrict
        :param entity_restriction: entity labels to restrict to, or None for no entity restriction
        :param invert_entity_selection: whether the entity selection is inverted
        :param relation_restriction: relation labels to restrict to, or None for no relation restriction
        :param invert_relation_selection: whether the relation selection is inverted
        """
        # apply restriction
        restricted_triples_factory = original_triples_factory.new_with_restriction(
            entities=entity_restriction,
            relations=relation_restriction,
            invert_entity_selection=invert_entity_selection,
            invert_relation_selection=invert_relation_selection,
        )

        # check that the triples factory is returned as is, if and only if no restriction is to apply
        no_restriction_to_apply = entity_restriction is None and relation_restriction is None
        equal_factory_object = restricted_triples_factory is original_triples_factory
        assert no_restriction_to_apply == equal_factory_object

        # check that inverse_triples is correctly carried over
        assert original_triples_factory.create_inverse_triples == restricted_triples_factory.create_inverse_triples

        # verify that the label-to-ID mapping has not been changed
        assert original_triples_factory.entity_to_id == restricted_triples_factory.entity_to_id
        assert original_triples_factory.relation_to_id == restricted_triples_factory.relation_to_id

        # verify that triples have been filtered
        if entity_restriction is not None:
            triples = restricted_triples_factory.triples
            present_entities = set(triples[:, 0]).union(triples[:, 2])
            if invert_entity_selection:
                all_entities = set(original_triples_factory.entity_id_to_label.values())
                expected_entities = all_entities.difference(entity_restriction)
            else:
                expected_entities = set(entity_restriction)
            assert expected_entities.issuperset(present_entities)

        if relation_restriction is not None:
            present_relations = set(restricted_triples_factory.triples[:, 1])
            if invert_relation_selection:
                # with an inverted selection, none of the restricted relations may remain;
                # without the difference this check could never fail
                all_relations = set(original_triples_factory.relation_id_to_label.values())
                expected_relations = all_relations.difference(relation_restriction)
            else:
                expected_relations = set(relation_restriction)
            assert expected_relations.issuperset(present_relations)

    def test_new_with_restriction(self):
        """Test new_with_restriction().

        Runs every combination of (no / plain / inverted) entity restriction with
        (no / plain / inverted) relation restriction, with and without inverse triples.
        """
        relation_restriction = {
            "economicaid",
            "dependent",
        }
        entity_restriction = {
            "brazil",
            "burma",
            "china",
        }
        # (restriction, invert_selection) options; built once so the loop variables
        # below cannot shadow the restriction sets
        entity_options = (
            (None, None),
            (entity_restriction, False),
            (entity_restriction, True),
        )
        relation_options = (
            (None, None),
            (relation_restriction, False),
            (relation_restriction, True),
        )
        for inverse_triples in (True, False):
            original_triples_factory = Nations(
                create_inverse_triples=inverse_triples,
            ).training
            # Test different combinations of restrictions
            for (entities, invert_entities), (relations, invert_relations) in itt.product(
                entity_options,
                relation_options,
            ):
                with self.subTest(
                    entity_restriction=entities,
                    invert_entity_selection=invert_entities,
                    relation_restriction=relations,
                    invert_relation_selection=invert_relations,
                ):
                    self._test_restriction(
                        original_triples_factory=original_triples_factory,
                        entity_restriction=entities,
                        invert_entity_selection=invert_entities,
                        relation_restriction=relations,
                        invert_relation_selection=invert_relations,
                    )

    def test_create_lcwa_instances(self):
        """Test create_lcwa_instances."""
        factory = Nations().training
        instances = factory.create_lcwa_instances()
        assert isinstance(instances, LCWAInstances)

        # check compressed triples
        # reconstruct triples from compressed form
        reconstructed_triples = set()
        for row_id, hr in enumerate(instances.pairs):
            h, r = hr.tolist()
            # nonzero() yields (row indices, column indices); the columns are read as tail IDs
            _, tails = instances.compressed[row_id].nonzero()
            reconstructed_triples.update((h, r, t) for t in tails.tolist())
        original_triples = {tuple(hrt) for hrt in factory.mapped_triples.tolist()}
        assert original_triples == reconstructed_triples

        # check data loader
        for batch in torch.utils.data.DataLoader(instances, batch_size=2):
            assert len(batch) == 2
            assert all(torch.is_tensor(x) for x in batch)
            x, y = batch
            batch_size = x.shape[0]
            assert x.shape == (batch_size, 2)
            assert x.dtype == torch.long
            assert y.shape == (batch_size, factory.num_entities)
            assert y.dtype == torch.get_default_dtype()

    def test_split_inverse_triples(self):
        """Test whether inverse triples are only created in the training factory."""
        # set create inverse triple to true
        self.factory.create_inverse_triples = True
        # split factory
        train, *others = self.factory.split()
        # check that in *training* inverse triple are to be created
        assert train.create_inverse_triples
        # check that in all other splits no inverse triples are to be created
        assert not any(f.create_inverse_triples for f in others)
class TestSplit(unittest.TestCase):
    """Test splitting."""

    triples_factory: TriplesFactory

    def setUp(self) -> None:
        """Set up the tests."""
        self.triples_factory = Nations().training
        self.assertEqual(1592, self.triples_factory.num_triples)

    def _test_invariants(self, training_triples_factory: TriplesFactory, *other_factories: TriplesFactory) -> None:
        """Test invariants for result of triples factory splitting.

        :param training_triples_factory: the first (training) factory returned by the split
        :param other_factories: all remaining factories returned by the split
        """
        # verify that all entities and relations are present in the training factory
        self.assertEqual(training_triples_factory.num_entities, self.triples_factory.num_entities)
        self.assertEqual(training_triples_factory.num_relations, self.triples_factory.num_relations)

        all_factories = (training_triples_factory, *other_factories)

        # verify that no triple got lost
        self.assertEqual(sum(t.num_triples for t in all_factories), self.triples_factory.num_triples)

        # verify that the label-to-id mappings match (compared by object identity)
        self.assertSetEqual(
            {id(factory.entity_to_id) for factory in all_factories},
            {id(self.triples_factory.entity_to_id)},
        )
        self.assertSetEqual(
            {id(factory.relation_to_id) for factory in all_factories},
            {id(self.triples_factory.relation_to_id)},
        )

    def test_split_naive(self):
        """Test splitting a factory in two with a given ratio."""
        ratio = 0.8
        train_triples_factory, test_triples_factory = self.triples_factory.split(ratio)
        self._test_invariants(train_triples_factory, test_triples_factory)

    def test_split_multi(self):
        """Test splitting a factory in three."""
        ratios = 0.80, 0.10
        t0, t1, t2 = self.triples_factory.split(ratios)
        self._test_invariants(t0, t1, t2)

    def test_cleanup_deterministic(self):
        """Test that triples in a test set can get moved properly to the training set."""
        training = np.array([
            [1, 1000, 2],
            [1, 1000, 3],
            [1, 1001, 3],
        ])
        testing = np.array([
            [2, 1001, 3],
            [1, 1002, 4],
        ])
        expected_training = [
            [1, 1000, 2],
            [1, 1000, 3],
            [1, 1001, 3],
            [1, 1002, 4],
        ]
        expected_testing = [
            [2, 1001, 3],
        ]

        new_training, new_testing = _tf_cleanup_deterministic(training, testing)
        self.assertEqual(expected_training, new_training.tolist())
        self.assertEqual(expected_testing, new_testing.tolist())

        # re-bind BOTH results so the assertions below check the output of
        # _tf_cleanup_all rather than the values left over from the call above
        new_training, new_testing = _tf_cleanup_all([training, testing])
        self.assertEqual(expected_training, new_training.tolist())
        self.assertEqual(expected_testing, new_testing.tolist())

    def test_cleanup_randomized(self):
        """Test that triples in a test set can get moved properly to the training set.

        The randomized cleanup may produce either of two valid outcomes, so both are accepted.
        """
        training = np.array([
            [1, 1000, 2],
            [1, 1000, 3],
        ])
        testing = np.array([
            [2, 1000, 3],
            [1, 1000, 4],
            [2, 1000, 4],
            [1, 1001, 3],
        ])
        expected_training_1 = {
            (1, 1000, 2),
            (1, 1000, 3),
            (1, 1000, 4),
            (1, 1001, 3),
        }
        expected_testing_1 = {
            (2, 1000, 3),
            (2, 1000, 4),
        }

        expected_training_2 = {
            (1, 1000, 2),
            (1, 1000, 3),
            (2, 1000, 4),
            (1, 1001, 3),
        }
        expected_testing_2 = {
            (2, 1000, 3),
            (1, 1000, 4),
        }

        # compare as sets of tuples so that row order does not matter
        new_training, new_testing = [
            {tuple(row) for row in arr.tolist()}
            for arr in _tf_cleanup_randomized(training, testing)
        ]

        if expected_training_1 == new_training:
            self.assertEqual(expected_testing_1, new_testing)
        elif expected_training_2 == new_training:
            self.assertEqual(expected_testing_2, new_testing)
        else:
            self.fail('training was not correct')
class TestSplit(unittest.TestCase):
    """Test splitting."""

    triples_factory: TriplesFactory

    def setUp(self) -> None:
        """Set up the tests."""
        self.triples_factory = Nations().training
        self.assertEqual(1592, self.triples_factory.num_triples)

    def test_split_naive(self):
        """Test splitting a factory in two with a given ratio."""
        ratio = 0.8
        train_triples_factory, test_triples_factory = self.triples_factory.split(ratio)
        # the training part is expected to hold floor(ratio * total) triples, the test part the rest
        expected_train_triples = int(self.triples_factory.num_triples * ratio)
        self.assertEqual(expected_train_triples, train_triples_factory.num_triples)
        self.assertEqual(
            self.triples_factory.num_triples - expected_train_triples,
            test_triples_factory.num_triples,
        )

    def test_split_multi(self):
        """Test splitting a factory in three."""
        ratios = r0, r1 = 0.80, 0.10
        t0, t1, t2 = self.triples_factory.split(ratios)
        # each explicit ratio yields floor(ratio * total) triples; the last part takes the remainder
        expected_0_triples = int(self.triples_factory.num_triples * r0)
        expected_1_triples = int(self.triples_factory.num_triples * r1)
        expected_2_triples = self.triples_factory.num_triples - expected_0_triples - expected_1_triples
        self.assertEqual(expected_0_triples, t0.num_triples)
        self.assertEqual(expected_1_triples, t1.num_triples)
        self.assertEqual(expected_2_triples, t2.num_triples)

    def test_cleanup_deterministic(self):
        """Test that triples in a test set can get moved properly to the training set."""
        training = np.array([
            [1, 1000, 2],
            [1, 1000, 3],
        ])
        testing = np.array([
            [2, 1001, 3],
            [1, 1002, 4],
        ])
        expected_training = [
            [1, 1000, 2],
            [1, 1000, 3],
            [1, 1002, 4],
        ]
        expected_testing = [
            [2, 1001, 3],
        ]

        new_training, new_testing = _tf_cleanup_deterministic(training, testing)
        self.assertEqual(expected_training, new_training.tolist())
        self.assertEqual(expected_testing, new_testing.tolist())

        # re-bind BOTH results so the assertions below check the output of
        # _tf_cleanup_all rather than the values left over from the call above
        new_training, new_testing = _tf_cleanup_all([training, testing])
        self.assertEqual(expected_training, new_training.tolist())
        self.assertEqual(expected_testing, new_testing.tolist())

    def test_cleanup_randomized(self):
        """Test that triples in a test set can get moved properly to the training set.

        The randomized cleanup may produce either of two valid outcomes, so both are accepted.
        """
        training = np.array([
            [1, 1000, 2],
            [1, 1000, 3],
        ])
        testing = np.array([
            [2, 1001, 3],
            [1, 1002, 4],
            [1, 1003, 4],
        ])
        expected_training_1 = [
            [1, 1000, 2],
            [1, 1000, 3],
            [1, 1002, 4],
        ]
        expected_testing_1 = [
            [2, 1001, 3],
            [1, 1003, 4],
        ]

        expected_training_2 = [
            [1, 1000, 2],
            [1, 1000, 3],
            [1, 1003, 4],
        ]
        expected_testing_2 = [
            [2, 1001, 3],
            [1, 1002, 4],
        ]

        new_training, new_testing = _tf_cleanup_randomized(training, testing)
        # convert once; the training result selects which testing result is expected
        new_training_rows = new_training.tolist()
        new_testing_rows = new_testing.tolist()

        if expected_training_1 == new_training_rows:
            self.assertEqual(expected_testing_1, new_testing_rows)
        elif expected_training_2 == new_training_rows:
            self.assertEqual(expected_testing_2, new_testing_rows)
        else:
            self.fail('training was not correct')
class TestSplit(unittest.TestCase):
    """Test splitting."""

    triples_factory: TriplesFactory

    def setUp(self) -> None:
        """Set up the tests."""
        self.triples_factory = Nations().training
        self.assertEqual(1592, self.triples_factory.num_triples)

    def _test_invariants(self, training_triples_factory: TriplesFactory, *other_factories: TriplesFactory) -> None:
        """Test invariants for result of triples factory splitting.

        :param training_triples_factory: the first (training) factory returned by the split
        :param other_factories: all remaining factories returned by the split
        """
        # verify that all entities and relations are present in the training factory
        self.assertEqual(training_triples_factory.num_entities, self.triples_factory.num_entities)
        self.assertEqual(training_triples_factory.num_relations, self.triples_factory.num_relations)

        all_factories = (training_triples_factory, *other_factories)

        # verify that no triple got lost
        self.assertEqual(sum(t.num_triples for t in all_factories), self.triples_factory.num_triples)

        # verify that the label-to-id mappings match (compared by object identity)
        self.assertSetEqual(
            {id(factory.entity_to_id) for factory in all_factories},
            {id(self.triples_factory.entity_to_id)},
        )
        self.assertSetEqual(
            {id(factory.relation_to_id) for factory in all_factories},
            {id(self.triples_factory.relation_to_id)},
        )

    def test_split(self):
        """Test splitting a factory.

        Each case pairs the expected number of resulting factories with the ratios
        passed to ``split``; every case is run for every method in ``SPLIT_METHODS``.
        """
        cases = [
            (2, 0.8),
            (2, [0.8]),
            (3, [0.80, 0.10]),
            (3, [0.80, 0.10, 0.10]),
        ]
        for method, (n, ratios) in itt.product(SPLIT_METHODS, cases):
            with self.subTest(method=method, ratios=ratios):
                factories = self.triples_factory.split(ratios, method=method)
                self.assertEqual(n, len(factories))
                self._test_invariants(*factories)

    def test_cleanup_deterministic(self):
        """Test that triples in a test set can get moved properly to the training set."""
        training = torch.as_tensor(
            data=[
                [1, 1000, 2],
                [1, 1000, 3],
                [1, 1001, 3],
            ],
            dtype=torch.long,
        )
        testing = torch.as_tensor(
            data=[
                [2, 1001, 3],
                [1, 1002, 4],
            ],
            dtype=torch.long,
        )
        expected_training = torch.as_tensor(
            data=[
                [1, 1000, 2],
                [1, 1000, 3],
                [1, 1001, 3],
                [1, 1002, 4],
            ],
            dtype=torch.long,
        )
        expected_testing = torch.as_tensor(
            data=[
                [2, 1001, 3],
            ],
            dtype=torch.long,
        )

        new_training, new_testing = _tf_cleanup_deterministic(training, testing)
        assert (expected_training == new_training).all()
        assert (expected_testing == new_testing).all()

        # re-bind BOTH results so the assertions below check the output of
        # _tf_cleanup_all rather than the values left over from the call above
        new_training, new_testing = _tf_cleanup_all([training, testing])
        assert (expected_training == new_training).all()
        assert (expected_testing == new_testing).all()

    def test_cleanup_randomized(self):
        """Test that triples in a test set can get moved properly to the training set.

        The randomized cleanup may produce either of two valid outcomes, so both are accepted.
        """
        training = torch.as_tensor(
            data=[
                [1, 1000, 2],
                [1, 1000, 3],
            ],
            dtype=torch.long,
        )
        testing = torch.as_tensor(
            data=[
                [2, 1000, 3],
                [1, 1000, 4],
                [2, 1000, 4],
                [1, 1001, 3],
            ],
            dtype=torch.long,
        )
        expected_training_1 = {
            (1, 1000, 2),
            (1, 1000, 3),
            (1, 1000, 4),
            (1, 1001, 3),
        }
        expected_testing_1 = {
            (2, 1000, 3),
            (2, 1000, 4),
        }

        expected_training_2 = {
            (1, 1000, 2),
            (1, 1000, 3),
            (2, 1000, 4),
            (1, 1001, 3),
        }
        expected_testing_2 = {
            (2, 1000, 3),
            (1, 1000, 4),
        }

        # compare as sets of tuples so that row order does not matter
        new_training, new_testing = [
            {tuple(row) for row in arr.tolist()}
            for arr in _tf_cleanup_randomized(training, testing)
        ]

        if expected_training_1 == new_training:
            self.assertEqual(expected_testing_1, new_testing)
        elif expected_training_2 == new_training:
            self.assertEqual(expected_testing_2, new_testing)
        else:
            self.fail('training was not correct')

    def test_get_cover_deterministic(self):
        """Test _get_cover_deterministic."""
        generated_triples = generate_triples()
        cover = _get_cover_deterministic(triples=generated_triples)

        # check type
        assert torch.is_tensor(cover)
        assert cover.dtype == torch.bool
        # check format: one boolean entry per triple
        assert cover.shape == (generated_triples.shape[0],)

        # check coverage: the selected triples must mention every entity and relation
        self.assertEqual(
            get_entities(generated_triples),
            get_entities(generated_triples[cover]),
            msg='entity coverage is not full',
        )
        self.assertEqual(
            get_relations(generated_triples),
            get_relations(generated_triples[cover]),
            msg='relation coverage is not full',
        )