def test_inverse_triples(self):
    """Check the entity and relation bookkeeping of a factory built with inverse triples."""
    factory = TriplesFactory.from_labeled_triples(triples=triples, create_inverse_triples=True)
    self.assertEqual(4, factory.num_relations)

    # Entity IDs are expected to be exactly 0 .. num_entities - 1.
    expected_entity_ids = set(range(factory.num_entities))
    actual_entity_ids = set(factory.entity_to_id.values())
    self.assertEqual(expected_entity_ids, actual_entity_ids, msg="wrong number entities")

    # The relation label mapping is compared against ``real_num_relations``.
    expected_relation_ids = set(range(factory.real_num_relations))
    actual_relation_ids = set(factory.relation_to_id.values())
    self.assertEqual(expected_relation_ids, actual_relation_ids, msg="wrong number relations")

    # Recompute the label sets directly from the raw triples (head, relation, tail columns).
    relation_labels = set(triples[:, 1])
    entity_labels = set(triples[:, 0]).union(triples[:, 2])
    self.assertEqual(len(entity_labels), factory.num_entities, msg="wrong number entities")
    self.assertEqual(2, len(relation_labels), msg="Wrong number of relations in set")
    self.assertEqual(
        2 * len(relation_labels),
        factory.num_relations,
        msg="Wrong number of relations in factory",
    )
def test_count_inverse_frequencies(self):
    """Test counting inverse frequencies.

    Note, for r3, there are three triples, but the inverse triples are only counted once.
    """
    t = [
        ['a', 'r1', 'b'],
        # r2 / r2_inverse: every triple has a counterpart in the other direction
        ['b', 'r2', 'c'],
        ['c', 'r2_inverse', 'b'],
        ['d', 'r2', 'e'],
        ['e', 'r2_inverse', 'd'],
        # r3 / r3_inverse: only one of the three r3 triples has a counterpart
        ['g', 'r3', 'h'],
        ['h', 'r3_inverse', 'g'],
        ['i', 'r3', 'j'],
        ['k', 'r3', 'l'],
    ]
    # ``np.str`` was a deprecated alias of the builtin ``str`` and no longer exists
    # in current NumPy releases, so the builtin is used directly.
    triples_factory = TriplesFactory.from_labeled_triples(triples=np.array(t, dtype=str))
    frequencies = get_candidate_inverse_relations(
        triples_factory,
        minimum_frequency=0.0,
        symmetric=False,
    )
    self.assertEqual(
        {
            ('r2', 'r2_inverse'): (2 / 2),
            ('r2_inverse', 'r2'): (2 / 2),
            ('r3', 'r3_inverse'): (1 / 3),
            ('r3_inverse', 'r3'): (1 / 1),
        },
        dict(frequencies),
    )
def test_right_sorting(self):
    """Test if the triples and the corresponding inverses are sorted correctly."""
    t = [
        ['e1', 'a', 'e1'],
        ['e1', 'a.', 'e1'],
        ['e1', f'a.{INVERSE_SUFFIX}', 'e1'],
        ['e1', 'a.bc', 'e1'],
        ['e1', f'a.bc{INVERSE_SUFFIX}', 'e1'],
        ['e1', f'a{INVERSE_SUFFIX}', 'e1'],
        ['e1', 'abc', 'e1'],
        ['e1', f'abc{INVERSE_SUFFIX}', 'e1'],
    ]
    # ``np.str`` was a deprecated alias of the builtin ``str`` and no longer exists
    # in current NumPy releases, so the builtin is used directly.
    t = np.array(t, dtype=str)
    factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=False)
    # Each relation is expected to be followed directly by its inverse-suffixed
    # counterpart in the ID order.
    reference_relation_to_id = {
        'a': 0,
        f'a{INVERSE_SUFFIX}': 1,
        'a.': 2,
        f'a.{INVERSE_SUFFIX}': 3,
        'a.bc': 4,
        f'a.bc{INVERSE_SUFFIX}': 5,
        'abc': 6,
        f'abc{INVERSE_SUFFIX}': 7,
    }
    self.assertEqual(reference_relation_to_id, factory.relation_to_id)
def test_inverse_triples(self):
    """Check the entity and relation bookkeeping of a factory built with inverse triples."""
    factory = TriplesFactory.from_labeled_triples(triples=triples, create_inverse_triples=True)

    # With inverse triples the relation count has to be even.
    self.assertEqual(0, factory.num_relations % 2)

    # Both ID mappings are expected to cover exactly 0 .. count - 1.
    expected_entity_ids = set(range(factory.num_entities))
    actual_entity_ids = set(factory.entity_to_id.values())
    self.assertEqual(expected_entity_ids, actual_entity_ids, msg='wrong number entities')

    expected_relation_ids = set(range(factory.num_relations))
    actual_relation_ids = set(factory.relation_to_id.values())
    self.assertEqual(expected_relation_ids, actual_relation_ids, msg='wrong number relations')

    # Recompute the label sets directly from the raw triples (head, relation, tail columns).
    relation_labels = set(triples[:, 1])
    entity_labels = set(triples[:, 0]).union(triples[:, 2])
    self.assertEqual(len(entity_labels), factory.num_entities, msg='wrong number entities')
    self.assertEqual(2, len(relation_labels), msg='Wrong number of relations in set')
    self.assertEqual(
        2 * len(relation_labels),
        factory.num_relations,
        msg='Wrong number of relations in factory',
    )

    # The inverse-suffixed label must be registered in the relation mapping.
    self.assertIn(f'likes{INVERSE_SUFFIX}', factory.relation_to_id)
def test_triples(self):
    """Check ID contiguity and triple mapping consistency of a plain triples factory."""
    factory = TriplesFactory.from_labeled_triples(triples=triples)

    # Entity and relation IDs are each expected to be exactly 0 .. count - 1.
    expected_entity_ids = set(range(factory.num_entities))
    self.assertEqual(expected_entity_ids, set(factory.entity_to_id.values()))
    expected_relation_ids = set(range(factory.num_relations))
    self.assertEqual(expected_relation_ids, set(factory.relation_to_id.values()))

    # Mapping the labeled triples again must reproduce the stored ID triples.
    stored = factory.mapped_triples
    recomputed = factory.map_triples_to_id(triples)
    self.assertTrue((stored == recomputed).all())
def test_correct_inverse_creation(self):
    """Test if the triples and the corresponding inverses are created."""
    t = [
        ['e1', 'a.', 'e5'],
        ['e1', 'a', 'e2'],
    ]
    # ``np.str`` was a deprecated alias of the builtin ``str`` and no longer exists
    # in current NumPy releases, so the builtin is used directly.
    t = np.array(t, dtype=str)
    factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True)
    instances = factory.create_slcwa_instances()
    # Two input triples plus one inverse for each of them.
    assert len(instances) == 4
def test_correct_inverse_creation(self):
    """Test if the triples and the corresponding inverses are created and sorted correctly."""
    t = [
        ['e1', 'a.', 'e5'],
        ['e1', 'a', 'e2'],
    ]
    # ``np.str`` was a deprecated alias of the builtin ``str`` and no longer exists
    # in current NumPy releases, so the builtin is used directly.
    t = np.array(t, dtype=str)
    factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True)
    # Each relation is expected to be followed directly by its inverse-suffixed
    # counterpart in the ID order.
    reference_relation_to_id = {
        'a': 0,
        f'a{INVERSE_SUFFIX}': 1,
        'a.': 2,
        f'a.{INVERSE_SUFFIX}': 3,
    }
    self.assertEqual(reference_relation_to_id, factory.relation_to_id)
def _load_helper(self, relative_path) -> TriplesFactory:
    """Build a triples factory from a file located below ``self.dataset_path``.

    The head, relation and tail columns configured on the instance are read from
    the file. If ``self.entity_to_id_path`` / ``self.relation_to_id_path`` are
    set, the label-to-ID mappings are read from those files (without a header
    row) and handed to the factory; otherwise ``None`` is passed for the
    respective mapping.

    :param relative_path: Path of the triples file, relative to ``self.dataset_path``.
    :return: The triples factory, with its ``path`` attribute set to the joined path.
    """
    relative_path = path.join(self.dataset_path, relative_path)
    with open(relative_path) as file:
        df = pd.read_csv(
            file,
            usecols=[self.head_column, self.relation_column, self.tail_column],
            header=self.header,
            sep=self.sep,
        )

    # Optional pre-defined entity label -> ID mapping.
    entity_to_id = None
    if self.entity_to_id_path:
        entity_table = pd.read_csv(self.entity_to_id_path, sep=self.entity_to_id_sep, header=None)
        entity_to_id = dict(zip(
            entity_table[self.entity_to_id_label_col],
            entity_table[self.entity_to_id_id_col],
        ))

    # Optional pre-defined relation label -> ID mapping.
    relation_to_id = None
    if self.relation_to_id_path:
        relation_table = pd.read_csv(self.relation_to_id_path, sep=self.relation_to_id_sep, header=None)
        relation_to_id = dict(zip(
            relation_table[self.relation_to_id_label_col],
            relation_table[self.relation_to_id_id_col],
        ))

    factory = TriplesFactory.from_labeled_triples(
        triples=df.values,
        entity_to_id=entity_to_id,
        relation_to_id=relation_to_id,
    )
    factory.path = relative_path
    return factory
def test_automatic_incomplete_inverse_detection(self):
    """Test if the TriplesFactory detects that the triples contain incomplete inverses and creates correct ids."""
    t = [
        ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
        ['e1', 'a', 'e2'],
        ['e1', 'a.', 'e5'],
    ]
    # ``np.str`` was a deprecated alias of the builtin ``str`` and no longer exists
    # in current NumPy releases, so the builtin is used directly.
    t = np.array(t, dtype=str)
    factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=False)
    # Each relation is expected to be followed directly by its inverse-suffixed
    # counterpart in the ID order, even for 'a' whose inverse is not in the input.
    reference_relation_to_id = {
        'a': 0,
        f'a{INVERSE_SUFFIX}': 1,
        'a.': 2,
        f'a.{INVERSE_SUFFIX}': 3,
    }
    self.assertEqual(reference_relation_to_id, factory.relation_to_id)
    # The flag is expected to be switched on although it was passed as False.
    self.assertTrue(factory.create_inverse_triples)
def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # noqa: D102
    """Extend the instantiation kwargs with a triples factory built from random labeled triples.

    :param kwargs: The keyword arguments prepared by the parent hook.
    :return: The same mapping with an additional ``"triples_factory"`` entry.
    """
    kwargs = super()._pre_instantiation_hook(kwargs=kwargs)
    # TODO: use triple generation
    # generate random ID triples, one row per triple: shape (num_triples, 3)
    mapped_triples = numpy.stack(
        [
            numpy.random.randint(max_id, size=(self.num_triples,))
            for max_id in (self.num, self.num_relations, self.num)
        ],
        axis=-1,
    )
    entity_names = [f"e_{i}" for i in range(self.num)]
    relation_names = [f"r_{i}" for i in range(self.num_relations)]
    # Translate each ID column into labels. The columns are stacked along the last
    # axis so that the result is again one (head, relation, tail) row per triple;
    # stacking along the default first axis would yield a transposed (3, num_triples)
    # array in which entity and relation labels end up in the same column.
    triples = numpy.stack(
        [
            [names[i] for i in col.tolist()]
            for col, names in zip(
                mapped_triples.T,
                (entity_names, relation_names, entity_names),
            )
        ],
        axis=-1,
    )
    kwargs["triples_factory"] = TriplesFactory.from_labeled_triples(triples=triples)
    return kwargs
def test_automatic_incomplete_inverse_detection(self):
    """Test detecting that the triples contain inverses, warns about them, and filters them out."""
    # comment(mberr): from my pov this behaviour is faulty: the triples factory is expected to say it contains
    # inverse relations, although the triples contained in it are not the same we would have when removing the
    # first triple, and passing create_inverse_triples=True.
    t = [
        ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
        ['e1', 'a', 'e2'],
        ['e1', 'a.', 'e5'],
    ]
    # ``np.str`` was a deprecated alias of the builtin ``str`` and no longer exists
    # in current NumPy releases, so the builtin is used directly.
    t = np.array(t, dtype=str)
    for create_inverse_triples in (False, True):
        # Replace the module logger's ``warning`` so the call can be observed.
        with patch("pykeen.triples.triples_factory.logger.warning") as warning:
            factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=create_inverse_triples)
            # check for warning
            warning.assert_called()
            # check for filtered triples
            assert factory.num_triples == 2
            # check for correct inverse triples flag
            assert factory.create_inverse_triples == create_inverse_triples
def test_lcwa_margin_ranking_loss_helper(self):
    """Check the margin ranking loss helper of the LCWA training loop for both reductions."""
    factory = TriplesFactory.from_labeled_triples(triples=self.triples)

    # The same predictions and labels are scored once per reduction mode.
    for reduction, expected_loss in (('sum', 14), ('mean', 1)):
        criterion = MarginRankingLoss(margin=0, reduction=reduction)
        model = TransE(
            triples_factory=factory,
            embedding_dim=8,
            preferred_device='cpu',
            loss=criterion,
        )
        training_loop = LCWATrainingLoop(model=model, triples_factory=factory)
        observed_loss = training_loop._mr_loss_helper(predictions=self.predictions, labels=self.labels)
        self.assertEqual(expected_loss, observed_loss)
def test_find_leak_assymetric(self):
    """Test finding test leakages with an asymmetric metric."""
    n = 100
    test_relation, test_relation_inverse = 'r', 'r_inverse'

    # n forward triples, each directly followed by its exact inverse
    train_generated = list(itt.chain.from_iterable(
        (
            [str(i), test_relation, str(i + 1 + n)],
            [str(i + 1 + n), test_relation_inverse, str(i)],
        )
        for i in range(n)
    ))
    train_non_inverses = [
        ['a', 'fine', 'b'],
        ['b', 'fine', 'c'],
    ]
    forwards_extras = [
        ['-1', test_relation, '-2'],  # this one leaks!
        ['-3', test_relation, '-4'],
    ]
    inverse_extras = [
        ['-5', test_relation_inverse, '-6'],
    ]
    train = train_generated + train_non_inverses + forwards_extras + inverse_extras
    test = [
        ['-2', test_relation_inverse, '-1'],  # this one was leaked!
    ]
    # ``np.str`` was a deprecated alias of the builtin ``str`` and no longer exists
    # in current NumPy releases, so the builtin is used directly.
    train_factory = TriplesFactory.from_labeled_triples(triples=np.array(train, dtype=str))
    test_factory = TriplesFactory.from_labeled_triples(triples=np.array(test, dtype=str))

    sealant = Sealant(train_factory, symmetric=False)

    expected_forwards_frequency = n / (n + len(forwards_extras))
    expected_inverse_frequency = n / (n + len(inverse_extras))
    # More unmatched forward triples than unmatched inverse triples means the
    # forwards frequency is the smaller of the two.
    self.assertGreater(len(forwards_extras), len(inverse_extras))
    self.assertLess(
        expected_forwards_frequency,
        expected_inverse_frequency,
        msg='Forwards frequency should be lower than inverse frequency',
    )

    self.assertEqual(
        {
            (test_relation, test_relation_inverse): expected_forwards_frequency,
            (test_relation_inverse, test_relation): expected_inverse_frequency,
        },
        dict(sealant.candidate_inverse_relations),
    )

    # The two relations must be registered as inverses of each other.
    self.assertIn(test_relation, sealant.inverses)
    self.assertEqual(test_relation_inverse, sealant.inverses[test_relation])
    self.assertIn(test_relation_inverse, sealant.inverses)
    self.assertEqual(test_relation, sealant.inverses[test_relation_inverse])

    self.assertIn(
        test_relation_inverse,
        sealant.inverse_relations_to_delete,
        msg='The wrong relation was picked for deletion',
    )

    test_leaked = sealant.get_inverse_triples(test_factory)
    self.assertEqual(1, len(test_leaked))
    self.assertEqual(('-2', test_relation_inverse, '-1'), tuple(test_leaked[0]))
def test_find_leak_assymetric(self):
    """Test finding test leakages with an asymmetric metric."""
    n = 100
    # Both expected frequencies below, n / (n + 2) and n / (n + 1), exceed this threshold.
    min_frequency = 0.97
    test_relation, test_relation_inverse = 'r', 'r_inverse'

    # n forward triples, each directly followed by its exact inverse
    train_generated = list(itt.chain.from_iterable(
        (
            [str(i), test_relation, str(i + 1 + n)],
            [str(i + 1 + n), test_relation_inverse, str(i)],
        )
        for i in range(n)
    ))
    train_non_inverses = [
        ['a', 'fine', 'b'],
        ['b', 'fine', 'c'],
    ]
    forwards_extras = [
        ['-1', test_relation, '-2'],  # this one leaks!
        ['-3', test_relation, '-4'],
    ]
    inverse_extras = [
        ['-5', test_relation_inverse, '-6'],
    ]
    train = train_generated + train_non_inverses + forwards_extras + inverse_extras
    test = [
        ['-2', test_relation_inverse, '-1'],  # this one was leaked!
    ]
    # ``np.str`` was a deprecated alias of the builtin ``str`` and no longer exists
    # in current NumPy releases, so the builtin is used directly.
    train_factory = TriplesFactory.from_labeled_triples(
        triples=np.array(train, dtype=str),
        filter_out_candidate_inverse_relations=False,
    )
    # The test factory reuses the training mappings so that IDs are comparable.
    test_factory = TriplesFactory.from_labeled_triples(
        triples=np.array(test, dtype=str),
        entity_to_id=train_factory.entity_to_id,
        relation_to_id=train_factory.relation_to_id,
        filter_out_candidate_inverse_relations=False,
    )

    expected_forwards_frequency = n / (n + len(forwards_extras))
    expected_inverse_frequency = n / (n + len(inverse_extras))
    # More unmatched forward triples than unmatched inverse triples means the
    # forwards frequency is the smaller of the two.
    self.assertGreater(len(forwards_extras), len(inverse_extras))
    self.assertLess(
        expected_forwards_frequency,
        expected_inverse_frequency,
        msg='Forwards frequency should be lower than inverse frequency',
    )

    sealant = Sealant(train_factory, symmetric=False, minimum_frequency=min_frequency)
    # The sealant is checked in terms of relation IDs, so translate the labels once.
    test_relation_id, test_relation_inverse_id = [
        train_factory.relation_to_id[r]
        for r in (test_relation, test_relation_inverse)
    ]
    self.assertNotEqual(
        0,
        len(sealant.candidate_inverse_relations),
        msg=f'did not find any candidate inverse relations at frequency>={min_frequency}',
    )
    self.assertEqual(
        {
            (test_relation_id, test_relation_inverse_id): expected_forwards_frequency,
            (test_relation_inverse_id, test_relation_id): expected_inverse_frequency,
        },
        dict(sealant.candidate_inverse_relations),
    )

    # The two relation IDs must be registered as inverses of each other. Lookups
    # and expected values use IDs throughout; mixing in the string labels would
    # compare against keys/values of a different kind.
    self.assertIn(test_relation_id, sealant.inverses)
    self.assertEqual(test_relation_inverse_id, sealant.inverses[test_relation_id])
    self.assertIn(test_relation_inverse_id, sealant.inverses)
    self.assertEqual(test_relation_id, sealant.inverses[test_relation_inverse_id])

    self.assertIn(
        test_relation_inverse_id,
        sealant.inverse_relations_to_delete,
        msg='The wrong relation was picked for deletion',
    )

    # Test looking up inverse triples
    test_leaked = test_factory.mapped_triples[
        test_factory.get_mask_for_relations(relations=sealant.inverse_relations_to_delete, invert=False)
    ]
    self.assertEqual(1, len(test_leaked))
    # The mapped triples hold IDs, so the expected triple is expressed in IDs as well.
    self.assertEqual(
        (
            train_factory.entity_to_id['-2'],
            test_relation_inverse_id,
            train_factory.entity_to_id['-1'],
        ),
        tuple(test_leaked[0]),
    )