Esempio n. 1
0
    def test_inverse_triples(self):
        """Check entity and relation bookkeeping of a factory built with inverse triples."""
        factory = TriplesFactory.from_labeled_triples(
            triples=triples,
            create_inverse_triples=True,
        )
        self.assertEqual(4, factory.num_relations)

        # Entity IDs are expected to be exactly 0 .. num_entities - 1.
        expected_entity_ids = set(range(factory.num_entities))
        self.assertEqual(
            expected_entity_ids,
            set(factory.entity_to_id.values()),
            msg="wrong number entities",
        )

        # Relation IDs in the mapping are expected to be exactly 0 .. real_num_relations - 1.
        expected_relation_ids = set(range(factory.real_num_relations))
        self.assertEqual(
            expected_relation_ids,
            set(factory.relation_to_id.values()),
            msg="wrong number relations",
        )

        # Recompute the label sets straight from the raw (head, relation, tail) columns.
        relations = set(triples[:, 1])
        heads, tails = triples[:, 0], triples[:, 2]
        entities = set(heads).union(tails)

        self.assertEqual(len(entities), factory.num_entities, msg="wrong number entities")
        self.assertEqual(2, len(relations), msg="Wrong number of relations in set")
        # With inverse triples the factory reports twice as many relations as the raw data has.
        self.assertEqual(
            2 * len(relations),
            factory.num_relations,
            msg="Wrong number of relations in factory",
        )
Esempio n. 2
0
    def test_count_inverse_frequencies(self):
        """Test counting inverse frequencies.

        Note, for r3, there are three triples, but the inverse triples are only counted once.
        """
        t = [
            ['a', 'r1', 'b'],
            # r2: every triple has a matching r2_inverse triple
            ['b', 'r2', 'c'],
            ['c', 'r2_inverse', 'b'],
            ['d', 'r2', 'e'],
            ['e', 'r2_inverse', 'd'],
            # r3: only one of the three triples has a matching r3_inverse triple
            ['g', 'r3', 'h'],
            ['h', 'r3_inverse', 'g'],
            ['i', 'r3', 'j'],
            ['k', 'r3', 'l'],
        ]
        # Use the builtin ``str`` as dtype: the ``np.str`` alias was deprecated in NumPy 1.20
        # and removed in NumPy 1.24, where accessing it raises an AttributeError.
        triples_factory = TriplesFactory.from_labeled_triples(
            triples=np.array(t, dtype=str),
        )
        frequencies = get_candidate_inverse_relations(
            triples_factory,
            minimum_frequency=0.0,
            symmetric=False,
        )
        self.assertEqual(
            {
                ('r2', 'r2_inverse'): (2 / 2),
                ('r2_inverse', 'r2'): (2 / 2),
                ('r3', 'r3_inverse'): (1 / 3),
                ('r3_inverse', 'r3'): (1 / 1),
            },
            dict(frequencies),
        )
Esempio n. 3
0
 def test_right_sorting(self):
     """Test if the triples and the corresponding inverses are sorted correctly.

     The expected mapping places every relation directly before its inverse, even though plain
     string ordering of the labels would interleave them differently.
     """
     t = [
         ['e1', 'a', 'e1'],
         ['e1', 'a.', 'e1'],
         ['e1', f'a.{INVERSE_SUFFIX}', 'e1'],
         ['e1', 'a.bc', 'e1'],
         ['e1', f'a.bc{INVERSE_SUFFIX}', 'e1'],
         ['e1', f'a{INVERSE_SUFFIX}', 'e1'],
         ['e1', 'abc', 'e1'],
         ['e1', f'abc{INVERSE_SUFFIX}', 'e1'],
     ]
     # Use the builtin ``str`` as dtype: the ``np.str`` alias was deprecated in NumPy 1.20
     # and removed in NumPy 1.24, where accessing it raises an AttributeError.
     t = np.array(t, dtype=str)
     factory = TriplesFactory.from_labeled_triples(
         triples=t,
         create_inverse_triples=False,
     )
     reference_relation_to_id = {
         'a': 0,
         f'a{INVERSE_SUFFIX}': 1,
         'a.': 2,
         f'a.{INVERSE_SUFFIX}': 3,
         'a.bc': 4,
         f'a.bc{INVERSE_SUFFIX}': 5,
         'abc': 6,
         f'abc{INVERSE_SUFFIX}': 7,
     }
     self.assertEqual(reference_relation_to_id, factory.relation_to_id)
Esempio n. 4
0
    def test_inverse_triples(self):
        """Check entity and relation bookkeeping of a factory built with inverse triples."""
        factory = TriplesFactory.from_labeled_triples(
            triples=triples,
            create_inverse_triples=True,
        )
        # Every relation comes with an inverse, so the total has to be even.
        self.assertEqual(0, factory.num_relations % 2)

        # Entity IDs are expected to be exactly 0 .. num_entities - 1.
        expected_entity_ids = set(range(factory.num_entities))
        self.assertEqual(
            expected_entity_ids,
            set(factory.entity_to_id.values()),
            msg='wrong number entities',
        )

        # Relation IDs are expected to be exactly 0 .. num_relations - 1.
        expected_relation_ids = set(range(factory.num_relations))
        self.assertEqual(
            expected_relation_ids,
            set(factory.relation_to_id.values()),
            msg='wrong number relations',
        )

        # Recompute the label sets straight from the raw (head, relation, tail) columns.
        relations = set(triples[:, 1])
        heads, tails = triples[:, 0], triples[:, 2]
        entities = set(heads).union(tails)

        self.assertEqual(len(entities), factory.num_entities, msg='wrong number entities')
        self.assertEqual(2, len(relations), msg='Wrong number of relations in set')
        self.assertEqual(
            2 * len(relations),
            factory.num_relations,
            msg='Wrong number of relations in factory',
        )

        # The generated inverse relation has to be present under its suffixed label.
        inverse_label = f'likes{INVERSE_SUFFIX}'
        self.assertIn(inverse_label, factory.relation_to_id)
Esempio n. 5
0
 def test_triples(self):
     """Check ID contiguity and the stored mapped triples of a plain triples factory."""
     factory = TriplesFactory.from_labeled_triples(triples=triples)

     # Entity and relation IDs must each cover 0 .. count - 1 without gaps.
     expected_entity_ids = set(range(factory.num_entities))
     self.assertEqual(expected_entity_ids, set(factory.entity_to_id.values()))

     expected_relation_ids = set(range(factory.num_relations))
     self.assertEqual(expected_relation_ids, set(factory.relation_to_id.values()))

     # Mapping the labeled triples again has to reproduce what the factory stored.
     stored = factory.mapped_triples
     remapped = factory.map_triples_to_id(triples)
     self.assertTrue((stored == remapped).all())
Esempio n. 6
0
 def test_correct_inverse_creation(self):
     """Test if the triples and the corresponding inverses are created."""
     t = [
         ['e1', 'a.', 'e5'],
         ['e1', 'a', 'e2'],
     ]
     # Use the builtin ``str`` as dtype: the ``np.str`` alias was deprecated in NumPy 1.20
     # and removed in NumPy 1.24, where accessing it raises an AttributeError.
     t = np.array(t, dtype=str)
     factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True)
     instances = factory.create_slcwa_instances()
     # Two labeled triples went in; with inverses enabled, four instances are expected.
     assert len(instances) == 4
Esempio n. 7
0
 def test_correct_inverse_creation(self):
     """Test if the triples and the corresponding inverses are created and sorted correctly."""
     t = [
         ['e1', 'a.', 'e5'],
         ['e1', 'a', 'e2'],
     ]
     # Use the builtin ``str`` as dtype: the ``np.str`` alias was deprecated in NumPy 1.20
     # and removed in NumPy 1.24, where accessing it raises an AttributeError.
     t = np.array(t, dtype=str)
     factory = TriplesFactory.from_labeled_triples(
         triples=t,
         create_inverse_triples=True,
     )
     # Each relation is expected to be immediately followed by its inverse.
     reference_relation_to_id = {
         'a': 0,
         f'a{INVERSE_SUFFIX}': 1,
         'a.': 2,
         f'a.{INVERSE_SUFFIX}': 3,
     }
     self.assertEqual(reference_relation_to_id, factory.relation_to_id)
Esempio n. 8
0
    def _load_helper(self, relative_path) -> TriplesFactory:
        """Load one triples file of the dataset into a :class:`TriplesFactory`.

        The head, relation and tail columns configured on the instance are read from the file. If
        ``self.entity_to_id_path`` and/or ``self.relation_to_id_path`` are set, the corresponding
        label-to-ID mappings are read from those files and handed to the factory; otherwise ``None``
        is passed for the respective mapping.

        :param relative_path: Location of the triples file, relative to ``self.dataset_path``.
        :return: The triples factory, with its ``path`` attribute set to the joined file path.
        """
        relative_path = path.join(self.dataset_path, relative_path)

        # Only the triples table needs this handle, so it is closed before the mapping files are read.
        with open(relative_path) as file:
            df = pd.read_csv(
                file,
                usecols=[self.head_column, self.relation_column, self.tail_column],
                header=self.header,
                sep=self.sep,
            )

        entity_to_id = None
        if self.entity_to_id_path:
            node_mapping = pd.read_csv(
                self.entity_to_id_path,
                sep=self.entity_to_id_sep,
                header=None,
            )
            # Pair the label column with the ID column row by row.
            entity_to_id = dict(zip(
                node_mapping[self.entity_to_id_label_col],
                node_mapping[self.entity_to_id_id_col],
            ))

        relation_to_id = None
        if self.relation_to_id_path:
            relation_mapping = pd.read_csv(
                self.relation_to_id_path,
                sep=self.relation_to_id_sep,
                header=None,
            )
            relation_to_id = dict(zip(
                relation_mapping[self.relation_to_id_label_col],
                relation_mapping[self.relation_to_id_id_col],
            ))

        rv = TriplesFactory.from_labeled_triples(
            triples=df.values,
            entity_to_id=entity_to_id,
            relation_to_id=relation_to_id,
        )
        # Remember where the triples came from.
        rv.path = relative_path
        return rv
Esempio n. 9
0
 def test_automatic_incomplete_inverse_detection(self):
     """Test if the TriplesFactory detects that the triples contain incomplete inverses and creates correct ids."""
     t = [
         # The only explicitly inverse-suffixed relation; the inverse of 'a' is not in the input.
         ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
         ['e1', 'a', 'e2'],
         ['e1', 'a.', 'e5'],
     ]
     # Use the builtin ``str`` as dtype: the ``np.str`` alias was deprecated in NumPy 1.20
     # and removed in NumPy 1.24, where accessing it raises an AttributeError.
     t = np.array(t, dtype=str)
     factory = TriplesFactory.from_labeled_triples(
         triples=t,
         create_inverse_triples=False,
     )
     reference_relation_to_id = {
         'a': 0,
         f'a{INVERSE_SUFFIX}': 1,
         'a.': 2,
         f'a.{INVERSE_SUFFIX}': 3,
     }
     self.assertEqual(reference_relation_to_id, factory.relation_to_id)
     # Although False was requested, the factory is expected to report inverse triples.
     self.assertTrue(factory.create_inverse_triples)
Esempio n. 10
0
 def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # noqa: D102
     kwargs = super()._pre_instantiation_hook(kwargs=kwargs)
     # TODO: use triple generation
     # Draw one random ID column each for heads, relations and tails, then join them
     # along the last axis so that every row holds one ID triple.
     upper_bounds = (self.num, self.num_relations, self.num)
     id_columns = [
         numpy.random.randint(bound, size=(self.num_triples,))
         for bound in upper_bounds
     ]
     mapped_triples = numpy.stack(id_columns, axis=-1)

     # Synthetic labels: "e_<id>" for entities, "r_<id>" for relations.
     entity_labels = [f"e_{idx}" for idx in range(self.num)]
     relation_labels = [f"r_{idx}" for idx in range(self.num_relations)]

     # Translate each ID column into its labels.
     label_rows = []
     for id_column, labels in zip(mapped_triples.T, (entity_labels, relation_labels, entity_labels)):
         label_rows.append([labels[idx] for idx in id_column.tolist()])
     # NOTE(review): stacking along the default axis gives an array of shape (3, num_triples),
     # i.e. one row per column of ``mapped_triples`` - confirm this is the layout
     # ``from_labeled_triples`` is meant to receive here.
     triples = numpy.stack(label_rows)

     kwargs["triples_factory"] = TriplesFactory.from_labeled_triples(triples=triples)
     return kwargs
Esempio n. 11
0
 def test_automatic_incomplete_inverse_detection(self):
     """Test detecting that the triples contain inverses, warns about them, and filters them out."""
     # comment(mberr): from my pov this behaviour is faulty: the triples factory is expected to say it contains
     # inverse relations, although the triples contained in it are not the same we would have when removing the
     # first triple, and passing create_inverse_triples=True.
     t = [
         ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
         ['e1', 'a', 'e2'],
         ['e1', 'a.', 'e5'],
     ]
     # Use the builtin ``str`` as dtype: the ``np.str`` alias was deprecated in NumPy 1.20
     # and removed in NumPy 1.24, where accessing it raises an AttributeError.
     t = np.array(t, dtype=str)
     for create_inverse_triples in (False, True):
         # Intercept the module logger's warning so the call can be asserted on.
         with patch("pykeen.triples.triples_factory.logger.warning") as warning:
             factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=create_inverse_triples)
             # check for warning
             warning.assert_called()
             # check for filtered triples
             assert factory.num_triples == 2
             # check for correct inverse triples flag
             assert factory.create_inverse_triples == create_inverse_triples
Esempio n. 12
0
    def test_lcwa_margin_ranking_loss_helper(self):
        """Check the margin ranking loss helper of the LCWA training loop for both reductions."""
        factory = TriplesFactory.from_labeled_triples(triples=self.triples)

        # The setup is identical for both reductions; only the expected aggregate differs.
        expected_by_reduction = (
            ('sum', 14),
            ('mean', 1),
        )
        for reduction, expected in expected_by_reduction:
            criterion = MarginRankingLoss(margin=0, reduction=reduction)
            model = TransE(
                triples_factory=factory,
                embedding_dim=8,
                preferred_device='cpu',
                loss=criterion,
            )
            loop = LCWATrainingLoop(model=model, triples_factory=factory)
            loss = loop._mr_loss_helper(
                predictions=self.predictions,
                labels=self.labels,
            )
            self.assertEqual(expected, loss)
Esempio n. 13
0
    def test_find_leak_assymetric(self):
        """Test finding test leakages with an asymmetric metric.

        The training data holds ``n`` forward/inverse triple pairs plus a few unpaired extras per
        direction, so the two directions end up with different candidate frequencies.
        """
        n = 100
        test_relation, test_relation_inverse = 'r', 'r_inverse'

        # n pairs (i, r, i + 1 + n) / (i + 1 + n, r_inverse, i)
        train_generated = list(
            itt.chain.from_iterable(
                [
                    [str(i), test_relation, str(i + 1 + n)],
                    [str(i + 1 + n), test_relation_inverse, str(i)],
                ]
                for i in range(n)
            )
        )
        train_non_inverses = [
            ['a', 'fine', 'b'],
            ['b', 'fine', 'c'],
        ]
        forwards_extras = [
            ['-1', test_relation, '-2'],  # this one leaks!
            ['-3', test_relation, '-4'],
        ]
        inverse_extras = [
            ['-5', test_relation_inverse, '-6'],
        ]
        train = train_generated + train_non_inverses + forwards_extras + inverse_extras
        test = [
            ['-2', test_relation_inverse, '-1'],  # this one was leaked!
        ]
        # Use the builtin ``str`` as dtype: the ``np.str`` alias was deprecated in NumPy 1.20
        # and removed in NumPy 1.24, where accessing it raises an AttributeError.
        train_factory = TriplesFactory.from_labeled_triples(
            triples=np.array(train, dtype=str),
        )
        test_factory = TriplesFactory.from_labeled_triples(
            triples=np.array(test, dtype=str),
        )

        sealant = Sealant(train_factory, symmetric=False)

        expected_forwards_frequency = n / (n + len(forwards_extras))
        expected_inverse_frequency = n / (n + len(inverse_extras))
        self.assertGreater(len(forwards_extras), len(inverse_extras))
        # More unpaired forward triples means a larger denominator, hence a lower frequency.
        self.assertLess(
            expected_forwards_frequency,
            expected_inverse_frequency,
            msg='Forwards frequency should be lower than inverse frequency',
        )
        self.assertEqual(
            {
                (test_relation, test_relation_inverse): expected_forwards_frequency,
                (test_relation_inverse, test_relation): expected_inverse_frequency,
            },
            dict(sealant.candidate_inverse_relations),
        )

        # The two relations have to be registered as inverses of each other.
        self.assertIn(test_relation, sealant.inverses)
        self.assertEqual(test_relation_inverse, sealant.inverses[test_relation])
        self.assertIn(test_relation_inverse, sealant.inverses)
        self.assertEqual(test_relation, sealant.inverses[test_relation_inverse])

        self.assertIn(
            test_relation_inverse,
            sealant.inverse_relations_to_delete,
            msg='The wrong relation was picked for deletion',
        )

        test_leaked = sealant.get_inverse_triples(test_factory)
        self.assertEqual(1, len(test_leaked))
        self.assertEqual(
            ('-2', test_relation_inverse, '-1'),
            tuple(test_leaked[0]),
        )
Esempio n. 14
0
    def test_find_leak_assymetric(self):
        """Test finding test leakages with an asymmetric metric.

        The training data holds ``n`` forward/inverse triple pairs plus a few unpaired extras per
        direction, so the two directions end up with different candidate frequencies. All checks
        against the sealant and the mapped triples use the relation IDs taken from
        ``train_factory.relation_to_id``, not the relation labels.
        """
        n = 100
        min_frequency = 0.97
        test_relation, test_relation_inverse = 'r', 'r_inverse'

        # n pairs (i, r, i + 1 + n) / (i + 1 + n, r_inverse, i)
        train_generated = list(
            itt.chain.from_iterable(
                [
                    [str(i), test_relation, str(i + 1 + n)],
                    [str(i + 1 + n), test_relation_inverse, str(i)],
                ]
                for i in range(n)
            )
        )
        train_non_inverses = [
            ['a', 'fine', 'b'],
            ['b', 'fine', 'c'],
        ]
        forwards_extras = [
            ['-1', test_relation, '-2'],  # this one leaks!
            ['-3', test_relation, '-4'],
        ]
        inverse_extras = [
            ['-5', test_relation_inverse, '-6'],
        ]
        train = train_generated + train_non_inverses + forwards_extras + inverse_extras
        test = [
            ['-2', test_relation_inverse, '-1'],  # this one was leaked!
        ]
        # Use the builtin ``str`` as dtype: the ``np.str`` alias was deprecated in NumPy 1.20
        # and removed in NumPy 1.24, where accessing it raises an AttributeError.
        train_factory = TriplesFactory.from_labeled_triples(
            triples=np.array(train, dtype=str),
            filter_out_candidate_inverse_relations=False,
        )
        # Reuse the training mappings so that IDs are comparable across both factories.
        test_factory = TriplesFactory.from_labeled_triples(
            triples=np.array(test, dtype=str),
            entity_to_id=train_factory.entity_to_id,
            relation_to_id=train_factory.relation_to_id,
            filter_out_candidate_inverse_relations=False,
        )

        expected_forwards_frequency = n / (n + len(forwards_extras))
        expected_inverse_frequency = n / (n + len(inverse_extras))

        self.assertGreater(len(forwards_extras), len(inverse_extras))
        # More unpaired forward triples means a larger denominator, hence a lower frequency.
        self.assertLess(
            expected_forwards_frequency,
            expected_inverse_frequency,
            msg='Forwards frequency should be lower than inverse frequency',
        )

        sealant = Sealant(
            train_factory,
            symmetric=False,
            minimum_frequency=min_frequency,
        )
        test_relation_id, test_relation_inverse_id = [
            train_factory.relation_to_id[r]
            for r in (test_relation, test_relation_inverse)
        ]
        self.assertNotEqual(
            0,
            len(sealant.candidate_inverse_relations),
            msg=f'did not find any candidate inverse relations at frequency>={min_frequency}',
        )
        self.assertEqual(
            {
                (test_relation_id, test_relation_inverse_id): expected_forwards_frequency,
                (test_relation_inverse_id, test_relation_id): expected_inverse_frequency,
            },
            dict(sealant.candidate_inverse_relations),
        )

        # Look up and compare by ID throughout, matching the membership checks;
        # indexing by label or comparing against a label would mix the two key spaces.
        self.assertIn(test_relation_id, sealant.inverses)
        self.assertEqual(test_relation_inverse_id, sealant.inverses[test_relation_id])
        self.assertIn(test_relation_inverse_id, sealant.inverses)
        self.assertEqual(test_relation_id, sealant.inverses[test_relation_inverse_id])

        self.assertIn(
            test_relation_inverse_id,
            sealant.inverse_relations_to_delete,
            msg='The wrong relation was picked for deletion',
        )

        # Test looking up inverse triples
        leak_mask = test_factory.get_mask_for_relations(
            relations=sealant.inverse_relations_to_delete,
            invert=False,
        )
        test_leaked = test_factory.mapped_triples[leak_mask]
        self.assertEqual(1, len(test_leaked))
        # Mapped triples hold IDs, so the expected relation entry is the ID as well.
        self.assertEqual(
            (
                train_factory.entity_to_id['-2'],
                test_relation_inverse_id,
                train_factory.entity_to_id['-1'],
            ),
            tuple(test_leaked[0]),
        )