def test_triples(self):
    """Test properties of the triples factory."""
    factory = TriplesFactory(triples=triples)
    # entity and relation IDs must be a contiguous range starting at 0
    expected_entity_ids = set(range(factory.num_entities))
    self.assertEqual(expected_entity_ids, set(factory.entity_to_id.values()))
    expected_relation_ids = set(range(factory.num_relations))
    self.assertEqual(expected_relation_ids, set(factory.relation_to_id.values()))
    # re-mapping the labeled triples must reproduce the stored mapped triples
    remapped = factory.map_triples_to_id(triples)
    self.assertTrue((factory.mapped_triples == remapped).all())
def train_embedding(self, g, model="SimplE"):
    """Train a knowledge-graph embedding on the concept graph ``g``.

    Builds (head, relation, tail) triples from the graph's semantic links and
    from binned concept attributes (pleasantness/sensitivity encoded as edges
    to four pseudo-nodes), trains a pykeen model, and installs the resulting
    entity embeddings (with a zero padding vector at index 0) on ``self``.

    :param g: graph object exposing ``concepts`` and ``get_semantic_ids``
    :param model: name of the pykeen interaction model to train
    :return: the pykeen pipeline results object
    """
    # pykeen (imported lazily so the dependency is only needed here)
    from pykeen.pipeline import pipeline
    from pykeen.triples import TriplesFactory

    # create pseudo-nodes to encode node attributes
    pleasent, not_pleasent = len(g.concepts), len(g.concepts) + 1
    sensitiv, not_sensitive = len(g.concepts) + 2, len(g.concepts) + 3
    # build triples
    triples = []
    for c in g.concepts:
        # actual connections
        triples.extend(([c.index, 'semantic', j] for j in g.get_semantic_ids(c)))
        # encode attributes by binning (sign only; zero means "no edge")
        if c.pleasentness != 0:
            triples.append([c.index, 'pleasent', pleasent if c.pleasentness > 0 else not_pleasent])
        if c.sensitivity != 0:
            triples.append([c.index, 'sensitiv', sensitiv if c.sensitivity > 0 else not_sensitive])
    triples, n = np.asarray(triples), len(triples)
    # BUG FIX: the printed train count previously used int(0.8 * n) while the
    # mask below actually selects int(0.9 * n) triples for training.
    n_train = int(n * 0.9)
    print("Number of Triples (Train/Total): %i/%i" % (n_train, n))

    # create mask for training and testing separation
    train_mask = np.full(n, False)
    train_mask[:n_train] = True
    np.random.shuffle(train_mask)
    # separate into training and testing
    train_triples = triples[train_mask]
    test_triples = triples[~train_mask]
    # create triples factories
    train_factory = TriplesFactory(triples=train_triples)
    test_factory = TriplesFactory(triples=test_triples)

    # create and run pipeline
    results = pipeline(
        # data
        training_triples_factory=train_factory,
        testing_triples_factory=test_factory,
        # model
        model=model,
        model_kwargs={
            "embedding_dim": self.embedd_dim,
            "automatic_memory_optimization": True
        }
    )

    # get embedding tensor - remove pseudo nodes
    weight = results.model.entity_embeddings.weight[:len(g.concepts), ...].cpu()
    # update word2id; 0th element is reserved for padding
    words = [c.text for c in g.concepts]
    self.word2id = OrderedDict(zip(words, range(1, len(words) + 1)))
    # update embeddings - add padding embedding at position 0
    self.embedding = nn.Embedding(
        num_embeddings=len(words) + 1,
        embedding_dim=self.embedd_dim,
        _weight=torch.cat((torch.zeros((1, self.embedd_dim)), weight), dim=0)
    )
    # return results
    return results
def test_inverse_triples(self):
    """Test that the right number of entities and triples exist after inverting them."""
    factory = TriplesFactory(triples=triples, create_inverse_triples=True)
    # with inverse triples, the relation count must be even
    self.assertEqual(0, factory.num_relations % 2)
    self.assertEqual(
        set(range(factory.num_entities)),
        set(factory.entity_to_id.values()),
        msg='wrong number entities',
    )
    self.assertEqual(
        set(range(factory.num_relations)),
        set(factory.relation_to_id.values()),
        msg='wrong number relations',
    )
    relation_labels = set(triples[:, 1])
    entity_labels = set(triples[:, 0]) | set(triples[:, 2])
    self.assertEqual(len(entity_labels), factory.num_entities, msg='wrong number entities')
    self.assertEqual(2, len(relation_labels), msg='Wrong number of relations in set')
    # every original relation gains exactly one inverse counterpart
    self.assertEqual(
        2 * len(relation_labels),
        factory.num_relations,
        msg='Wrong number of relations in factory',
    )
    self.assertIn(f'likes{INVERSE_SUFFIX}', factory.relation_to_id)
def test_inverse_triples(self):
    """Test that the right number of entities and triples exist after inverting them."""
    factory = TriplesFactory.from_labeled_triples(triples=triples, create_inverse_triples=True)
    self.assertEqual(4, factory.num_relations)
    self.assertEqual(
        set(range(factory.num_entities)),
        set(factory.entity_to_id.values()),
        msg="wrong number entities",
    )
    self.assertEqual(
        set(range(factory.real_num_relations)),
        set(factory.relation_to_id.values()),
        msg="wrong number relations",
    )
    relation_labels = set(triples[:, 1])
    entity_labels = set(triples[:, 0]) | set(triples[:, 2])
    self.assertEqual(len(entity_labels), factory.num_entities, msg="wrong number entities")
    self.assertEqual(2, len(relation_labels), msg="Wrong number of relations in set")
    # every original relation gains exactly one inverse counterpart
    self.assertEqual(
        2 * len(relation_labels),
        factory.num_relations,
        msg="Wrong number of relations in factory",
    )
def test_count_inverse_frequencies(self):
    """Test counting inverse frequencies.

    Note, for r3, there are three triples, but the inverse triples are only counted once.
    """
    t = [
        ['a', 'r1', 'b'],
        #
        ['b', 'r2', 'c'],
        ['c', 'r2_inverse', 'b'],
        ['d', 'r2', 'e'],
        ['e', 'r2_inverse', 'd'],
        #
        ['g', 'r3', 'h'],
        ['h', 'r3_inverse', 'g'],
        ['i', 'r3', 'j'],
        ['k', 'r3', 'l'],
    ]
    # BUG FIX: np.str was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin str is the documented replacement.
    triples_factory = TriplesFactory.from_labeled_triples(triples=np.array(t, dtype=str))
    frequencies = get_candidate_inverse_relations(triples_factory, minimum_frequency=0.0, symmetric=False)
    self.assertEqual(
        {
            ('r2', 'r2_inverse'): (2 / 2),
            ('r2_inverse', 'r2'): (2 / 2),
            ('r3', 'r3_inverse'): (1 / 3),
            ('r3_inverse', 'r3'): (1 / 1),
        },
        dict(frequencies),
    )
def test_right_sorting(self):
    """Test if the triples and the corresponding inverses are sorted correctly."""
    t = [
        ['e1', 'a', 'e1'],
        ['e1', 'a.', 'e1'],
        ['e1', f'a.{INVERSE_SUFFIX}', 'e1'],
        ['e1', 'a.bc', 'e1'],
        ['e1', f'a.bc{INVERSE_SUFFIX}', 'e1'],
        ['e1', f'a{INVERSE_SUFFIX}', 'e1'],
        ['e1', 'abc', 'e1'],
        ['e1', f'abc{INVERSE_SUFFIX}', 'e1'],
    ]
    # BUG FIX: np.str was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin str is the documented replacement.
    t = np.array(t, dtype=str)
    factory = TriplesFactory(triples=t, create_inverse_triples=False)
    # each relation must be immediately followed by its inverse in the ID space
    reference_relation_to_id = {
        'a': 0,
        f'a{INVERSE_SUFFIX}': 1,
        'a.': 2,
        f'a.{INVERSE_SUFFIX}': 3,
        'a.bc': 4,
        f'a.bc{INVERSE_SUFFIX}': 5,
        'abc': 6,
        f'abc{INVERSE_SUFFIX}': 7,
    }
    self.assertEqual(reference_relation_to_id, factory.relation_to_id)
def test_triples(self):
    """Test properties of the triples factory."""
    factory = TriplesFactory.from_labeled_triples(triples=triples)
    # IDs must form a contiguous range starting at 0
    self.assertEqual(set(range(factory.num_entities)), set(factory.entity_to_id.values()))
    self.assertEqual(set(range(factory.num_relations)), set(factory.relation_to_id.values()))
    # re-mapping the labeled triples must reproduce the stored mapped triples
    expected_mapped = _map_triples_elements_to_ids(
        triples=triples,
        entity_to_id=factory.entity_to_id,
        relation_to_id=factory.relation_to_id,
    )
    assert (expected_mapped == factory.mapped_triples).all()
def test_correct_inverse_creation(self):
    """Test if the triples and the corresponding inverses are created."""
    t = [
        ['e1', 'a.', 'e5'],
        ['e1', 'a', 'e2'],
    ]
    # BUG FIX: np.str was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin str is the documented replacement.
    t = np.array(t, dtype=str)
    factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=True)
    instances = factory.create_slcwa_instances()
    # two original triples plus their two inverses
    assert len(instances) == 4
def test_correct_inverse_creation(self):
    """Test if the triples and the corresponding inverses are created and sorted correctly."""
    t = [
        ['e1', 'a.', 'e5'],
        ['e1', 'a', 'e2'],
    ]
    # BUG FIX: np.str was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin str is the documented replacement.
    t = np.array(t, dtype=str)
    factory = TriplesFactory(triples=t, create_inverse_triples=True)
    # each relation is immediately followed by its inverse in the ID space
    reference_relation_to_id = {'a': 0, f'a{INVERSE_SUFFIX}': 1, 'a.': 2, f'a.{INVERSE_SUFFIX}': 3}
    self.assertEqual(reference_relation_to_id, factory.relation_to_id)
def __load_train_data(self, train_data_path, validation_path, test_data_path):
    """Read the train/validation/test triples and build their factories.

    The validation and test factories reuse the training factory's
    entity/relation maps so that IDs are consistent across the splits.
    """
    self.__training = TriplesFactory(path=train_data_path)
    shared_mappings = dict(
        entity_to_id=self.__training.entity_to_id,
        relation_to_id=self.__training.relation_to_id,
    )
    self.__valid = TriplesFactory(path=validation_path, **shared_mappings)
    self.__testing = TriplesFactory(path=test_data_path, **shared_mappings)
def test_automatic_incomplete_inverse_detection(self):
    """Test if the TriplesFactory detects that the triples contain incomplete inverses and creates correct ids."""
    t = [
        ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
        ['e1', 'a', 'e2'],
        ['e1', 'a.', 'e5'],
    ]
    # BUG FIX: np.str was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin str is the documented replacement.
    t = np.array(t, dtype=str)
    factory = TriplesFactory(triples=t, create_inverse_triples=False)
    reference_relation_to_id = {'a': 0, f'a{INVERSE_SUFFIX}': 1, 'a.': 2, f'a.{INVERSE_SUFFIX}': 3}
    self.assertEqual(reference_relation_to_id, factory.relation_to_id)
    # detection must flip the flag even though it was passed as False
    self.assertTrue(factory.create_inverse_triples)
def _test_restriction(
    self,
    original_triples_factory: TriplesFactory,
    entity_restriction: Optional[Collection[str]],
    invert_entity_selection: bool,
    relation_restriction: Optional[Collection[str]],
    invert_relation_selection: bool,
):
    """Run the actual test for new_with_restriction.

    :param original_triples_factory: The factory to restrict.
    :param entity_restriction: Entity labels to keep (or drop, if inverted); None means no entity filter.
    :param invert_entity_selection: Whether to invert the entity selection.
    :param relation_restriction: Relation labels to keep (or drop, if inverted); None means no relation filter.
    :param invert_relation_selection: Whether to invert the relation selection.
    """
    # apply restriction
    restricted_triples_factory = original_triples_factory.new_with_restriction(
        entities=entity_restriction,
        relations=relation_restriction,
        invert_entity_selection=invert_entity_selection,
        invert_relation_selection=invert_relation_selection,
    )

    # check that the triples factory is returned as is, if and only if no restriction is to apply
    no_restriction_to_apply = entity_restriction is None and relation_restriction is None
    equal_factory_object = restricted_triples_factory is original_triples_factory
    assert no_restriction_to_apply == equal_factory_object

    # check that inverse_triples is correctly carried over
    assert original_triples_factory.create_inverse_triples == restricted_triples_factory.create_inverse_triples

    # verify that the label-to-ID mapping has not been changed
    assert original_triples_factory.entity_to_id == restricted_triples_factory.entity_to_id
    assert original_triples_factory.relation_to_id == restricted_triples_factory.relation_to_id

    # verify that triples have been filtered
    if entity_restriction is not None:
        present_entities = set(restricted_triples_factory.triples[:, 0]).union(
            restricted_triples_factory.triples[:, 2])
        if invert_entity_selection:
            expected_entities = set(
                original_triples_factory.entity_id_to_label.values()
            ).difference(entity_restriction)
        else:
            # BUG FIX: wrap in set() so .issuperset exists for any Collection
            # (e.g. a list), matching the relation branch below.
            expected_entities = set(entity_restriction)
        assert expected_entities.issuperset(present_entities)

    if relation_restriction is not None:
        present_relations = set(restricted_triples_factory.triples[:, 1])
        if invert_relation_selection:
            expected_relations = set(original_triples_factory.relation_id_to_label.values())
        else:
            expected_relations = set(relation_restriction)
        assert expected_relations.issuperset(present_relations)
def _load_helper(self, relative_path) -> TriplesFactory:
    """Read one split file and build a TriplesFactory from it."""
    full_path = path.join(self.dataset_path, relative_path)
    with open(full_path) as file:
        df = pd.read_csv(
            file,
            usecols=[self.head_column, self.relation_column, self.tail_column],
            header=self.header,
            sep=self.sep,
        )

    # optional externally-provided label -> ID mappings
    entity_to_id = None
    if self.entity_to_id_path:
        node_mapping = pd.read_csv(
            self.entity_to_id_path,
            sep=self.entity_to_id_sep,
            header=None,
        )
        entity_to_id = dict(zip(
            node_mapping[self.entity_to_id_label_col],
            node_mapping[self.entity_to_id_id_col],
        ))

    relation_to_id = None
    if self.relation_to_id_path:
        relation_mapping = pd.read_csv(
            self.relation_to_id_path,
            sep=self.relation_to_id_sep,
            header=None,
        )
        relation_to_id = dict(zip(
            relation_mapping[self.relation_to_id_label_col],
            relation_mapping[self.relation_to_id_id_col],
        ))

    rv = TriplesFactory.from_labeled_triples(
        triples=df.values,
        entity_to_id=entity_to_id,
        relation_to_id=relation_to_id,
    )
    rv.path = full_path
    return rv
def deteriorate(
    reference: TriplesFactory,
    *others: TriplesFactory,
    n: Union[int, float],
    random_state: TorchRandomHint = None,
) -> List[TriplesFactory]:
    """Remove n triples from the reference set and redistribute them across the other factories.

    :param reference: The factory to remove triples from.
    :param others: The factories that receive the removed triples (appended to their own).
    :param n: The number of triples to remove; a float is interpreted as a fraction
        of the reference factory's triples and must lie in [0, 1).
    :param random_state: Seed/state for the random permutation of the triples.
    :return: The reduced reference factory followed by the augmented other factories.
    :raises NotImplementedError: If the reference factory uses inverse triples.
    :raises ValueError: If a float ``n`` is outside [0, 1).

    TODO: take care that triples aren't removed that are the only ones with any given entity
    """
    # inverse triples are not supported here
    if reference.create_inverse_triples:
        raise NotImplementedError
    # interpret a float n as a fraction of the reference triples
    if isinstance(n, float):
        if n < 0 or 1 <= n:
            raise ValueError
        n = int(n * reference.num_triples)

    generator = ensure_torch_random_state(random_state)
    logger.debug("random state %s", random_state)
    logger.debug("generator %s %s", generator, generator.get_state())

    # random permutation: the first (num_triples - n) indices stay in the
    # reference, the last n are the "deteriorated" (removed) triples
    idx = torch.randperm(reference.num_triples, generator=generator)
    logger.debug("idx %s", idx)
    reference_idx, deteriorated_idx = idx.split(split_size=[reference.num_triples - n, n], dim=0)

    first = reference.clone_and_exchange_triples(
        mapped_triples=reference.mapped_triples[reference_idx],
    )

    # distribute the deteriorated triples across the remaining factories
    # NOTE(review): this divides by len(others) — raises ZeroDivisionError when
    # no other factories are passed; confirm callers always provide at least one.
    didxs = deteriorated_idx.split(math.ceil(n / len(others)), dim=0)
    # zip_longest pads with didx=None when there are fewer chunks than factories,
    # in which case the factory keeps its own triples unchanged
    rest = [
        tf.clone_and_exchange_triples(
            mapped_triples=(
                torch.cat([tf.mapped_triples, reference.mapped_triples[didx]], dim=0)
                if didx is not None
                else tf.mapped_triples  # maybe just give same tf? should it be copied?
            ),
        )
        for didx, tf in zip_longest(didxs, others)
    ]
    return [first, *rest]
def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # noqa: D102
    kwargs = super()._pre_instantiation_hook(kwargs=kwargs)
    # TODO: use triple generation
    # generate random ID-based triples of shape (num_triples, 3)
    mapped_triples = numpy.stack([
        numpy.random.randint(max_id, size=(self.num_triples,))
        for max_id in (self.num, self.num_relations, self.num)
    ], axis=-1)
    entity_names = [f"e_{i}" for i in range(self.num)]
    relation_names = [f"r_{i}" for i in range(self.num_relations)]
    # BUG FIX: stack along the last axis so the labeled triples also have shape
    # (num_triples, 3); the previous default axis=0 produced (3, num_triples),
    # which is not the (head, relation, tail) row layout from_labeled_triples expects.
    triples = numpy.stack([
        [names[i] for i in col.tolist()]
        for col, names in zip(
            mapped_triples.T,
            (entity_names, relation_names, entity_names),
        )
    ], axis=-1)
    kwargs["triples_factory"] = TriplesFactory.from_labeled_triples(triples=triples)
    return kwargs
def test_custom_tf(self):
    """Test using a custom triples factories with HPO.

    .. seealso:: https://github.com/pykeen/pykeen/issues/230
    """
    tf = TriplesFactory.from_path(path=NATIONS_TRAIN_PATH)
    splits = tf.split([.8, .1, .1], random_state=0)
    training, testing, validation = splits
    hpo_pipeline_result = hpo_pipeline(
        training=training,
        testing=testing,
        validation=validation,
        model='TransE',
        n_trials=2,
        training_kwargs=dict(num_epochs=2),
    )
    # the result must be serializable to a directory
    with tempfile.TemporaryDirectory() as directory:
        hpo_pipeline_result.save_to_directory(directory)
def test_automatic_incomplete_inverse_detection(self):
    """Test detecting that the triples contain inverses, warns about them, and filters them out."""
    # comment(mberr): from my pov this behaviour is faulty: the triples factory is expected to say it contains
    # inverse relations, although the triples contained in it are not the same we would have when removing the
    # first triple, and passing create_inverse_triples=True.
    t = [
        ['e3', f'a.{INVERSE_SUFFIX}', 'e10'],
        ['e1', 'a', 'e2'],
        ['e1', 'a.', 'e5'],
    ]
    # BUG FIX: np.str was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin str is the documented replacement.
    t = np.array(t, dtype=str)
    for create_inverse_triples in (False, True):
        with patch("pykeen.triples.triples_factory.logger.warning") as warning:
            factory = TriplesFactory.from_labeled_triples(triples=t, create_inverse_triples=create_inverse_triples)
            # check for warning
            warning.assert_called()
            # check for filtered triples
            assert factory.num_triples == 2
            # check for correct inverse triples flag
            assert factory.create_inverse_triples == create_inverse_triples
def test_custom_tf_object(self):
    """Test using a custom triples factories with HPO.

    .. seealso:: https://github.com/pykeen/pykeen/issues/230
    """
    tf = TriplesFactory.from_path(path=NATIONS_TRAIN_PATH)
    training, testing, validation = tf.split([.8, .1, .1], random_state=0)
    hpo_pipeline_result = self._help_test_hpo(
        study_name='HPO with custom triples factories',
        training=training,
        testing=testing,
        validation=validation,
    )
    # Since there's no source path information, none of these attributes
    # should be added, even if it might be possible to infer path
    # information from the triples factories.
    for key in ('dataset', 'training', 'testing', 'validation'):
        self.assertNotIn(key, hpo_pipeline_result.study.user_attrs)
def test_lcwa_margin_ranking_loss_helper(self):
    """Test if output is correct for the LCWA training loop use case."""
    factory = TriplesFactory.from_labeled_triples(triples=self.triples)
    # the helper is checked under both reduction modes with known expectations
    for reduction, expected in (('sum', 14), ('mean', 1)):
        loss_cls = MarginRankingLoss(
            margin=0,
            reduction=reduction,
        )
        model = TransE(
            triples_factory=factory,
            embedding_dim=8,
            preferred_device='cpu',
            loss=loss_cls,
        )
        loop = LCWATrainingLoop(model=model, triples_factory=factory)
        loss = loop._mr_loss_helper(predictions=self.predictions, labels=self.labels)
        self.assertEqual(expected, loss)
def test_find_leak_assymetric(self):
    """Test finding test leakages with an asymmetric metric."""
    n = 100
    test_relation, test_relation_inverse = 'r', 'r_inverse'
    # n forward/inverse pairs between disjoint entity sets
    train_generated = list(itt.chain.from_iterable(([
        [str(i), test_relation, str(j + 1 + n)],
        [str(j + 1 + n), test_relation_inverse, str(i)],
    ] for i, j in zip(range(n), range(n)))))
    train_non_inverses = [
        ['a', 'fine', 'b'],
        ['b', 'fine', 'c'],
    ]
    forwards_extras = [
        ['-1', test_relation, '-2'],  # this one leaks!
        ['-3', test_relation, '-4'],
    ]
    inverse_extras = [
        ['-5', test_relation_inverse, '-6'],
    ]
    train = train_generated + train_non_inverses + forwards_extras + inverse_extras
    test = [
        ['-2', test_relation_inverse, '-1'],  # this one was leaked!
    ]
    # BUG FIX: np.str was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin str is the documented replacement.
    train_factory = TriplesFactory.from_labeled_triples(triples=np.array(train, dtype=str))
    test_factory = TriplesFactory.from_labeled_triples(triples=np.array(test, dtype=str))
    sealant = Sealant(train_factory, symmetric=False)

    expected_forwards_frequency = n / (n + len(forwards_extras))
    expected_inverse_frequency = n / (n + len(inverse_extras))
    self.assertGreater(len(forwards_extras), len(inverse_extras))
    self.assertLess(
        expected_forwards_frequency,
        expected_inverse_frequency,
        msg='Forwards frequency should be higher than inverse frequency',
    )
    self.assertEqual(
        {
            (test_relation, test_relation_inverse): expected_forwards_frequency,
            (test_relation_inverse, test_relation): expected_inverse_frequency,
        },
        dict(sealant.candidate_inverse_relations),
    )

    self.assertIn(test_relation, sealant.inverses)
    self.assertEqual(test_relation_inverse, sealant.inverses[test_relation])
    self.assertIn(test_relation_inverse, sealant.inverses)
    self.assertEqual(test_relation, sealant.inverses[test_relation_inverse])
    self.assertIn(
        test_relation_inverse,
        sealant.inverse_relations_to_delete,
        msg='The wrong relation was picked for deletion',
    )
    test_leaked = sealant.get_inverse_triples(test_factory)
    self.assertEqual(1, len(test_leaked))
    self.assertEqual(('-2', test_relation_inverse, '-1'), tuple(test_leaked[0]))
from pykeen.triples import TriplesFactory
from pykeen.pipeline import pipeline

training_path: str = "kg/train.hrt.txt"
validation_path: str = "kg/valid.hrt.txt"
testing_path: str = "kg/test.hrt.txt"

# the training split defines the entity/relation vocabularies;
# the other splits reuse its mappings so IDs stay consistent
training = TriplesFactory(path=training_path)
valid = TriplesFactory(
    path=validation_path,
    entity_to_id=training.entity_to_id,
    relation_to_id=training.relation_to_id,
)
testing = TriplesFactory(
    path=testing_path,
    entity_to_id=training.entity_to_id,
    relation_to_id=training.relation_to_id,
)

result = pipeline(
    training=training,
    validation=valid,
    testing=testing,
    model='TransE',
    training_kwargs=dict(num_epochs=2, batch_size=512),
    evaluation_kwargs=dict(batch_size=128),
)
result.save_to_directory('saved-model')

import torch

# reload the trained model from disk and query it
model = torch.load('saved-model/trained_model.pkl')
print(model.predict_heads('VARIANT_DISEASE_associated', 'Leigh_syndrome'))
def do_kge(edgelist: pd.DataFrame,
           design: pd.DataFrame,
           out: str,
           model_config: Dict,
           return_patients: Optional[bool] = True,
           train_size: Optional[float] = 0.8,
           validation_size: Optional[float] = 0.1) -> pd.DataFrame:
    """Carry out KGE on the given data.

    :param edgelist: Dataframe containing the patient-feature graph in edgelist format
    :param design: Dataframe containing the design table for the data
    :param out: Output folder for the results
    :param model_config: Configuration file for the KGE models, in JSON format.
    :param return_patients: Flag to indicate if the final data should contain only patients or even the features
    :param train_size: Size of the training data for KGE ranging from 0 - 1
    :param validation_size: Size of the validation data for KGE ranging from 0 - 1. It must be lower than training size
    :return: Dataframe containing the embedding from the KGE
    """
    design_norm_df = design.astype(str, copy=True)

    # remember each labeled source node's label before stripping the column
    unique_nodes = edgelist[~edgelist['label'].isna()].drop_duplicates('source')
    label_mapping = {patient: label for patient, label in zip(unique_nodes['source'], unique_nodes['label'])}

    # labels are not part of the graph structure itself
    edgelist = edgelist.drop(columns='label')

    # Split the edgelist into training, validation and testing data
    train, validation, test = _weighted_splitter(
        edgelist=edgelist,
        train_size=train_size,
        validation_size=validation_size)
    # persist the splits so the TriplesFactory instances can read them from disk
    train.to_csv(f'{out}/train.edgelist', sep='\t', index=False, header=False)
    validation.to_csv(f'{out}/validation.edgelist', sep='\t', index=False, header=False)
    test.to_csv(f'{out}/test.edgelist', sep='\t', index=False, header=False)

    create_inverse_triples = False  # In a second HPO configuration, this can be set to true
    training_factory = TriplesFactory(
        path=f'{out}/train.edgelist',
        create_inverse_triples=create_inverse_triples,
    )
    validation_factory = TriplesFactory(
        path=f'{out}/validation.edgelist',
        create_inverse_triples=create_inverse_triples,
    )
    testing_factory = TriplesFactory(
        path=f'{out}/test.edgelist',
        create_inverse_triples=create_inverse_triples,
    )

    # hyper-parameter optimization followed by a final pipeline run
    run_optimization(dataset=(training_factory, validation_factory, testing_factory),
                     model_config=model_config,
                     out_dir=out)
    best_model = run_pipeline(dataset=(training_factory, validation_factory, testing_factory),
                              out_dir=out).model

    # Get the embedding as a numpy array
    embedding_values = _model_to_numpy(best_model)

    # Create columns as component names
    embedding_columns = [f'Component_{i}' for i in range(1, embedding_values.shape[1] + 1)]

    # Get the nodes of the training triples as index, ordered by their entity ID
    # so rows of the embedding matrix line up with their node labels
    node_list = list(best_model.triples_factory.entity_to_id.keys())
    embedding_index = sorted(node_list, key=lambda x: best_model.triples_factory.entity_to_id[x])

    embedding = pd.DataFrame(data=embedding_values, columns=embedding_columns, index=embedding_index)

    if return_patients:
        # TODO: Use clustering before classification to see if embeddings are already good enough
        # keep only rows whose index appears in the design table's FileName column
        embedding = embedding[embedding.index.isin(design_norm_df['FileName'])]
        for index in embedding.index:
            embedding.at[index, 'label'] = label_mapping[index]

    return embedding
def test_find_leak_assymetric(self):
    """Test finding test leakages with an asymmetric metric."""
    n = 100
    min_frequency = 0.97
    test_relation, test_relation_inverse = 'r', 'r_inverse'
    # n forward/inverse pairs between disjoint entity sets
    train_generated = list(itt.chain.from_iterable(([
        [str(i), test_relation, str(j + 1 + n)],
        [str(j + 1 + n), test_relation_inverse, str(i)],
    ] for i, j in zip(range(n), range(n)))))
    train_non_inverses = [
        ['a', 'fine', 'b'],
        ['b', 'fine', 'c'],
    ]
    forwards_extras = [
        ['-1', test_relation, '-2'],  # this one leaks!
        ['-3', test_relation, '-4'],
    ]
    inverse_extras = [
        ['-5', test_relation_inverse, '-6'],
    ]
    train = train_generated + train_non_inverses + forwards_extras + inverse_extras
    test = [
        ['-2', test_relation_inverse, '-1'],  # this one was leaked!
    ]
    # BUG FIX: np.str was deprecated in NumPy 1.20 and removed in 1.24;
    # the builtin str is the documented replacement.
    train_factory = TriplesFactory.from_labeled_triples(
        triples=np.array(train, dtype=str),
        filter_out_candidate_inverse_relations=False,
    )
    test_factory = TriplesFactory.from_labeled_triples(
        triples=np.array(test, dtype=str),
        entity_to_id=train_factory.entity_to_id,
        relation_to_id=train_factory.relation_to_id,
        filter_out_candidate_inverse_relations=False,
    )

    expected_forwards_frequency = n / (n + len(forwards_extras))
    expected_inverse_frequency = n / (n + len(inverse_extras))
    self.assertGreater(len(forwards_extras), len(inverse_extras))
    self.assertLess(
        expected_forwards_frequency,
        expected_inverse_frequency,
        msg='Forwards frequency should be higher than inverse frequency',
    )

    sealant = Sealant(train_factory, symmetric=False, minimum_frequency=min_frequency)
    test_relation_id, test_relation_inverse_id = [
        train_factory.relation_to_id[r] for r in (test_relation, test_relation_inverse)
    ]
    self.assertNotEqual(
        0,
        len(sealant.candidate_inverse_relations),
        msg=f'did not find any candidate inverse relations at frequency>={min_frequency}',
    )
    self.assertEqual(
        {
            (test_relation_id, test_relation_inverse_id): expected_forwards_frequency,
            (test_relation_inverse_id, test_relation_id): expected_inverse_frequency,
        },
        dict(sealant.candidate_inverse_relations),
    )

    # BUG FIX: sealant.inverses is keyed by relation *IDs* (as the assertIn
    # checks show), so the lookups and expected values must use IDs, not labels.
    self.assertIn(test_relation_id, sealant.inverses)
    self.assertEqual(test_relation_inverse_id, sealant.inverses[test_relation_id])
    self.assertIn(test_relation_inverse_id, sealant.inverses)
    self.assertEqual(test_relation_id, sealant.inverses[test_relation_inverse_id])
    self.assertIn(
        test_relation_inverse_id,
        sealant.inverse_relations_to_delete,
        msg='The wrong relation was picked for deletion',
    )

    # Test looking up inverse triples
    test_leaked = test_factory.mapped_triples[test_factory.get_mask_for_relations(
        relations=sealant.inverse_relations_to_delete, invert=False)]
    self.assertEqual(1, len(test_leaked))
    # BUG FIX: mapped triples are ID-based in all three positions, so the
    # expected relation must be the inverse relation's ID, not its label.
    self.assertEqual(
        (train_factory.entity_to_id['-2'], test_relation_inverse_id, train_factory.entity_to_id['-1']),
        tuple(test_leaked[0]),
    )
from pykeen.triples import TriplesFactory
from pykeen.evaluation import RankBasedEvaluator
from pykeen.pipeline import pipeline
import json

n_tokeep = 300
minimum = 500

# load the filtered rare-disease triples and split into train/test
tf = TriplesFactory.from_path(f'data/rare/rare_{minimum}_{n_tokeep}.csv')
training, testing = tf.split([.8, .2])

result_pipeline = pipeline(
    training=training,
    testing=testing,
    model='RESCAL',
    model_kwargs=dict(embedding_dim=300),
    training_kwargs=dict(
        # sampler="schlichtkrull",
        # checkpoint_name='RGCN_checkpointt.pt',
        # checkpoint_frequency=5,
        num_epochs=200,
        # batch_size=128,
    ),
    evaluator=RankBasedEvaluator,
    evaluator_kwargs=dict(ks=[50]),
)

result_pipeline.plot_losses()
result_pipeline.plot()