def test_create_lcwa_instances(self):
    """Check that LCWA instances round-trip the triples and batch correctly."""
    factory = Nations().training
    instances = factory.create_lcwa_instances()
    assert isinstance(instances, LCWAInstances)

    # Rebuild (h, r, t) triples from the compressed form: row ``row_id`` of
    # ``compressed`` is paired with the (h, r) entry at the same position in ``pairs``.
    num_rows = instances.compressed.shape[0]
    reconstructed_triples = set()
    for hr, row_id in zip(instances.pairs, range(num_rows)):
        h, r = hr.tolist()
        _, tails = instances.compressed[row_id].nonzero()
        for t in tails.tolist():
            reconstructed_triples.add((h, r, t))
    original_triples = set(map(tuple, factory.mapped_triples.tolist()))
    assert original_triples == reconstructed_triples

    # Iterate through a data loader and verify structure, shapes and dtypes of each batch.
    loader = torch.utils.data.DataLoader(instances, batch_size=2)
    for batch in loader:
        assert len(batch) == 2
        assert all(torch.is_tensor(part) for part in batch)
        x, y = batch
        batch_size = x.shape[0]
        # x holds one (head, relation) pair per row
        assert x.shape == (batch_size, 2)
        assert x.dtype == torch.long
        # y holds one entry per entity for each pair
        assert y.shape == (batch_size, factory.num_entities)
        assert y.dtype == torch.get_default_dtype()
class TestSplit(unittest.TestCase):
    """Tests for splitting a triples factory by ratio."""

    #: The factory that is split in each test
    triples_factory: TriplesFactory

    def setUp(self) -> None:
        """Load the Nations training factory and sanity-check its size."""
        self.triples_factory = Nations().training
        self.assertEqual(1592, self.triples_factory.num_triples)

    def test_split_naive(self):
        """Split into two parts with a single ratio and check both sizes."""
        ratio = 0.8
        train_factory, test_factory = self.triples_factory.split(ratio)
        total = self.triples_factory.num_triples
        # The first part is expected to get the truncated share; the rest goes to the second.
        expected_train = int(total * ratio)
        expected_test = total - expected_train
        self.assertEqual(expected_train, train_factory.num_triples)
        self.assertEqual(expected_test, test_factory.num_triples)

    def test_split_multi(self):
        """Split into three parts with two ratios and check all sizes."""
        r0, r1 = 0.80, 0.10
        ratios = (r0, r1)
        t0, t1, t2 = self.triples_factory.split(ratios)
        total = self.triples_factory.num_triples
        expected_sizes = [int(total * r0), int(total * r1)]
        # The last part receives whatever remains after the first two.
        expected_sizes.append(total - expected_sizes[0] - expected_sizes[1])
        for expected, part in zip(expected_sizes, (t0, t1, t2)):
            self.assertEqual(expected, part.num_triples)
def pre_setup_hook(self) -> None:
    """Create the triples factory, the sLCWA instances, and a seeded positive batch."""
    self.triples_factory = Nations().training
    self.training_instances = self.triples_factory.create_slcwa_instances()
    # Draw ``batch_size`` instance indices (with replacement) from a seeded generator
    # so the positive batch is reproducible.
    rng = numpy.random.RandomState(seed=self.seed)
    num_instances = len(self.training_instances)
    batch_indices = rng.randint(low=0, high=num_instances, size=(self.batch_size,))
    self.positive_batch = self.training_instances.mapped_triples[batch_indices]
def test_new_with_restriction(self):
    """Test new_with_restriction() for all combinations of entity/relation restriction."""
    example_relation_restriction = {'economicaid', 'dependent'}
    example_entity_restriction = {'brazil', 'burma', 'china'}
    for inverse_triples in (True, False):
        original = Nations(create_inverse_triples=inverse_triples).training
        for entity_restriction in (None, example_entity_restriction):
            for relation_restriction in (None, example_relation_restriction):
                restricted = original.new_with_restriction(
                    entities=entity_restriction,
                    relations=relation_restriction,
                )

                # The very same object must come back if and only if nothing is restricted.
                nothing_to_restrict = entity_restriction is None and relation_restriction is None
                assert nothing_to_restrict == (restricted is original)

                # The inverse-triples setting must be carried over.
                assert original.create_inverse_triples == restricted.create_inverse_triples

                # The label-to-ID mappings must be untouched.
                assert original.entity_to_id == restricted.entity_to_id
                assert original.relation_to_id == restricted.relation_to_id

                # Only allowed entities may remain in head (column 0) and tail (column 2).
                if entity_restriction is not None:
                    present_entities = set(restricted.triples[:, 0]).union(restricted.triples[:, 2])
                    assert set(entity_restriction).issuperset(present_entities)

                # Only allowed relations (column 1) may remain; with inverse triples
                # the inverses of the allowed relations are permitted as well.
                if relation_restriction is not None:
                    present_relations = set(restricted.triples[:, 1])
                    exp_relations = set(relation_restriction)
                    if original.create_inverse_triples:
                        inverse_of = original.relation_to_inverse.get
                        exp_relations = exp_relations | {inverse_of(rel) for rel in exp_relations}
                    assert exp_relations.issuperset(present_relations)
def setUp(self) -> None:
    """Create the factory, two samplers (default and scaled), and a seeded positive batch."""
    self.batch_size = 16
    self.seed = 42
    self.num_negs_per_pos = 10

    self.triples_factory = Nations().training
    self.slcwa_instances = self.triples_factory.create_slcwa_instances()

    sampler_cls = self.negative_sampling_cls
    # Sampler with the class's default number of negatives per positive
    self.negative_sampler = sampler_cls(triples_factory=self.triples_factory)
    # Sampler producing ``num_negs_per_pos`` negatives per positive
    self.scaling_negative_sampler = sampler_cls(
        triples_factory=self.triples_factory,
        num_negs_per_pos=self.num_negs_per_pos,
    )

    # Seeded index draw so that the positive batch is reproducible
    random_state = numpy.random.RandomState(seed=self.seed)
    batch_indices = random_state.randint(
        low=0,
        high=self.slcwa_instances.num_instances,
        size=(self.batch_size,),
    )
    self.positive_batch = self.slcwa_instances.mapped_triples[batch_indices]
def test_early_stopping(self):
    """Train TransE with an early stopper and check that training stops at the expected epoch."""
    nations = Nations()
    model: Model = TransE(triples_factory=nations.training)

    # automatic_memory_optimization is switched off for testing
    evaluator = RankBasedEvaluator(automatic_memory_optimization=False)
    stopper = EarlyStopper(
        model=model,
        evaluator=evaluator,
        training_triples_factory=nations.training,
        evaluation_triples_factory=nations.validation,
        patience=self.patience,
        relative_delta=self.relative_delta,
        metric='mean_rank',
    )

    optimizer = Adam(params=model.get_grad_params())
    training_loop = SLCWATrainingLoop(
        model=model,
        triples_factory=nations.training,
        optimizer=optimizer,
        automatic_memory_optimization=False,
    )
    losses = training_loop.train(
        triples_factory=nations.training,
        num_epochs=self.max_num_epochs,
        batch_size=self.batch_size,
        stopper=stopper,
        use_tqdm=False,
    )

    # One loss is recorded per epoch; the stopper evaluates every ``frequency`` epochs.
    num_epochs_run = len(losses)
    self.assertEqual(stopper.number_results, num_epochs_run // stopper.frequency)
    self.assertEqual(self.stop_epoch, num_epochs_run, msg='Did not stop early like it should have')
def setUpClass(cls):
    """Set up a shared dataset and mock model with hand-crafted triples."""
    cls.device = resolve_device('cuda')
    cls.dataset = Nations()
    cls.model = MockModel(triples_factory=cls.dataset.training)

    # Per the test design, MockModel scores the highest entity id highest,
    # so ``top`` is the id of the best-scored entity.
    top = cls.dataset.num_entities - 1
    second = top - 1
    third = top - 2

    # Test triple: yields the third highest score for both head and tail prediction.
    cls.dataset.testing.mapped_triples = torch.tensor([[third, 0, third]])

    # Training triples are overwritten because the model's triples are used for filtering;
    # they yield the highest score on both sides for the test triple.
    cls.dataset.training.mapped_triples = torch.tensor(
        [
            [third, 0, top],
            [top, 0, third],
        ],
    )

    # Validation triples: yield the second highest score on both sides for the test triple.
    cls.dataset.validation.mapped_triples = torch.tensor(
        [
            [third, 0, second],
            [second, 0, third],
        ],
    )
def setUp(self):
    """Prepare evaluator, model and hand-crafted triples for testing evaluation filtering."""
    self.evaluator = RankBasedEvaluator(filtered=True, automatic_memory_optimization=False)
    self.triples_factory = Nations().training
    self.model = FixedModel(triples_factory=self.triples_factory)

    # Per the test design, the model scores the highest entity id highest,
    # so ``top`` is the id of the best-scored entity.
    top = self.triples_factory.num_entities - 1
    second = top - 1
    third = top - 2

    # Test triple: yields the third highest score for both head and tail prediction.
    self.test_triples = torch.tensor([[third, 0, third]])

    # Training triples: yield the highest score on both sides for the test triple;
    # they are the ones used for filtering.
    self.training_triples = torch.tensor(
        [
            [third, 0, top],
            [top, 0, third],
        ],
    )

    # Validation triples: yield the second highest score on both sides for the test triple.
    self.validation_triples = torch.tensor(
        [
            [third, 0, second],
            [second, 0, third],
        ],
    )
def test_sample_negatives():
    """Test that sample_negatives returns well-formed, true negatives.

    For both head and tail corruption, the sampled ids must be a long tensor of
    shape ``(num_triples, num_negatives)``, and none of the corrupted triples may
    occur among the evaluation triples or the additional filter triples.
    """
    dataset = Nations()
    num_negatives = 2
    evaluation_triples = dataset.validation.mapped_triples
    additional_filter_triples = dataset.training.mapped_triples
    negatives = sample_negatives(
        evaluation_triples=evaluation_triples,
        additional_filter_triples=additional_filter_triples,
        num_entities=dataset.num_entities,
        num_samples=num_negatives,
    )
    head_negatives, tail_negatives = negatives[LABEL_HEAD], negatives[LABEL_TAIL]
    num_triples = evaluation_triples.shape[0]

    # All known-true triples as a set of (h, r, t) integer tuples
    true = set(
        map(
            tuple,
            prepare_filter_triples(
                mapped_triples=evaluation_triples,
                additional_filter_triples=additional_filter_triples,
            ).tolist(),
        )
    )

    # Column 0 is replaced for head negatives, column 2 for tail negatives.
    # A separate loop name avoids rebinding the ``negatives`` mapping above.
    for column, side_negatives in zip((0, 2), (head_negatives, tail_negatives)):
        assert torch.is_tensor(side_negatives)
        assert side_negatives.dtype == torch.long
        assert side_negatives.shape == (num_triples, num_negatives)

        # Materialise the corrupted triples in an integer buffer: a default-dtype
        # (float) buffer would round large ids and compare floats against ints.
        full_negatives = torch.empty(num_triples, num_negatives, 3, dtype=torch.long)
        full_negatives[:, :, :] = evaluation_triples[:, None, :]
        full_negatives[:, :, column] = side_negatives
        full_negatives = full_negatives.view(-1, 3)

        # No corrupted triple may be a known-true triple.
        negative_set = set(map(tuple, full_negatives.tolist()))
        assert negative_set.isdisjoint(true)
def setUp(self):
    """Create a dummy evaluator and dummy model on the Nations training factory."""
    self.counter = 1337
    self.triples_factory = Nations().training
    self.evaluator = DummyEvaluator(counter=self.counter, filtered=True)
    self.model = DummyModel(
        triples_factory=self.triples_factory,
        automatic_memory_optimization=False,
    )
class _NegativeSamplingTestCase:
    """Shared test logic for negative samplers; subclasses set ``negative_sampling_cls``."""

    #: The batch size
    batch_size: int
    #: The random seed
    seed: int
    #: The number of negatives per positive used by the scaling sampler
    num_negs_per_pos: int
    #: The triples factory
    triples_factory: TriplesFactory
    #: The sLCWA instances
    slcwa_instances: SLCWAInstances
    #: Class of negative sampling to test
    negative_sampling_cls: ClassVar[Type[NegativeSampler]]
    #: The negative sampler instance, initialized in setUp
    negative_sampler: NegativeSampler
    #: A second sampler created with ``num_negs_per_pos``, initialized in setUp
    scaling_negative_sampler: NegativeSampler
    #: A positive batch
    positive_batch: torch.LongTensor

    def setUp(self) -> None:
        """Create the factory, both samplers, and a seeded positive batch."""
        self.batch_size = 16
        self.seed = 42
        self.num_negs_per_pos = 10

        self.triples_factory = Nations().training
        self.slcwa_instances = self.triples_factory.create_slcwa_instances()

        sampler_cls = self.negative_sampling_cls
        self.negative_sampler = sampler_cls(triples_factory=self.triples_factory)
        self.scaling_negative_sampler = sampler_cls(
            triples_factory=self.triples_factory,
            num_negs_per_pos=self.num_negs_per_pos,
        )

        # Seeded index draw so that the positive batch is reproducible
        random_state = numpy.random.RandomState(seed=self.seed)
        batch_indices = random_state.randint(
            low=0,
            high=self.slcwa_instances.num_instances,
            size=(self.batch_size,),
        )
        self.positive_batch = self.slcwa_instances.mapped_triples[batch_indices]

    def test_sample(self) -> None:
        """Check shape, value bounds and corruption of sampled negatives."""
        positives = self.positive_batch
        num_entities = self.triples_factory.num_entities
        num_relations = self.triples_factory.num_relations

        # Default sampler: one negative batch of the same shape as the positives
        negative_batch = self.negative_sampler.sample(positive_batch=positives)
        assert negative_batch.shape == positives.shape

        # Every column must stay within its id range: heads, relations, tails
        assert _array_check_bounds(negative_batch[:, 0], low=0, high=num_entities)
        assert _array_check_bounds(negative_batch[:, 1], low=0, high=num_relations)
        assert _array_check_bounds(negative_batch[:, 2], low=0, high=num_entities)

        # Each negative must differ from its positive in at least one position
        differs_per_row = (negative_batch != positives).any(dim=1)
        assert differs_per_row.all()

        # Scaling sampler: ``num_negs_per_pos`` times as many rows, same number of columns
        scaled_negative_batch = self.scaling_negative_sampler.sample(positive_batch=positives)
        expected_rows = positives.shape[0] * self.num_negs_per_pos
        assert scaled_negative_batch.shape[0] == expected_rows
        assert scaled_negative_batch.shape[1] == positives.shape[1]
def setUp(self) -> None:
    """Create the Nations training factory and a small TransE example model."""
    self.batch_size = 16
    self.embedding_dim = 8
    self.factory = Nations().training
    model = TransE(self.factory, embedding_dim=self.embedding_dim)
    # ``to_device_`` is called right after construction and its result is stored.
    self.model = model.to_device_()
def _pre_instantiation_hook(
    self,
    kwargs: MutableMapping[str, Any],
) -> MutableMapping[str, Any]:  # noqa: D102
    kwargs = super()._pre_instantiation_hook(kwargs=kwargs)
    # Seed first, keeping the second element of the seeding result as generator.
    seeding_result = set_random_seed(seed=self.seed)
    self.generator = seeding_result[1]
    # The same factory is stored on the test case and passed to the constructor.
    factory = Nations().training
    self.triples_factory = factory
    kwargs["triples_factory"] = factory
    return kwargs
def setUp(self) -> None:
    """Prepare the triples factory, checkpoint settings and a temporary directory."""
    self.random_seed = 123
    self.num_epochs = 10
    self.checkpoint_file = "PyKEEN_training_loop_test_checkpoint.pt"
    self.triples_factory = Nations().training
    # NOTE(review): the directory is only created here; cleanup is expected to
    # happen elsewhere (e.g. in tearDown) - confirm.
    self.temporary_directory = tempfile.TemporaryDirectory()
def setUp(self) -> None:
    """Create the Nations training factory and a graph sampler over it."""
    self.num_samples = 20
    self.num_epochs = 10
    self.triples_factory = Nations().training
    self.graph_sampler = GraphSampler(
        triples_factory=self.triples_factory,
        num_samples=self.num_samples,
    )
def setUp(self) -> None:
    """Build a label-based entity initializer from Nations and record its tensor shape."""
    training_factory = Nations().training
    self.initializer = pykeen.nn.init.LabelBasedInitializer.from_triples_factory(
        triples_factory=training_factory,
        for_entities=True,
    )
    # The expected shape is taken from the initializer's own tensor.
    self.shape = self.initializer.tensor.shape
def pre_setup_hook(self) -> None:
    """Create factory, loss, TransE model and optimizer before setUp runs."""
    self.triples_factory = Nations().training
    self.loss = self.loss_cls()
    self.model = TransE(
        triples_factory=self.triples_factory,
        loss=self.loss,
        random_seed=self.random_seed,
    )
    # The optimizer is built over the model's parameters that require gradients.
    grad_params = self.model.get_grad_params()
    self.optimizer = self.optimizer_cls(grad_params)
def _pre_instantiation_hook(
    self,
    kwargs: MutableMapping[str, Any],
) -> MutableMapping[str, Any]:  # noqa: D102
    kwargs = super()._pre_instantiation_hook(kwargs=kwargs)
    # Keep the training factory on the test case and hand it to the constructor.
    self.factory = Nations().training
    kwargs["triples_factory"] = self.factory
    return kwargs
def setUp(self) -> None:
    """Create the Nations training factory and a graph sampler over its mapped triples."""
    self.batch_size = 20
    self.num_epochs = 10
    self.triples_factory = Nations().training
    mapped_triples = self.triples_factory.mapped_triples
    self.graph_sampler = GraphSampler(mapped_triples=mapped_triples, batch_size=self.batch_size)
def setUp(self) -> None:
    """Prepare device, factory and a positive batch, then move the test instance to the device."""
    self.device = resolve_device()
    self.triples_factory = Nations().training
    self.batch_size = 16
    # The first ``batch_size`` mapped triples serve as the positive batch.
    first_triples = self.triples_factory.mapped_triples[:self.batch_size, :]
    self.positive_batch = first_triples.to(device=self.device)
    # The parent setUp runs only after the attributes above are in place;
    # ``self.instance`` is available afterwards.
    super().setUp()
    # move test instance to device
    self.instance = self.instance.to(self.device)
def _pre_instantiation_hook(
    self,
    kwargs: MutableMapping[str, Any],
) -> MutableMapping[str, Any]:  # noqa: D102
    kwargs = super()._pre_instantiation_hook(kwargs=kwargs)
    # Output dimension is set equal to the input dimension.
    self.output_dim = self.input_dim
    self.factory = Nations().training
    # Transposing the mapped triples yields three rows, unpacked as
    # source (column 0), edge type (column 1) and target (column 2).
    transposed = self.factory.mapped_triples.t()
    self.source, self.edge_type, self.target = transposed
    # Random input features: one row of size ``input_dim`` per entity
    self.x = torch.rand(self.factory.num_entities, self.input_dim)
    kwargs["input_dim"] = self.input_dim
    kwargs["num_relations"] = self.factory.num_relations
    return kwargs
def setUpClass(cls):
    """Run a short TransE pipeline on Nations once and share the result."""
    training_kwargs = dict(num_epochs=5)
    cls.result = pipeline(
        model='TransE',
        dataset='nations',
        training_kwargs=training_kwargs,
    )
    cls.model = cls.result.model
    # Testing triples are moved to the model's device.
    testing_triples = Nations().testing.mapped_triples
    cls.testing_mapped_triples = testing_triples.to(cls.model.device)
def test_remix(self):
    """Test that remixing keeps the split sizes but changes the triples of each split."""
    reference = Nations()
    for random_state in range(20):
        derived = reference.remix(random_state=random_state)
        split_pairs = (
            (reference.training, derived.training),
            (reference.testing, derived.testing),
            (reference.validation, derived.validation),
        )
        for original_split, remixed_split in split_pairs:
            # Same number of triples in the split ...
            self.assertEqual(original_split.num_triples, remixed_split.num_triples)
            # ... but not element-wise identical mapped triples.
            unchanged = (original_split.mapped_triples == remixed_split.mapped_triples).all()
            self.assertFalse(unchanged)
def setUp(self) -> None:
    """Seed torch, then create factory, regularizer and a positive batch on the device."""
    self.generator = torch.random.manual_seed(seed=42)
    self.batch_size = 16
    self.triples_factory = Nations().training
    self.device = resolve_device()
    # Optional class-level kwargs; a falsy value means "no extra arguments".
    extra_kwargs = self.regularizer_kwargs or {}
    self.regularizer = self.regularizer_cls(device=self.device, **extra_kwargs)
    # The first ``batch_size`` mapped triples serve as the positive batch.
    first_triples = self.triples_factory.mapped_triples[:self.batch_size, :]
    self.positive_batch = first_triples.to(device=self.device)
def setUp(self) -> None:
    """Seed the random generators, then create the factory and the model under test."""
    # The seeding helper returns three values; only the middle one is kept.
    _, self.generator, _ = set_random_seed(42)
    self.factory = Nations(create_inverse_triples=self.create_inverse_triples).training
    # Optional class-level kwargs; a falsy value means "no extra arguments".
    extra_kwargs = self.model_kwargs or {}
    model = self.model_cls(
        triples_factory=self.factory,
        embedding_dim=self.embedding_dim,
        **extra_kwargs,
    )
    self.model = model.to_device_()
def test_relations_to_sparse_matrices(self):
    """Test :func:`triples_factory_to_sparse_matrices`."""
    triples_factory = Nations().training
    matrices = triples_factory_to_sparse_matrices(triples_factory)
    # Exactly two matrices are expected.
    rel, inv = matrices
    for matrix in (rel, inv):
        # type and dtype
        assert isinstance(matrix, scipy.sparse.spmatrix)
        assert matrix.dtype == numpy.int32
        # one row per relation
        num_rows = matrix.shape[0]
        assert num_rows == triples_factory.num_relations
        # largest entry is 1
        assert matrix.max() == 1
def setUp(self) -> None:
    """Create the evaluator under test, a small dataset and a small untrained model."""
    # Settings
    self.batch_size = 8
    self.embedding_dim = 7

    # Evaluator under test; a falsy ``evaluator_kwargs`` means "no extra arguments".
    extra_kwargs = self.evaluator_kwargs or {}
    self.evaluator = self.evaluator_cls(**extra_kwargs)

    # Small test dataset
    self.factory = Nations().training

    # Small model that is not trained here
    self.model = TransE(
        triples_factory=self.factory,
        embedding_dim=self.embedding_dim,
    )
def setUp(self):
    """Create a mock evaluator, a mock model and the early stopper under test."""
    # automatic_memory_optimization is switched off for tests
    self.mock_evaluator = MockEvaluator(
        self.mock_losses,
        automatic_memory_optimization=False,
    )
    dataset = Nations()
    self.model = MockModel(triples_factory=dataset.training)
    self.stopper = EarlyStopper(
        model=self.model,
        evaluator=self.mock_evaluator,
        evaluation_triples_factory=dataset.validation,
        patience=self.patience,
        relative_delta=self.delta,
        # The stopper is told that smaller metric values are better.
        larger_is_better=False,
    )
def setUpClass(cls):
    """Run a short, seeded TransE pipeline on Nations once and share the result."""
    cls.device = resolve_device('cuda')
    # Progress bars are disabled for both training and evaluation.
    training_kwargs = dict(num_epochs=5, use_tqdm=False)
    evaluation_kwargs = dict(use_tqdm=False)
    cls.result = pipeline(
        model='TransE',
        dataset='nations',
        training_kwargs=training_kwargs,
        evaluation_kwargs=evaluation_kwargs,
        device=cls.device,
        random_seed=42,
    )
    cls.model = cls.result.model
    # Testing triples are moved to the model's device.
    testing_triples = Nations().testing.mapped_triples
    cls.testing_mapped_triples = testing_triples.to(cls.model.device)
def test_metadata(self):
    """Test metadata passing for triples factories.

    Checks that the ``path`` metadata entry and the ``repr`` are as expected for
    the original factory, for entity- and relation-restricted factories, for a
    clone created with ``keep_metadata=False``, and for both parts of a split.
    """
    t = Nations().training
    self.assertEqual(NATIONS_TRAIN_PATH, t.metadata['path'])
    self.assertEqual(
        (
            f'TriplesFactory(num_entities=14, num_relations=55, num_triples=1592,'
            f' inverse_triples=False, path="{NATIONS_TRAIN_PATH}")'
        ),
        repr(t),
    )

    # Entity restriction keeps the metadata and shows up in the repr.
    entities = ['poland', 'ussr']
    x = t.new_with_restriction(entities=entities)
    entities_ids = t.entities_to_ids(entities=entities)
    self.assertEqual(NATIONS_TRAIN_PATH, x.metadata['path'])
    self.assertEqual(
        (
            f'TriplesFactory(num_entities=14, num_relations=55, num_triples=37,'
            f' inverse_triples=False, entity_restriction={repr(entities_ids)}, path="{NATIONS_TRAIN_PATH}")'
        ),
        repr(x),
    )

    # Relation restriction keeps the metadata and shows up in the repr.
    relations = ['negativebehavior']
    v = t.new_with_restriction(relations=relations)
    relations_ids = t.relations_to_ids(relations=relations)
    # Check the relation-restricted factory itself (previously ``x`` was re-checked here).
    self.assertEqual(NATIONS_TRAIN_PATH, v.metadata['path'])
    self.assertEqual(
        (
            f'TriplesFactory(num_entities=14, num_relations=55, num_triples=29,'
            f' inverse_triples=False, path="{NATIONS_TRAIN_PATH}", relation_restriction={repr(relations_ids)})'
        ),
        repr(v),
    )

    # Cloning without metadata drops the path entry.
    w = t.clone_and_exchange_triples(t.triples[0:5], keep_metadata=False)
    self.assertIsInstance(w, TriplesFactory)
    self.assertNotIn('path', w.metadata)
    self.assertEqual(
        'TriplesFactory(num_entities=14, num_relations=55, num_triples=5, inverse_triples=False)',
        repr(w),
    )

    # Both parts of a split keep the path metadata.
    y, z = t.split()
    self.assertEqual(NATIONS_TRAIN_PATH, y.metadata['path'])
    self.assertEqual(NATIONS_TRAIN_PATH, z.metadata['path'])