Example #1
    def _test_checkpoints(self, training_loop_type: str):
        """Test whether interrupting the given training loop type can be resumed using checkpoints."""
        training_loop_class = get_training_loop_cls(training_loop_type)

        # Train a model in one shot
        model = TransE(
            triples_factory=self.triples_factory,
            random_seed=self.random_seed,
        )
        optimizer_cls = get_optimizer_cls(None)
        optimizer = optimizer_cls(params=model.get_grad_params())
        training_loop = training_loop_class(model=model, optimizer=optimizer, automatic_memory_optimization=False)
        losses = training_loop.train(
            num_epochs=self.num_epochs,
            batch_size=self.batch_size,
            use_tqdm=False,
            use_tqdm_batch=False,
        )

        # Train a model for the first half
        model = TransE(
            triples_factory=self.triples_factory,
            random_seed=self.random_seed,
        )
        optimizer_cls = get_optimizer_cls(None)
        optimizer = optimizer_cls(params=model.get_grad_params())
        training_loop = training_loop_class(model=model, optimizer=optimizer, automatic_memory_optimization=False)
        training_loop.train(
            num_epochs=self.num_epochs // 2,
            batch_size=self.batch_size,
            checkpoint_name=self.checkpoint_file,
            checkpoint_directory=self.temporary_directory.name,
            checkpoint_frequency=0,
        )

        # Continue training from the first half's checkpoint
        model = TransE(
            triples_factory=self.triples_factory,
            random_seed=123,
        )
        optimizer_cls = get_optimizer_cls(None)
        optimizer = optimizer_cls(params=model.get_grad_params())
        training_loop = training_loop_class(model=model, optimizer=optimizer, automatic_memory_optimization=False)
        losses_2 = training_loop.train(
            num_epochs=self.num_epochs,
            batch_size=self.batch_size,
            checkpoint_name=self.checkpoint_file,
            checkpoint_directory=self.temporary_directory.name,
            checkpoint_frequency=0,
        )

        self.assertEqual(losses, losses_2)
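Note that the resumed run is deliberately constructed with a different random seed (123): because the checkpoint restores the model weights, optimizer state, and random state saved after the first half, the per-epoch losses still match those of the uninterrupted run, which is exactly what the final assertEqual verifies.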
Example #2
# Imports reconstructed from usage; module paths follow PyKEEN 1.x and the
# unittest-templates package.
import tempfile
from typing import Any, ClassVar, MutableMapping, Type

import torch
import unittest_templates
from torch.optim import Adam, Optimizer

from pykeen.datasets import Nations
from pykeen.losses import Loss
from pykeen.models import ConvE, Model, TransE
from pykeen.training import TrainingLoop
from pykeen.training.training_loop import NonFiniteLossError
from pykeen.triples import TriplesFactory


class TrainingLoopTestCase(unittest_templates.GenericTestCase[TrainingLoop]):
    """A generic test case for training loops."""

    model: Model
    triples_factory: TriplesFactory
    loss_cls: ClassVar[Type[Loss]]
    loss: Loss
    optimizer_cls: ClassVar[Type[Optimizer]] = Adam
    optimizer: Optimizer
    random_seed = 0
    batch_size: int = 128
    sub_batch_size: int = 30
    num_epochs: int = 10

    def pre_setup_hook(self) -> None:
        """Prepare case-level variables before the setup() function."""
        self.triples_factory = Nations().training
        self.loss = self.loss_cls()
        self.model = TransE(triples_factory=self.triples_factory, loss=self.loss, random_seed=self.random_seed)
        self.optimizer = self.optimizer_cls(self.model.get_grad_params())

    def _with_model(self, model: Model) -> TrainingLoop:
        return self.cls(
            model=model,
            triples_factory=self.triples_factory,
            automatic_memory_optimization=False,
            optimizer=self.optimizer_cls(model.get_grad_params()),
        )

    def _pre_instantiation_hook(self, kwargs: MutableMapping[str, Any]) -> MutableMapping[str, Any]:  # noqa: D102
        kwargs = super()._pre_instantiation_hook(kwargs=kwargs)
        kwargs["triples_factory"] = self.triples_factory
        kwargs["automatic_memory_optimization"] = False
        kwargs["optimizer"] = self.optimizer
        kwargs["model"] = self.model
        return kwargs

    def test_train(self):
        """Test training does not error."""
        self.instance.train(
            triples_factory=self.triples_factory,
            num_epochs=1,
        )

    def test_sub_batching(self):
        """Test if sub-batching works as expected."""
        self.instance.train(
            triples_factory=self.triples_factory,
            num_epochs=1,
            batch_size=self.batch_size,
            sub_batch_size=self.sub_batch_size,
        )

    def test_sub_batching_support(self):
        """Test if sub-batching works as expected."""
        model = ConvE(triples_factory=self.triples_factory)
        training_loop = self._with_model(model)

        with self.assertRaises(NotImplementedError):
            training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=1,
                batch_size=self.batch_size,
                sub_batch_size=self.sub_batch_size,
            )

    def test_error_on_nan(self):
        """Test if the correct error is raised for non-finite loss values."""
        model = TransE(triples_factory=self.triples_factory)
        patience = 2

        class NaNTrainingLoop(self.cls):
            def __init__(self, **kwargs):
                super().__init__(**kwargs)
                self.patience = patience

            def _process_batch(self, *args, **kwargs):
                loss = super()._process_batch(*args, **kwargs)
                self.patience -= 1
                if self.patience < 0:
                    return torch.as_tensor([float("nan")], device=loss.device, dtype=torch.float32)
                return loss

        training_loop = NaNTrainingLoop(
            model=model,
            triples_factory=self.triples_factory,
            optimizer=self.optimizer_cls(model.get_grad_params()),
        )
        with self.assertRaises(NonFiniteLossError):
            training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=patience + 1,
                batch_size=self.batch_size,
            )

    def test_checkpoints(self):
        """Test whether interrupting the given training loop type can be resumed using checkpoints."""
        # Train a model in one shot
        model = TransE(
            triples_factory=self.triples_factory,
            random_seed=self.random_seed,
        )
        training_loop = self._with_model(model)
        losses = training_loop.train(
            triples_factory=self.triples_factory,
            num_epochs=self.num_epochs,
            batch_size=self.batch_size,
            use_tqdm=False,
            use_tqdm_batch=False,
        )

        with tempfile.TemporaryDirectory() as directory:
            name = "checkpoint.pt"

            # Train a model for the first half
            model = TransE(
                triples_factory=self.triples_factory,
                random_seed=self.random_seed,
            )
            training_loop = self._with_model(model)
            training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=self.num_epochs // 2,
                batch_size=self.batch_size,
                checkpoint_name=name,
                checkpoint_directory=directory,
                checkpoint_frequency=0,
            )

            # Continue training from the first half's checkpoint
            model = TransE(
                triples_factory=self.triples_factory,
                random_seed=123,
            )
            training_loop = self._with_model(model)
            losses_2 = training_loop.train(
                triples_factory=self.triples_factory,
                num_epochs=self.num_epochs,
                batch_size=self.batch_size,
                checkpoint_name=name,
                checkpoint_directory=directory,
                checkpoint_frequency=0,
            )

        self.assertEqual(losses, losses_2)
Example #3
from torch.optim import Adam

from pykeen.evaluation import RankBasedEvaluator
from pykeen.models import TransE
from pykeen.training import SLCWATrainingLoop
from pykeen.triples import TriplesFactory

n_tokeep = 300
minimum = 500

tf = TriplesFactory.from_path(f'data/rare/rare_{minimum}_{n_tokeep}.csv')
training, testing, validation = tf.split([.8, .1, .1])

# Pick model, optimizer, training approach
model = TransE(triples_factory=training)
optimizer = Adam(params=model.get_grad_params())
training_loop = SLCWATrainingLoop(
    model=model,
    triples_factory=training,
    optimizer=optimizer,
)

# train
training_loop.train(
    triples_factory=training,
    num_epochs=500,
    batch_size=256,
)

# evaluate
evaluator = RankBasedEvaluator(ks=[50])
mapped_triples = testing.mapped_triples
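The snippet ends before the evaluator is actually invoked. A minimal completion, assuming the PyKEEN 1.x Evaluator.evaluate() signature (model, mapped_triples, batch_size) and the MetricResults.to_dict() helper, might look like this:

# Rank-based evaluation on the held-out test triples (sketch; the API and
# batch_size value are assumptions, not taken from the original source).
results = evaluator.evaluate(
    model=model,
    mapped_triples=mapped_triples,
    batch_size=256,
)
print(results.to_dict())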