Example 1
def runner():
    return dl.SupervisedRunner(
        engine=get_available_engine(),
        input_key="features",
        output_key="logits",
        target_key="targets",
        loss_key="loss",
    )
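A factory like this keeps the runner configuration in one place. A minimal usage sketch, assuming hypothetical `model`, `criterion`, `optimizer`, and `loaders` objects defined elsewhere:

# Sketch only: model, criterion, optimizer, and loaders are placeholders.
runner().train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    num_epochs=1,
)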
Example 2
    def get_engine(self) -> IEngine:
        """Returns the engine for the run."""
        engine_params = self._config.engine
        if engine_params is not None:
            engine = hydra.utils.instantiate(engine_params)
        else:
            engine = get_available_engine(
                fp16=self._fp16, ddp=self._ddp, amp=self._amp, apex=self._apex
            )
        return engine
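Here Hydra resolves the engine node from the experiment config. A minimal sketch of such a node, built inline with OmegaConf; the `catalyst.engines.torch.DeviceEngine` class path is an assumption, not taken from the snippet above:

import hydra
from omegaconf import OmegaConf

# Hypothetical engine node; hydra.utils.instantiate resolves the "_target_" key.
config = OmegaConf.create(
    {"engine": {"_target_": "catalyst.engines.torch.DeviceEngine", "device": "cuda:0"}}
)
engine = hydra.utils.instantiate(config.engine)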
Example 3
    def get_engine(self) -> IEngine:
        """Returns the engine for the run."""
        engine_params = self._config.get("engine", None)
        if engine_params is not None:
            engine = REGISTRY.get_from_params(**engine_params)
        else:
            engine = get_available_engine(
                fp16=self._fp16, ddp=self._ddp, amp=self._amp, apex=self._apex
            )
        return engine
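This variant resolves the engine through Catalyst's registry instead of Hydra. A hypothetical `engine_params` mapping for it might look like this (the "_target_" key follows the hydra-slayer convention; the short class name is an assumption):

from catalyst.registry import REGISTRY

# Hypothetical params dict; get_from_params resolves the "_target_" entry.
engine_params = {"_target_": "DeviceEngine", "device": "cuda:0"}
engine = REGISTRY.get_from_params(**engine_params)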
Example 4
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: TorchModel = None,
        engine: Union["Engine", str] = None,
        seed: int = 42,
        # extra info
        resume: str = None,
        # engine extra params
        cpu: bool = False,
        fp16: bool = False,
    ) -> Generator:
        """
        Runs model inference on a PyTorch DataLoader and returns
        a Python generator with model predictions from `runner.predict_batch`.

        Args:
            loader: loader to predict
            model: model to use for prediction
            engine: engine to use for prediction
            seed: random seed to use before prediction
            resume: path to checkpoint for model
            cpu: boolean flag to force CPU usage
            fp16: boolean flag to use half-precision

        Yields:
            batches with model predictions

        .. note::
            Please follow the `minimal examples`_ sections for use cases.

            .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples  # noqa: E501, W505
        """
        self.engine = engine or get_available_engine(cpu=cpu, fp16=fp16)

        if model is not None:
            self.model = model
        assert self.model is not None

        if resume is not None:
            self.engine.wait_for_everyone()
            unwrapped_model = self.engine.unwrap_model(self.model)
            unwrapped_model.load_state_dict(load_checkpoint(resume))

        self.model = self.engine.prepare(self.model)
        maybe_recursive_call(self.model, "train", mode=False)
        loader = self.engine.prepare(loader)

        set_global_seed(seed)
        for batch in loader:
            yield self.predict_batch(batch)
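A short usage sketch for this variant, assuming a trained `runner`, an existing `valid_loader`, and a hypothetical checkpoint path:

# All names and the checkpoint path below are placeholders.
for logits in runner.predict_loader(
    loader=valid_loader,
    resume="logs/model.best.pth",  # restore weights before inference
    cpu=True,                      # force CPU even if CUDA is available
):
    probabilities = logits.softmax(dim=-1)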
Example 5
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: Model = None,
        engine: Union["IEngine", str] = None,
        seed: int = 42,
        # engine extra params
        fp16: bool = False,
        amp: bool = False,
        apex: bool = False,
        ddp: bool = False,
    ) -> Generator:
        """
        Runs model inference on a PyTorch DataLoader and returns
        a Python generator with model predictions from `runner.predict_batch`.

        Args:
            loader: loader to predict
            model: model to use for prediction
            engine: engine to use for prediction
            seed: random seed to use before prediction
            fp16: boolean flag to use half-precision training (AMP > APEX)
            amp: boolean flag to use amp half-precision
            apex: boolean flag to use apex half-precision
            ddp: if `True`, will start training in distributed mode.
                Note: works only with Python scripts. No Jupyter support.

        Yields:
            batches with model predictions
        """
        self._engine = engine or get_available_engine(
            fp16=fp16, ddp=ddp, amp=amp, apex=apex)

        if model is not None:
            self.model = model
        assert self.model is not None

        # if resume is not None:
        #     checkpoint = load_checkpoint(resume)
        #     unpack_checkpoint(checkpoint, model=self.model)

        self.model = self.engine.sync_device(self.model)
        maybe_recursive_call(self.model, "train", mode=False)

        set_global_seed(seed)
        for batch in loader:
            yield self.predict_batch(batch)
Example 6
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: Model = None,
        engine: Union["IEngine", str] = None,
        seed: int = 42,
    ) -> Generator:
        """
        Runs model inference on a PyTorch DataLoader and returns
        a Python generator with model predictions from `runner.predict_batch`.

        Args:
            loader: loader to predict
            model: model to use for prediction
            engine: engine to use for prediction
            seed: random seed to use before prediction

        Yields:
            batches with model predictions
        """
        if engine is not None:
            self.engine = engine
        if self.engine is None:
            self.engine = get_available_engine()

        if model is not None:
            self.model = model
        assert self.model is not None

        # if resume is not None:
        #     checkpoint = load_checkpoint(resume)
        #     unpack_checkpoint(checkpoint, model=self.model)

        self.model = self.engine.sync_device(self.model)
        maybe_recursive_call(self.model, "train", mode=False)

        set_global_seed(seed)
        for batch in loader:
            yield self.predict_batch(batch)
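Since this variant exposes no engine flags, the engine must be passed explicitly or pre-set on the runner. A sketch, assuming `DeviceEngine` is importable from `catalyst.engines.torch` (an assumption, as is every other name here):

from catalyst.engines.torch import DeviceEngine

# Hypothetical objects; pass a concrete engine instead of relying on auto-detection.
for logits in runner.predict_loader(loader=valid_loader, engine=DeviceEngine("cpu")):
    ...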
Example 7
    def predict_loader(
        self,
        *,
        loader: DataLoader,
        model: Model = None,
        engine: Union["IEngine", str] = None,
        seed: int = 42,
        # engine extra params
        fp16: bool = False,
        amp: bool = False,
        apex: bool = False,
        ddp: bool = False,
    ) -> Generator:
        """
        Runs model inference on a PyTorch DataLoader and returns
        a Python generator with model predictions from `runner.predict_batch`.

        Args:
            loader: loader to predict
            model: model to use for prediction
            engine: engine to use for prediction
            seed: random seed to use before prediction
            fp16: boolean flag to use half-precision training (AMP > APEX)
            amp: boolean flag to use amp half-precision
            apex: boolean flag to use apex half-precision
            ddp: if `True`, will start training in distributed mode.
                Note: works only with Python scripts. No Jupyter support.

        Yields:
            batches with model predictions

        .. note::
            Please follow the `minimal examples`_ sections for use cases.

            .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples

        Examples:

        .. code-block:: python

            import os
            from torch import nn, optim
            from torch.nn import functional as F
            from torch.utils.data import DataLoader
            from catalyst import dl, metrics
            from catalyst.data.transforms import ToTensor
            from catalyst.contrib.datasets import MNIST

            model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
            optimizer = optim.Adam(model.parameters(), lr=0.02)

            loaders = {
                "train": DataLoader(
                    MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                    batch_size=32
                ),
                "valid": DataLoader(
                    MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                    batch_size=32
                ),
            }

            class CustomRunner(dl.Runner):
                def predict_batch(self, batch):
                    # model inference step
                    return self.model(batch[0].to(self.device))

                def on_loader_start(self, runner):
                    super().on_loader_start(runner)
                    self.meters = {
                        key: metrics.AdditiveValueMetric(compute_on_call=False)
                        for key in ["loss", "accuracy01", "accuracy03"]
                    }

                def handle_batch(self, batch):
                    # model train/valid step
                    # unpack the batch
                    x, y = batch
                    # run model forward pass
                    logits = self.model(x)
                    # compute the loss
                    loss = F.cross_entropy(logits, y)
                    # compute other metrics of interest
                    accuracy01, accuracy03 = metrics.accuracy(logits, y, topk=(1, 3))
                    # log metrics
                    self.batch_metrics.update(
                        {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
                    )
                    for key in ["loss", "accuracy01", "accuracy03"]:
                        self.meters[key].update(
                            self.batch_metrics[key].item(),
                            self.batch_size
                        )
                    # run model backward pass
                    if self.is_train_loader:
                        loss.backward()
                        self.optimizer.step()
                        self.optimizer.zero_grad()

                def on_loader_end(self, runner):
                    for key in ["loss", "accuracy01", "accuracy03"]:
                        self.loader_metrics[key] = self.meters[key].compute()[0]
                    super().on_loader_end(runner)

            runner = CustomRunner()
            # model training
            runner.train(
                model=model,
                optimizer=optimizer,
                loaders=loaders,
                logdir="./logs",
                num_epochs=5,
                verbose=True,
                valid_loader="valid",
                valid_metric="loss",
                minimize_valid_metric=True,
            )
            # model inference
            for logits in runner.predict_loader(loader=loaders["valid"]):
                assert logits.detach().cpu().numpy().shape[-1] == 10
        """
        self._engine = engine or get_available_engine(fp16=fp16, ddp=ddp, amp=amp, apex=apex)

        if model is not None:
            self.model = model
        assert self.model is not None

        # if resume is not None:
        #     checkpoint = load_checkpoint(resume)
        #     unpack_checkpoint(checkpoint, model=self.model)

        self.model = self.engine.sync_device(self.model)
        maybe_recursive_call(self.model, "train", mode=False)

        set_global_seed(seed)
        for batch in loader:
            yield self.predict_batch(batch)
Example 8
    def train(
        self,
        *,
        # the data
        loaders: "OrderedDict[str, DataLoader]",
        # the core
        model: Model,
        engine: Union["IEngine", str] = None,
        trial: ITrial = None,
        # the components
        criterion: Criterion = None,
        optimizer: Optimizer = None,
        scheduler: Scheduler = None,
        # the callbacks
        callbacks: "Union[List[Callback], OrderedDict[str, Callback]]" = None,
        # the loggers
        loggers: "Dict[str, ILogger]" = None,
        # experiment info
        seed: int = 42,
        hparams: Dict[str, Any] = None,
        # stage info
        num_epochs: int = 1,
        # extra info (callbacks info)
        logdir: str = None,
        valid_loader: str = None,
        valid_metric: str = None,
        minimize_valid_metric: bool = True,
        verbose: bool = False,
        timeit: bool = False,
        check: bool = False,
        overfit: bool = False,
        load_best_on_end: bool = False,
        # engine extra params
        fp16: bool = False,
        amp: bool = False,
        apex: bool = False,
        ddp: bool = False,
    ) -> None:
        """
        Starts the train stage of the model.

        Args:
            loaders: dictionary with one or several ``torch.utils.data.DataLoader``
                for training, validation or inference
            model: model to train
            engine: engine to use for model training
            trial: trial to use during model training
            criterion: criterion function for training
            optimizer: optimizer for training
            scheduler: scheduler for training
            callbacks: list or dictionary with Catalyst callbacks
            loggers: dictionary with Catalyst loggers
            seed: experiment's initial seed value
            hparams: hyperparameters for the run
            num_epochs: number of training epochs
            logdir: path to output directory
            valid_loader: loader name used to calculate
                the metrics and save the checkpoints. For example,
                you can pass `train` and then
                the metrics will be taken from the `train` loader.
            valid_metric: the key to the name of the metric
                by which the checkpoints will be selected.
            minimize_valid_metric: flag to indicate whether
                the ``valid_metric`` should be minimized or not (default: True).
            verbose: if `True`, it displays the status of the training to the console.
            timeit: if True, computes the execution time
                of the training process and displays it to the console.
            check: if True, then only checks that the pipeline is working
                (3 epochs only with 3 batches per loader)
            overfit: if True, then takes only one batch per loader
                for model overfitting; for advanced usage, please check
                ``BatchOverfitCallback``
            load_best_on_end: if True, Runner will load
                best checkpoint state (model, optimizer, etc)
                according to validation metrics. Requires specified ``logdir``.
            fp16: boolean flag to use half-precision training (AMP > APEX)
            amp: boolean flag to use amp half-precision
            apex: boolean flag to use apex half-precision
            ddp: if `True`, will start training in distributed mode.
                Note: works only with Python scripts. No Jupyter support.

        .. note::
            Please follow the `minimal examples`_ sections for use cases.

            .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples

        Examples:

        .. code-block:: python

            import os
            from torch import nn, optim
            from torch.nn import functional as F
            from torch.utils.data import DataLoader
            from catalyst import dl, metrics
            from catalyst.data.transforms import ToTensor
            from catalyst.contrib.datasets import MNIST

            model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
            optimizer = optim.Adam(model.parameters(), lr=0.02)

            loaders = {
                "train": DataLoader(
                    MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                    batch_size=32
                ),
                "valid": DataLoader(
                    MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                    batch_size=32
                ),
            }

            class CustomRunner(dl.Runner):
                def predict_batch(self, batch):
                    # model inference step
                    return self.model(batch[0].to(self.device))

                def on_loader_start(self, runner):
                    super().on_loader_start(runner)
                    self.meters = {
                        key: metrics.AdditiveValueMetric(compute_on_call=False)
                        for key in ["loss", "accuracy01", "accuracy03"]
                    }

                def handle_batch(self, batch):
                    # model train/valid step
                    # unpack the batch
                    x, y = batch
                    # run model forward pass
                    logits = self.model(x)
                    # compute the loss
                    loss = F.cross_entropy(logits, y)
                    # compute other metrics of interest
                    accuracy01, accuracy03 = metrics.accuracy(logits, y, topk=(1, 3))
                    # log metrics
                    self.batch_metrics.update(
                        {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
                    )
                    for key in ["loss", "accuracy01", "accuracy03"]:
                        self.meters[key].update(
                            self.batch_metrics[key].item(),
                            self.batch_size
                        )
                    # run model backward pass
                    if self.is_train_loader:
                        loss.backward()
                        self.optimizer.step()
                        self.optimizer.zero_grad()

                def on_loader_end(self, runner):
                    for key in ["loss", "accuracy01", "accuracy03"]:
                        self.loader_metrics[key] = self.meters[key].compute()[0]
                    super().on_loader_end(runner)

            runner = CustomRunner()
            # model training
            runner.train(
                model=model,
                optimizer=optimizer,
                loaders=loaders,
                logdir="./logs",
                num_epochs=5,
                verbose=True,
                valid_loader="valid",
                valid_metric="loss",
                minimize_valid_metric=True,
            )
            # model inference
            for logits in runner.predict_loader(loader=loaders["valid"]):
                assert logits.detach().cpu().numpy().shape[-1] == 10
        """
        # experiment setup
        self._engine = engine or get_available_engine(fp16=fp16, ddp=ddp, amp=amp, apex=apex)
        self._trial = trial
        self._loggers = loggers
        # the data
        self._loaders = loaders
        # the components
        self._model = model
        self._criterion = criterion
        self._optimizer = optimizer
        self._scheduler = scheduler
        # the callbacks
        self._callbacks = callbacks
        # extra
        self._stage = "train"
        self._seed = seed
        self._hparams = hparams
        self._num_epochs = num_epochs
        self._logdir = logdir
        self._valid_loader = valid_loader
        self._valid_metric = valid_metric
        self._minimize_valid_metric = minimize_valid_metric
        self._verbose = verbose
        self._timeit = timeit
        self._check = check
        self._overfit = overfit
        self._load_best_on_end = load_best_on_end
        # run
        self.run()
Example 9
    def get_engine(self) -> IEngine:
        """Returns the engine for a run."""
        return self._engine or get_available_engine()
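The `or` fallback gives precedence to a user-supplied engine: auto-detection runs only when `self._engine` is `None`. A minimal sketch of the surrounding class, assuming the engine is injected through the constructor (the class itself is hypothetical):

class ExperimentRunner:  # hypothetical container class
    def __init__(self, engine: IEngine = None):
        self._engine = engine  # None means "auto-detect later"

    def get_engine(self) -> IEngine:
        """Returns the engine for a run."""
        return self._engine or get_available_engine()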
Example 10
    def train(
        self,
        *,
        # the data
        loaders: "OrderedDict[str, DataLoader]",
        # the core
        model: Model,
        engine: Union["IEngine", str] = None,
        trial: ITrial = None,
        # the components
        criterion: Criterion = None,
        optimizer: Optimizer = None,
        scheduler: Scheduler = None,
        # the callbacks
        callbacks: "Union[List[Callback], OrderedDict[str, Callback]]" = None,
        # the loggers
        loggers: "Dict[str, ILogger]" = None,
        # experiment info
        seed: int = 42,
        hparams: Dict[str, Any] = None,
        # stage info
        num_epochs: int = 1,
        # extra info (callbacks info)
        logdir: str = None,
        valid_loader: str = None,
        valid_metric: str = None,
        minimize_valid_metric: bool = True,
        verbose: bool = False,
        timeit: bool = False,
        check: bool = False,
        overfit: bool = False,
        load_best_on_end: bool = False,
        # engine extra params
        fp16: bool = False,
        amp: bool = False,
        apex: bool = False,
        ddp: bool = False,
    ) -> None:
        """
        Starts the train stage of the model.

        Args:
            loaders: dictionary with one or several ``torch.utils.data.DataLoader``
                for training, validation or inference
            model: model to train
            engine: engine to use for model training
            trial: trial to use during model training
            criterion: criterion function for training
            optimizer: optimizer for training
            scheduler: scheduler for training
            callbacks: list or dictionary with Catalyst callbacks
            loggers: dictionary with Catalyst loggers
            seed: experiment's initial seed value
            hparams: hyperparameters for the run
            num_epochs: number of training epochs
            logdir: path to output directory
            valid_loader: loader name used to calculate
                the metrics and save the checkpoints. For example,
                you can pass `train` and then
                the metrics will be taken from the `train` loader.
            valid_metric: the key to the name of the metric
                by which the checkpoints will be selected.
            minimize_valid_metric: flag to indicate whether
                the ``valid_metric`` should be minimized or not.
            verbose: if `True`, it displays the status of the training to the console.
            timeit: if True, computes the execution time
                of the training process and displays it to the console.
            check: if True, then only checks that the pipeline is working
                (3 epochs only with 3 batches per loader)
            overfit: if True, then takes only one batch per loader
                for model overfitting; for advanced usage, please check
                ``BatchOverfitCallback``
            load_best_on_end: if True, Runner will load
                best checkpoint state (model, optimizer, etc)
                according to validation metrics. Requires specified ``logdir``.
            fp16: boolean flag to use half-precision training (AMP > APEX)
            amp: boolean flag to use amp half-precision
            apex: boolean flag to use apex half-precision
            ddp: if `True`, will start training in distributed mode.
                Note: works only with Python scripts. No Jupyter support.
        """
        # experiment setup
        self._engine = engine or get_available_engine(
            fp16=fp16, ddp=ddp, amp=amp, apex=apex)
        self._trial = trial
        self._loggers = loggers
        # the data
        self._loaders = loaders
        # the components
        self._model = model
        self._criterion = criterion
        self._optimizer = optimizer
        self._scheduler = scheduler
        # the callbacks
        self._callbacks = callbacks
        # extra
        self._stage = "train"
        self._seed = seed
        self._hparams = hparams
        self._num_epochs = num_epochs
        self._logdir = logdir
        self._valid_loader = valid_loader
        self._valid_metric = valid_metric
        self._minimize_valid_metric = minimize_valid_metric
        self._verbose = verbose
        self._timeit = timeit
        self._check = check
        self._overfit = overfit
        self._load_best_on_end = load_best_on_end
        # run
        self.run()
Example 11
    def train(
        self,
        *,
        # the data
        loaders: "OrderedDict[str, DataLoader]",
        # the core
        model: TorchModel,
        engine: Union["Engine", str] = None,
        # the components
        criterion: TorchCriterion = None,
        optimizer: TorchOptimizer = None,
        scheduler: TorchScheduler = None,
        # the callbacks
        callbacks: "Union[List[Callback], OrderedDict[str, Callback]]" = None,
        # the loggers
        loggers: "Dict[str, ILogger]" = None,
        # experiment info
        seed: int = 42,
        hparams: Dict[str, Any] = None,
        num_epochs: int = 1,
        # extra info (callbacks info)
        logdir: str = None,
        resume: str = None,
        valid_loader: str = None,
        valid_metric: str = None,
        minimize_valid_metric: bool = None,
        verbose: bool = False,
        timeit: bool = False,
        check: bool = False,
        overfit: bool = False,
        profile: bool = False,
        load_best_on_end: bool = False,
        # engine extra params
        cpu: bool = False,
        fp16: bool = False,
        ddp: bool = False,
    ) -> None:
        """
        Starts the training of the model.

        Args:
            loaders: dictionary with one or several ``torch.utils.data.DataLoader``
                for training, validation or inference
            model: model to train
            engine: engine to use for model training
            criterion: criterion function for training
            optimizer: optimizer for training
            scheduler: scheduler for training
            callbacks: list or dictionary with Catalyst callbacks
            loggers: dictionary with Catalyst loggers
            seed: experiment's initial seed value
            hparams: hyperparameters for the run
            num_epochs: number of training epochs
            logdir: path to output directory
            resume: path to checkpoint for model
            valid_loader: loader name used to calculate
                the metrics and save the checkpoints. For example,
                you can pass `train` and then
                the metrics will be taken from the `train` loader.
            valid_metric: the key to the name of the metric
                by which the checkpoints will be selected.
            minimize_valid_metric: flag to indicate whether
                the ``valid_metric`` should be minimized or not (default: True).
            verbose: if `True`, it displays the status of the training to the console.
            timeit: if True, computes the execution time
                of the training process and displays it to the console.
            check: if True, then only checks that the pipeline is working
                (3 epochs only with 3 batches per loader)
            overfit: if True, then takes only one batch per loader
                for model overfitting; for advanced usage, please check
                ``BatchOverfitCallback``
            profile: if True, then uses ProfilerCallback; for advanced usage, please check
                ``ProfilerCallback``
            load_best_on_end: if True, Runner will load
                best checkpoint state (model, optimizer, etc)
                according to validation metrics. Requires specified ``logdir``.
            cpu: boolean flag to force CPU usage
            fp16: boolean flag to use half-precision
            ddp: if `True`, will start training in distributed mode.
                Note: works only with Python scripts. No Jupyter support.

        .. note::
            Please follow the `minimal examples`_ sections for use cases.

            .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples  # noqa: E501, W505

        """
        # experiment setup
        self._engine = engine or get_available_engine(cpu=cpu, fp16=fp16, ddp=ddp)
        # self._trial = trial
        self._loggers = loggers
        # the data
        self._loaders = loaders
        # the components
        self._model = model
        self._criterion = criterion
        self._optimizer = optimizer
        self._scheduler = scheduler
        # the callbacks
        self._callbacks = callbacks
        # extra
        self._seed = seed
        self._hparams = hparams
        self._num_epochs = num_epochs
        self._logdir = logdir
        self._resume = resume
        self._valid_loader = valid_loader
        self._valid_metric = valid_metric
        self._minimize_valid_metric = minimize_valid_metric
        self._verbose = verbose
        self._timeit = timeit
        self._check = check
        self._overfit = overfit
        self._profile = profile
        self._load_best_on_end = load_best_on_end
        # run
        self.run()
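A compact usage sketch for this `train()` signature, reusing the hypothetical `model`, `optimizer`, and `loaders` from the docstring in Example 8 (all names are placeholders, not taken from the snippet above):

from torch import nn
from catalyst import dl

criterion = nn.CrossEntropyLoss()
runner = dl.SupervisedRunner()
runner.train(
    model=model,
    criterion=criterion,
    optimizer=optimizer,
    loaders=loaders,
    logdir="./logs",
    num_epochs=1,
    resume=None,  # or a checkpoint path such as "logs/model.best.pth"
    cpu=True,     # force CPU even if CUDA is available
)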