def runner():
    """Build a ``dl.SupervisedRunner`` bound to the default available engine.

    The runner is configured with ``input_key="features"``,
    ``output_key="logits"``, ``target_key="targets"`` and ``loss_key="loss"``.

    Returns:
        the configured ``dl.SupervisedRunner`` instance
    """
    # Resolve the engine first so the constructor call stays a flat kwarg list.
    default_engine = get_available_engine()
    return dl.SupervisedRunner(
        engine=default_engine,
        input_key="features",
        output_key="logits",
        target_key="targets",
        loss_key="loss",
    )
def get_engine(self) -> IEngine:
    """Return the engine for the run.

    When the config exposes a non-``None`` ``engine`` entry, the engine is
    built with ``hydra.utils.instantiate``. Otherwise the engine comes from
    ``get_available_engine`` using the runner's ``fp16``/``ddp``/``amp``/
    ``apex`` flags.

    Returns:
        the engine instance to use for the run
    """
    engine_config = self._config.engine
    if engine_config is None:
        # No explicit engine section: fall back to auto-selection.
        return get_available_engine(
            fp16=self._fp16, ddp=self._ddp, amp=self._amp, apex=self._apex
        )
    return hydra.utils.instantiate(engine_config)
def get_engine(self) -> IEngine:
    """Return the engine for the run.

    When the config contains a non-``None`` ``"engine"`` entry, its items are
    passed as keyword arguments to ``REGISTRY.get_from_params``. Otherwise the
    engine comes from ``get_available_engine`` using the runner's
    ``fp16``/``ddp``/``amp``/``apex`` flags.

    Returns:
        the engine instance to use for the run
    """
    engine_config = self._config.get("engine", None)
    if engine_config is None:
        # No explicit engine section: fall back to auto-selection.
        return get_available_engine(
            fp16=self._fp16, ddp=self._ddp, amp=self._amp, apex=self._apex
        )
    return REGISTRY.get_from_params(**engine_config)
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: TorchModel = None,
    engine: Union["Engine", str] = None,
    seed: int = 42,
    # checkpoint to restore before inference
    resume: str = None,
    # flags forwarded to ``get_available_engine``
    cpu: bool = False,
    fp16: bool = False,
) -> Generator:
    """
    Runs model inference on a PyTorch DataLoader and returns a python
    generator with model predictions from `runner.predict_batch`.

    This is a generator function: engine/model setup only happens once the
    returned generator is iterated for the first time.

    Args:
        loader: loader to predict
        model: model to use for prediction; when omitted, the model already
            stored on the runner is used (it must not be ``None``)
        engine: engine to use for prediction; when falsy, one is created
            with ``get_available_engine(cpu=cpu, fp16=fp16)``
        seed: random seed to set right before prediction starts
        resume: path to checkpoint for model
        cpu: boolean flag to force CPU usage (only used when ``engine``
            is not given)
        fp16: boolean flag to use half-precision (only used when ``engine``
            is not given)

    Yields:
        batches with model predictions

    .. note::
        Please follow the `minimal examples`_ sections for use cases.

        .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples  # noqa: E501, W505
    """
    if not engine:
        engine = get_available_engine(cpu=cpu, fp16=fp16)
    self.engine = engine

    if model is not None:
        self.model = model
    assert self.model is not None

    if resume is not None:
        # Load the checkpoint into the unwrapped model before ``prepare``.
        self.engine.wait_for_everyone()
        bare_model = self.engine.unwrap_model(self.model)
        bare_model.load_state_dict(load_checkpoint(resume))

    self.model = self.engine.prepare(self.model)
    # Calls ``train`` with ``mode=False`` (presumably inference mode).
    maybe_recursive_call(self.model, "train", mode=False)
    prepared_loader = self.engine.prepare(loader)

    set_global_seed(seed)
    for data in prepared_loader:
        prediction = self.predict_batch(data)
        yield prediction
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: Model = None,
    engine: Union["IEngine", str] = None,
    seed: int = 42,
    # flags forwarded to ``get_available_engine``
    fp16: bool = False,
    amp: bool = False,
    apex: bool = False,
    ddp: bool = False,
) -> Generator:
    """
    Runs model inference on a PyTorch DataLoader and returns a python
    generator with model predictions from `runner.predict_batch`.

    This is a generator function: engine/model setup only happens once the
    returned generator is iterated for the first time.

    Args:
        loader: loader to predict
        model: model to use for prediction; when omitted, the model already
            stored on the runner is used (it must not be ``None``)
        engine: engine to use for prediction; when falsy, one is created
            with ``get_available_engine`` from the flags below
        seed: random seed to set right before prediction starts
        fp16: boolean flag to use half-precision training (AMP > APEX)
        amp: boolean flag to use amp half-precision
        apex: boolean flag to use apex half-precision
        ddp: if `True` will start training in distributed mode.
            Note: Works only with python scripts. No jupyter support.

    Yields:
        batches with model predictions
    """
    if not engine:
        engine = get_available_engine(fp16=fp16, ddp=ddp, amp=amp, apex=apex)
    self._engine = engine

    if model is not None:
        self.model = model
    assert self.model is not None

    # NOTE: restoring from a checkpoint (``resume``) is currently disabled
    # in this method.

    self.model = self.engine.sync_device(self.model)
    # Calls ``train`` with ``mode=False`` (presumably inference mode).
    maybe_recursive_call(self.model, "train", mode=False)

    set_global_seed(seed)
    for data in loader:
        prediction = self.predict_batch(data)
        yield prediction
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: Model = None,
    engine: Union["IEngine", str] = None,
    seed: int = 42,
) -> Generator:
    """
    Runs model inference on a PyTorch DataLoader and returns a python
    generator with model predictions from `runner.predict_batch`.

    This is a generator function: engine/model setup only happens once the
    returned generator is iterated for the first time.

    Args:
        loader: loader to predict
        model: model to use for prediction; when omitted, the model already
            stored on the runner is used (it must not be ``None``)
        engine: engine to use for prediction; when ``None``, the runner's
            current engine is kept, and ``get_available_engine()`` is used
            only if the runner has no engine at all
        seed: random seed to set right before prediction starts

    Yields:
        batches with model predictions
    """
    # An explicitly passed engine overrides whatever the runner holds.
    if engine is not None:
        self.engine = engine
    # Still nothing set: let the library pick an engine.
    if self.engine is None:
        self.engine = get_available_engine()

    if model is not None:
        self.model = model
    assert self.model is not None

    # NOTE: restoring from a checkpoint (``resume``) is currently disabled
    # in this method.

    self.model = self.engine.sync_device(self.model)
    # Calls ``train`` with ``mode=False`` (presumably inference mode).
    maybe_recursive_call(self.model, "train", mode=False)

    set_global_seed(seed)
    for data in loader:
        prediction = self.predict_batch(data)
        yield prediction
def predict_loader(
    self,
    *,
    loader: DataLoader,
    model: Model = None,
    engine: Union["IEngine", str] = None,
    seed: int = 42,
    # flags forwarded to ``get_available_engine``
    fp16: bool = False,
    amp: bool = False,
    apex: bool = False,
    ddp: bool = False,
) -> Generator:
    """
    Runs model inference on a PyTorch DataLoader and returns a python
    generator with model predictions from `runner.predict_batch`.

    This is a generator function: engine/model setup only happens once the
    returned generator is iterated for the first time.

    Args:
        loader: loader to predict
        model: model to use for prediction; when omitted, the model already
            stored on the runner is used (it must not be ``None``)
        engine: engine to use for prediction; when falsy, one is created
            with ``get_available_engine`` from the flags below
        seed: random seed to set right before prediction starts
        fp16: boolean flag to use half-precision training (AMP > APEX)
        amp: boolean flag to use amp half-precision
        apex: boolean flag to use apex half-precision
        ddp: if `True` will start training in distributed mode.
            Note: Works only with python scripts. No jupyter support.

    Yields:
        batches with model predictions

    .. note::
        Please follow the `minimal examples`_ sections for use cases.

        .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples

    Examples:

    .. code-block:: python

        import os
        from torch import nn, optim
        from torch.nn import functional as F
        from torch.utils.data import DataLoader
        from catalyst import dl, metrics
        from catalyst.data.transforms import ToTensor
        from catalyst.contrib.datasets import MNIST

        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        optimizer = optim.Adam(model.parameters(), lr=0.02)

        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32
            ),
        }

        class CustomRunner(dl.Runner):
            def predict_batch(self, batch):
                # model inference step
                return self.model(batch[0].to(self.device))

            def on_loader_start(self, runner):
                super().on_loader_start(runner)
                self.meters = {
                    key: metrics.AdditiveValueMetric(compute_on_call=False)
                    for key in ["loss", "accuracy01", "accuracy03"]
                }

            def handle_batch(self, batch):
                # model train/valid step
                # unpack the batch
                x, y = batch
                # run model forward pass
                logits = self.model(x)
                # compute the loss
                loss = F.cross_entropy(logits, y)
                # compute other metrics of interest
                accuracy01, accuracy03 = metrics.accuracy(logits, y, topk=(1, 3))
                # log metrics
                self.batch_metrics.update(
                    {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
                )
                for key in ["loss", "accuracy01", "accuracy03"]:
                    self.meters[key].update(
                        self.batch_metrics[key].item(), self.batch_size
                    )
                # run model backward pass
                if self.is_train_loader:
                    loss.backward()
                    self.optimizer.step()
                    self.optimizer.zero_grad()

            def on_loader_end(self, runner):
                for key in ["loss", "accuracy01", "accuracy03"]:
                    self.loader_metrics[key] = self.meters[key].compute()[0]
                super().on_loader_end(runner)

        runner = CustomRunner()
        # model training
        runner.train(
            model=model,
            optimizer=optimizer,
            loaders=loaders,
            logdir="./logs",
            num_epochs=5,
            verbose=True,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
        )
        # model inference
        for logits in runner.predict_loader(loader=loaders["valid"]):
            assert logits.detach().cpu().numpy().shape[-1] == 10
    """
    if not engine:
        engine = get_available_engine(fp16=fp16, ddp=ddp, amp=amp, apex=apex)
    self._engine = engine

    if model is not None:
        self.model = model
    assert self.model is not None

    # NOTE: restoring from a checkpoint (``resume``) is currently disabled
    # in this method.

    self.model = self.engine.sync_device(self.model)
    # Calls ``train`` with ``mode=False`` (presumably inference mode).
    maybe_recursive_call(self.model, "train", mode=False)

    set_global_seed(seed)
    for data in loader:
        prediction = self.predict_batch(data)
        yield prediction
def train(
    self,
    *,
    # data
    loaders: "OrderedDict[str, DataLoader]",
    # model, engine and trial
    model: Model,
    engine: Union["IEngine", str] = None,
    trial: ITrial = None,
    # optimisation components
    criterion: Criterion = None,
    optimizer: Optimizer = None,
    scheduler: Scheduler = None,
    # callbacks
    callbacks: "Union[List[Callback], OrderedDict[str, Callback]]" = None,
    # loggers
    loggers: "Dict[str, ILogger]" = None,
    # experiment-level settings
    seed: int = 42,
    hparams: Dict[str, Any] = None,
    # stage-level settings
    num_epochs: int = 1,
    # settings consumed by callbacks
    logdir: str = None,
    valid_loader: str = None,
    valid_metric: str = None,
    minimize_valid_metric: bool = True,
    verbose: bool = False,
    timeit: bool = False,
    check: bool = False,
    overfit: bool = False,
    load_best_on_end: bool = False,
    # flags forwarded to ``get_available_engine``
    fp16: bool = False,
    amp: bool = False,
    apex: bool = False,
    ddp: bool = False,
) -> None:
    """
    Starts the train stage of the model.

    Every argument is stored on the runner (as a ``_``-prefixed attribute),
    the stage name is set to ``"train"``, and the experiment is launched
    with ``self.run()``.

    Args:
        loaders: dictionary with one or several
            ``torch.utils.data.DataLoader`` for training, validation
            or inference
        model: model to train
        engine: engine to use for model training; when falsy, one is
            created with ``get_available_engine`` from the ``fp16``/``amp``/
            ``apex``/``ddp`` flags
        trial: trial to use during model training
        criterion: criterion function for training
        optimizer: optimizer for training
        scheduler: scheduler for training
        callbacks: list or dictionary with Catalyst callbacks
        loggers: dictionary with Catalyst loggers
        seed: experiment's initial seed value
        hparams: hyperparameters for the run
        num_epochs: number of training epochs
        logdir: path to output directory
        valid_loader: loader name used to calculate
            the metrics and save the checkpoints. For example,
            you can pass `train` and then
            the metrics will be taken from `train` loader.
        valid_metric: the key to the name of the metric
            by which the checkpoints will be selected.
        minimize_valid_metric: flag to indicate whether
            the ``valid_metric`` should be minimized or not (default: True).
        verbose: if `True`, it displays the status of the training
            to the console.
        timeit: if True, computes the execution time
            of training process and displays it to the console.
        check: if True, then only checks that pipeline is working
            (3 epochs only with 3 batches per loader)
        overfit: if True, then takes only one batch per loader
            for model overfitting, for advance usage please check
            ``BatchOverfitCallback``
        load_best_on_end: if True, Runner will load
            best checkpoint state (model, optimizer, etc)
            according to validation metrics. Requires specified ``logdir``.
        fp16: boolean flag to use half-precision training (AMP > APEX)
        amp: boolean flag to use amp half-precision
        apex: boolean flag to use apex half-precision
        ddp: if `True` will start training in distributed mode.
            Note: Works only with python scripts. No jupyter support.

    .. note::
        Please follow the `minimal examples`_ sections for use cases.

        .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples

    Examples:

    .. code-block:: python

        import os
        from torch import nn, optim
        from torch.nn import functional as F
        from torch.utils.data import DataLoader
        from catalyst import dl, metrics
        from catalyst.data.transforms import ToTensor
        from catalyst.contrib.datasets import MNIST

        model = nn.Sequential(nn.Flatten(), nn.Linear(28 * 28, 10))
        optimizer = optim.Adam(model.parameters(), lr=0.02)

        loaders = {
            "train": DataLoader(
                MNIST(os.getcwd(), train=True, download=True, transform=ToTensor()),
                batch_size=32
            ),
            "valid": DataLoader(
                MNIST(os.getcwd(), train=False, download=True, transform=ToTensor()),
                batch_size=32
            ),
        }

        class CustomRunner(dl.Runner):
            def predict_batch(self, batch):
                # model inference step
                return self.model(batch[0].to(self.device))

            def on_loader_start(self, runner):
                super().on_loader_start(runner)
                self.meters = {
                    key: metrics.AdditiveValueMetric(compute_on_call=False)
                    for key in ["loss", "accuracy01", "accuracy03"]
                }

            def handle_batch(self, batch):
                # model train/valid step
                # unpack the batch
                x, y = batch
                # run model forward pass
                logits = self.model(x)
                # compute the loss
                loss = F.cross_entropy(logits, y)
                # compute other metrics of interest
                accuracy01, accuracy03 = metrics.accuracy(logits, y, topk=(1, 3))
                # log metrics
                self.batch_metrics.update(
                    {"loss": loss, "accuracy01": accuracy01, "accuracy03": accuracy03}
                )
                for key in ["loss", "accuracy01", "accuracy03"]:
                    self.meters[key].update(
                        self.batch_metrics[key].item(), self.batch_size
                    )
                # run model backward pass
                if self.is_train_loader:
                    loss.backward()
                    self.optimizer.step()
                    self.optimizer.zero_grad()

            def on_loader_end(self, runner):
                for key in ["loss", "accuracy01", "accuracy03"]:
                    self.loader_metrics[key] = self.meters[key].compute()[0]
                super().on_loader_end(runner)

        runner = CustomRunner()
        # model training
        runner.train(
            model=model,
            optimizer=optimizer,
            loaders=loaders,
            logdir="./logs",
            num_epochs=5,
            verbose=True,
            valid_loader="valid",
            valid_metric="loss",
            minimize_valid_metric=True,
        )
        # model inference
        for logits in runner.predict_loader(loader=loaders["valid"]):
            assert logits.detach().cpu().numpy().shape[-1] == 10
    """
    # Engine: an explicit one wins, otherwise auto-select from the flags.
    if not engine:
        engine = get_available_engine(fp16=fp16, ddp=ddp, amp=amp, apex=apex)
    self._engine = engine

    # Experiment-level objects and data.
    self._trial, self._loggers = trial, loggers
    self._loaders = loaders

    # Model and optimisation components.
    self._model, self._criterion = model, criterion
    self._optimizer, self._scheduler = optimizer, scheduler

    self._callbacks = callbacks

    # Stage description and bookkeeping.
    self._stage = "train"
    self._seed, self._hparams = seed, hparams
    self._num_epochs = num_epochs
    self._logdir = logdir

    # Validation / checkpoint-selection settings.
    self._valid_loader, self._valid_metric = valid_loader, valid_metric
    self._minimize_valid_metric = minimize_valid_metric

    # Behaviour switches.
    self._verbose, self._timeit = verbose, timeit
    self._check, self._overfit = check, overfit
    self._load_best_on_end = load_best_on_end

    # Launch the experiment.
    self.run()
def get_engine(self) -> IEngine:
    """Return the engine for a run.

    Uses the engine already stored on the runner when it is truthy;
    otherwise asks ``get_available_engine()`` for a default one.

    Returns:
        the engine instance to use for the run
    """
    stored_engine = self._engine
    if not stored_engine:
        return get_available_engine()
    return stored_engine
def train(
    self,
    *,
    # data
    loaders: "OrderedDict[str, DataLoader]",
    # model, engine and trial
    model: Model,
    engine: Union["IEngine", str] = None,
    trial: ITrial = None,
    # optimisation components
    criterion: Criterion = None,
    optimizer: Optimizer = None,
    scheduler: Scheduler = None,
    # callbacks
    callbacks: "Union[List[Callback], OrderedDict[str, Callback]]" = None,
    # loggers
    loggers: "Dict[str, ILogger]" = None,
    # experiment-level settings
    seed: int = 42,
    hparams: Dict[str, Any] = None,
    # stage-level settings
    num_epochs: int = 1,
    # settings consumed by callbacks
    logdir: str = None,
    valid_loader: str = None,
    valid_metric: str = None,
    minimize_valid_metric: bool = True,
    verbose: bool = False,
    timeit: bool = False,
    check: bool = False,
    overfit: bool = False,
    load_best_on_end: bool = False,
    # flags forwarded to ``get_available_engine``
    fp16: bool = False,
    amp: bool = False,
    apex: bool = False,
    ddp: bool = False,
) -> None:
    """
    Starts the train stage of the model.

    Every argument is stored on the runner (as a ``_``-prefixed attribute),
    the stage name is set to ``"train"``, and the experiment is launched
    with ``self.run()``.

    Args:
        loaders: dictionary with one or several
            ``torch.utils.data.DataLoader`` for training, validation
            or inference
        model: model to train
        engine: engine to use for model training; when falsy, one is
            created with ``get_available_engine`` from the ``fp16``/``amp``/
            ``apex``/``ddp`` flags
        trial: trial to use during model training
        criterion: criterion function for training
        optimizer: optimizer for training
        scheduler: scheduler for training
        callbacks: list or dictionary with Catalyst callbacks
        loggers: dictionary with Catalyst loggers
        seed: experiment's initial seed value
        hparams: hyperparameters for the run
        num_epochs: number of training epochs
        logdir: path to output directory
        valid_loader: loader name used to calculate
            the metrics and save the checkpoints. For example,
            you can pass `train` and then
            the metrics will be taken from `train` loader.
        valid_metric: the key to the name of the metric
            by which the checkpoints will be selected.
        minimize_valid_metric: flag to indicate whether
            the ``valid_metric`` should be minimized or not.
        verbose: if `True`, it displays the status of the training
            to the console.
        timeit: if True, computes the execution time
            of training process and displays it to the console.
        check: if True, then only checks that pipeline is working
            (3 epochs only with 3 batches per loader)
        overfit: if True, then takes only one batch per loader
            for model overfitting, for advance usage please check
            ``BatchOverfitCallback``
        load_best_on_end: if True, Runner will load
            best checkpoint state (model, optimizer, etc)
            according to validation metrics. Requires specified ``logdir``.
        fp16: boolean flag to use half-precision training (AMP > APEX)
        amp: boolean flag to use amp half-precision
        apex: boolean flag to use apex half-precision
        ddp: if `True` will start training in distributed mode.
            Note: Works only with python scripts. No jupyter support.
    """
    # Engine: an explicit one wins, otherwise auto-select from the flags.
    if not engine:
        engine = get_available_engine(fp16=fp16, ddp=ddp, amp=amp, apex=apex)
    self._engine = engine

    # Experiment-level objects and data.
    self._trial, self._loggers = trial, loggers
    self._loaders = loaders

    # Model and optimisation components.
    self._model, self._criterion = model, criterion
    self._optimizer, self._scheduler = optimizer, scheduler

    self._callbacks = callbacks

    # Stage description and bookkeeping.
    self._stage = "train"
    self._seed, self._hparams = seed, hparams
    self._num_epochs = num_epochs
    self._logdir = logdir

    # Validation / checkpoint-selection settings.
    self._valid_loader, self._valid_metric = valid_loader, valid_metric
    self._minimize_valid_metric = minimize_valid_metric

    # Behaviour switches.
    self._verbose, self._timeit = verbose, timeit
    self._check, self._overfit = check, overfit
    self._load_best_on_end = load_best_on_end

    # Launch the experiment.
    self.run()
def train(
    self,
    *,
    # data
    loaders: "OrderedDict[str, DataLoader]",
    # model and engine
    model: TorchModel,
    engine: Union["Engine", str] = None,
    # optimisation components
    criterion: TorchCriterion = None,
    optimizer: TorchOptimizer = None,
    scheduler: TorchScheduler = None,
    # callbacks
    callbacks: "Union[List[Callback], OrderedDict[str, Callback]]" = None,
    # loggers
    loggers: "Dict[str, ILogger]" = None,
    # experiment-level settings
    seed: int = 42,
    hparams: Dict[str, Any] = None,
    num_epochs: int = 1,
    # settings consumed by callbacks
    logdir: str = None,
    resume: str = None,
    valid_loader: str = None,
    valid_metric: str = None,
    minimize_valid_metric: bool = None,
    verbose: bool = False,
    timeit: bool = False,
    check: bool = False,
    overfit: bool = False,
    profile: bool = False,
    load_best_on_end: bool = False,
    # flags forwarded to ``get_available_engine``
    cpu: bool = False,
    fp16: bool = False,
    ddp: bool = False,
) -> None:
    """
    Starts the training of the model.

    Every argument is stored on the runner (as a ``_``-prefixed attribute)
    and the experiment is launched with ``self.run()``.

    Args:
        loaders: dictionary with one or several
            ``torch.utils.data.DataLoader`` for training, validation
            or inference
        model: model to train
        engine: engine to use for model training; when falsy, one is
            created with ``get_available_engine(cpu=cpu, fp16=fp16, ddp=ddp)``
        criterion: criterion function for training
        optimizer: optimizer for training
        scheduler: scheduler for training
        callbacks: list or dictionary with Catalyst callbacks
        loggers: dictionary with Catalyst loggers
        seed: experiment's initial seed value
        hparams: hyperparameters for the run
        num_epochs: number of training epochs
        logdir: path to output directory
        resume: path to checkpoint for model
        valid_loader: loader name used to calculate
            the metrics and save the checkpoints. For example,
            you can pass `train` and then
            the metrics will be taken from `train` loader.
        valid_metric: the key to the name of the metric
            by which the checkpoints will be selected.
        minimize_valid_metric: flag to indicate whether
            the ``valid_metric`` should be minimized or not. The signature
            default is ``None``; this method only stores the value, so how
            ``None`` is resolved is decided elsewhere.
        verbose: if `True`, it displays the status of the training
            to the console.
        timeit: if True, computes the execution time
            of training process and displays it to the console.
        check: if True, then only checks that pipeline is working
            (3 epochs only with 3 batches per loader)
        overfit: if True, then takes only one batch per loader
            for model overfitting, for advance usage please check
            ``BatchOverfitCallback``
        profile: if True, then uses ProfilerCallback,
            for advance usage please check ``ProfilerCallback``
        load_best_on_end: if True, Runner will load
            best checkpoint state (model, optimizer, etc)
            according to validation metrics. Requires specified ``logdir``.
        cpu: boolean flag to force CPU usage
        fp16: boolean flag to use half-precision
        ddp: if `True` will start training in distributed mode.
            Note: Works only with python scripts. No jupyter support.

    .. note::
        Please follow the `minimal examples`_ sections for use cases.

        .. _`minimal examples`: https://github.com/catalyst-team/catalyst#minimal-examples  # noqa: E501, W505
    """
    # Engine: an explicit one wins, otherwise auto-select from the flags.
    if not engine:
        engine = get_available_engine(cpu=cpu, fp16=fp16, ddp=ddp)
    self._engine = engine

    # Loggers and data.
    self._loggers = loggers
    self._loaders = loaders

    # Model and optimisation components.
    self._model, self._criterion = model, criterion
    self._optimizer, self._scheduler = optimizer, scheduler

    self._callbacks = callbacks

    # Experiment bookkeeping.
    self._seed, self._hparams = seed, hparams
    self._num_epochs = num_epochs
    self._logdir, self._resume = logdir, resume

    # Validation / checkpoint-selection settings.
    self._valid_loader, self._valid_metric = valid_loader, valid_metric
    self._minimize_valid_metric = minimize_valid_metric

    # Behaviour switches.
    self._verbose, self._timeit = verbose, timeit
    self._check, self._overfit = check, overfit
    self._profile = profile
    self._load_best_on_end = load_best_on_end

    # Launch the experiment.
    self.run()