def __init__(self, min_steps: Optional[int] = 0, max_steps: int = -1) -> None:
    """Validate the step limits and set up the initial state.

    Args:
        min_steps: Stored unchanged as ``self.min_steps``.
        max_steps: Step limit, where ``-1`` stands for infinite steps. ``None`` is
            still accepted but deprecated and is converted to ``-1``.

    Raises:
        MisconfigurationException: If ``max_steps`` is smaller than ``-1``.
    """
    super().__init__()

    # Guard clause first: any concrete value below -1 is invalid.
    if max_steps is not None and max_steps < -1:
        raise MisconfigurationException(
            f"`max_steps` must be a non-negative integer or -1 (infinite steps). You passed in {max_steps}."
        )

    # Deprecated spelling of "no limit": warn and normalise to -1.
    if max_steps is None:
        rank_zero_deprecation(
            "Setting `max_steps = None` is deprecated in v1.5 and will no longer be supported in v1.7."
            " Use `max_steps = -1` instead."
        )
        max_steps = -1

    # step limits and the step counter
    self.min_steps = min_steps
    self.max_steps = max_steps
    self.global_step: int = 0

    # progress trackers
    self.batch_progress = BatchProgress()
    self.scheduler_progress = SchedulerProgress()

    # child loops start out unset
    self.batch_loop: Optional[TrainingBatchLoop] = None
    self.val_loop: Optional["loops.EvaluationLoop"] = None

    # private bookkeeping
    self._results = ResultCollection(training=True)
    self._outputs: _OUTPUTS_TYPE = []
    self._warning_cache = WarningCache()
    self._dataloader_iter: Optional[Iterator] = None
    # holds a loaded dataloader state until the dataloader objects exist
    self._dataloader_state_dict: Dict[str, Any] = {}
def __init__(self, trainer: 'pl.Trainer'):
    """Store the trainer reference and initialise the remaining attributes.

    Args:
        trainer: The trainer object kept on ``self.trainer``.
    """
    self.trainer: 'pl.Trainer' = trainer

    # collected results: an empty output list and no predictions yet
    self.outputs: EPOCH_OUTPUT = []
    self.predictions: Optional[PredictionCollection] = None

    # dataloader-related limits start out unset
    self.max_batches: Optional[List[Union[int, float]]] = None
    self.num_dataloaders: Optional[int] = None

    self.warning_cache = WarningCache()
def __init__(self, min_steps: int, max_steps: int):
    """Record the step limits and reset all counters and caches.

    Args:
        min_steps: Stored unchanged as ``self.min_steps``.
        max_steps: Stored unchanged as ``self.max_steps``.
    """
    super().__init__()

    # step limits
    self.min_steps: int = min_steps
    self.max_steps: int = max_steps

    # counters, all starting from zero
    self.global_step: int = 0
    # total batch index across all epochs
    self.total_batch_idx: int = 0
    # current batch index in the loop that runs over the dataloader(s)
    self.iteration_count: int = 0
    self.batches_seen: int = 0

    # current split index when a batch is split into chunks for truncated
    # backprop through time
    self.split_idx: Optional[int] = None
    self.is_last_batch: Optional[bool] = None
    self.epoch_output: Optional[List[List[STEP_OUTPUT]]] = None
    self.batch_loop: Optional[TrainingBatchLoop] = None

    # private state
    self._dataloader_idx: Optional[int] = None
    self._should_stop: bool = False

    self.warning_cache: WarningCache = WarningCache()
    self._results = ResultCollection(training=True)
class Closure(AbstractClosure[ClosureResult]):
    """Closure for automatic optimization in Lightning that bundles three elementary closures into one:
    ``training_step``, ``backward`` and ``zero_grad``.

    The training loop(s) create the Closure and hand it to :meth:`torch.optim.Optimizer.step`. The optimizer
    is responsible for calling the closure and may optionally do something with its output.

    Args:
        step_fn: Typically the :meth:`pytorch_lightning.core.lightning.LightningModule.training_step`
            wrapped with processing for its outputs.
        backward_fn: A function that receives a loss value and performs back-propagation.
            Pass ``None`` to skip the backward operation.
        zero_grad_fn: A function that zeroes the gradients. Pass ``None`` to skip zero_grad,
            for example when accumulating gradients.
        profiler: A profiler used to profile the passed-in closure functions.

    Example:

        closure = Closure(step_fn)
        optimizer = torch.optim.Adam(...)
        optimizer.step(closure)
    """

    # declared on the class, so every instance uses the same cache object
    warning_cache = WarningCache()

    def __init__(
        self,
        step_fn: Callable[[], ClosureResult],
        backward_fn: Optional[Callable[[Tensor], None]] = None,
        zero_grad_fn: Optional[Callable[[], None]] = None,
        profiler: Optional[BaseProfiler] = None,
    ):
        super().__init__()
        self._step_fn = step_fn
        self._backward_fn = backward_fn
        self._zero_grad_fn = zero_grad_fn
        # fall back to a pass-through profiler when none was supplied
        if profiler is None:
            profiler = PassThroughProfiler()
        self._profiler = profiler

    def closure(self, *args: Any, **kwargs: Any) -> ClosureResult:
        """Run the step function, then the optional zero-grad and backward functions.

        The positional and keyword arguments are accepted but not forwarded.

        Returns:
            The object returned by ``step_fn``.
        """
        with self._profiler.profile("training_step_and_backward"):
            result = self._step_fn()
            loss = result.closure_loss

            if loss is None:
                self.warning_cache.warn(
                    "`training_step` returned `None`. If this was on purpose, ignore this warning..."
                )

            zero_grad = self._zero_grad_fn
            if zero_grad is not None:
                zero_grad()

            # backward only runs when there is both a function and a loss
            backward = self._backward_fn
            if not (backward is None or loss is None):
                backward(loss)

        return result

    def __call__(self, *args: Any, **kwargs: Any) -> Optional[Tensor]:
        """Run :meth:`closure`, keep its result on ``self._result`` and return the result's ``loss``."""
        outcome = self.closure(*args, **kwargs)
        self._result = outcome
        return outcome.loss
def __init__(self, trainer):
    """Store the trainer reference and initialise the remaining attributes.

    Args:
        trainer: The trainer object kept on ``self.trainer``.
    """
    self.trainer = trainer

    # collected results start out empty
    self.outputs = []
    self.step_metrics = []
    self.predictions = None

    # dataloader-related limits start out unset
    self.max_batches = None
    self.num_dataloaders = None

    self.warning_cache = WarningCache()
def __init__(self) -> None:
    """Initialise the prediction state with empty containers and unset limits."""
    super().__init__()

    # public state
    self.return_predictions: bool = False
    self.predictions: List[Any] = []
    self.current_batch_indices: List[int] = []

    # private state
    self._all_batch_indices: List[int] = []
    self._dl_max_batches: Optional[int] = None
    self._num_dataloaders: Optional[int] = None
    self._warning_cache = WarningCache()
def __init__(self, trainer):
    """Store the trainer reference and leave all other attributes unset.

    Args:
        trainer: The trainer object kept on ``self.trainer``.
    """
    self.trainer = trainer
    self.warning_cache = WarningCache()

    # dataloader-related limits
    self.max_batches = None
    self.num_dataloaders = None

    # batch index bookkeeping
    self.batch_indices: Optional[List[int]] = None
    self.epoch_batch_indices: Optional[List[List[int]]] = None

    # `DDPSpawnPlugin` plugins and their derivatives don't support returning predictions.
    self._return_predictions: Optional[bool] = None
    self._previous_grad_status: Optional[bool] = None
def __init__(self, trainer, multiple_trainloader_mode: str):
    """Store the trainer and the trainloader mode, and reset the training state.

    Args:
        trainer: The trainer object kept on ``self.trainer``.
        multiple_trainloader_mode: Stored on this object and also written to
            ``trainer._multiple_trainloader_mode``.
    """
    self.trainer = trainer
    self.warning_cache = WarningCache()

    # loss tracking: nothing accumulated yet, running loss over a window of 20
    self.accumulated_loss = None
    self.running_loss = TensorRunningAccum(window_length=20)

    # private flags and caches
    self._teardown_already_run = False
    self._skip_backward = False
    self._curr_step_result = None
    self._cur_grad_norm_dict = None

    # the mode is recorded both here and on the trainer
    self._multiple_trainloader_mode = multiple_trainloader_mode
    self.trainer._multiple_trainloader_mode = multiple_trainloader_mode
def __init__(self) -> None:
    """Initialise the prediction state with empty containers and zeroed limits."""
    super().__init__()

    # public state
    self.return_predictions = False
    self.predictions: List[Any] = []
    self.current_batch_indices: List[int] = []
    self.batch_progress = Progress()

    # private state
    self._dl_max_batches = 0
    self._num_dataloaders = 0
    self._seen_batch_indices: List[List[int]] = []
    self._warning_cache = WarningCache()
def __init__(self) -> None:
    """Reset loss tracking, batch indices and the private per-batch state."""
    super().__init__()

    # loss tracking: nothing accumulated yet, running loss over a window of 20
    self.accumulated_loss: Optional[Tensor] = None
    self.running_loss: TensorRunningAccum = TensorRunningAccum(window_length=20)

    # outputs and positions
    self.batch_outputs: Optional[List[List[STEP_OUTPUT]]] = None
    self.batch_idx: int = 0
    self.split_idx: Optional[int] = None

    self.warning_cache: WarningCache = WarningCache()

    # private state
    self._skip_backward: bool = False
    self._hiddens: Optional[Tensor] = None
    self._optimizer_freq_cumsum: Optional[int] = None
    self._remaining_splits: Optional[List[Any]] = None
def __init__(self, trainer, multiple_trainloader_mode):
    """Store the trainer and the trainloader mode, and reset the training state.

    Args:
        trainer: The trainer object kept on ``self.trainer``.
        multiple_trainloader_mode: Stored on this object and also written to
            ``trainer._multiple_trainloader_mode``.
    """
    self.trainer = trainer

    # accumulators start out unset
    self.early_stopping_accumulator = None
    self.checkpoint_accumulator = None
    self.accumulated_loss = None

    self.warning_cache = WarningCache()
    # running loss over a window of 20
    self.running_loss = TensorRunningAccum(window_length=20)

    # flags
    self.automatic_optimization = True
    self._teardown_already_run = False
    self._skip_backward = False

    # private caches
    self._curr_step_result = None
    self._cur_grad_norm_dict = None

    # the mode is recorded both here and on the trainer
    self._multiple_trainloader_mode = multiple_trainloader_mode
    self.trainer._multiple_trainloader_mode = multiple_trainloader_mode
def __init__(self) -> None:
    """Reset loss tracking, create the optimizer loop and clear the private per-batch state."""
    super().__init__()

    # loss tracking: nothing accumulated yet, running loss over a window of 20
    self.accumulated_loss: Optional[Tensor] = None
    self.batch_outputs: Optional[List[List[STEP_OUTPUT]]] = None
    self.running_loss: TensorRunningAccum = TensorRunningAccum(window_length=20)

    # current split index when a batch is split into chunks for truncated
    # backprop through time
    self.split_idx: Optional[int] = None

    self.optimizer_loop = OptimizerLoop()

    # private state
    self._warning_cache: WarningCache = WarningCache()
    self._hiddens: Optional[Tensor] = None
    self._optimizer_freq_cumsum: Optional[int] = None
    self._remaining_splits: Optional[List[Any]] = None
def __init__(
    self,
    trainer,
    max_epochs: Optional[int],
    min_epochs: Optional[int],
    max_steps: Optional[int],
    min_steps: Optional[int],
    num_sanity_val_steps: int,
):
    """Store the trainer, reset the training state and resolve the epoch/step limits.

    Args:
        trainer: The trainer object kept on ``self.trainer``; several of its attributes are reset here.
        max_epochs: Epoch limit; replaced by 1000 when both ``max_epochs`` and ``max_steps`` are ``None``.
        min_epochs: Minimum epochs; replaced by 1 when both ``min_epochs`` and ``min_steps`` are ``None``.
        max_steps: Stored unchanged as ``self.max_steps``.
        min_steps: Stored unchanged as ``self.min_steps``.
        num_sanity_val_steps: Written to the trainer; ``-1`` is translated to ``float("inf")``.
    """
    self.trainer = trainer
    self.accumulated_loss = None
    self.warning_cache = WarningCache()
    self._teardown_already_run = False
    # running loss over a window of 20
    self.running_loss = TensorRunningAccum(window_length=20)
    self._skip_backward = False
    self._optimizer_freq_cumsum = None
    self._hiddens = None

    # progress counters
    self.global_step = 0
    self.current_epoch = 0
    self.trainer.should_stop = False

    # total batch index across all epochs
    self.total_batch_idx = 0
    # current batch index in the loop that runs over the dataloader(s)
    self.batch_idx = 0
    # current split index when a batch is split into chunks for truncated
    # backprop through time
    self.split_idx = None

    self.trainer.num_training_batches = 0
    self.trainer.train_dataloader = None

    # With neither max_epochs nor max_steps given, keep the existing default of max_epochs = 1000.
    no_upper_limit = max_epochs is None and max_steps is None
    if no_upper_limit:
        self.max_epochs = 1000
    else:
        self.max_epochs = max_epochs

    # With neither min_epochs nor min_steps given, keep the existing default of min_epochs = 1.
    no_lower_limit = min_epochs is None and min_steps is None
    if no_lower_limit:
        self.min_epochs = 1
    else:
        self.min_epochs = min_epochs

    self.max_steps = max_steps
    self.min_steps = min_steps

    # -1 is stored on the trainer as infinity
    self.trainer.num_sanity_val_steps = float("inf") if num_sanity_val_steps == -1 else num_sanity_val_steps
def __init__(self, min_steps: int, max_steps: int):
    """Record the step limits and create the progress trackers and caches.

    Args:
        min_steps: Stored unchanged as ``self.min_steps``.
        max_steps: Stored unchanged as ``self.max_steps``.
    """
    super().__init__()

    # step limits
    self.min_steps: int = min_steps
    self.max_steps: int = max_steps

    # counters
    self.global_step: int = 0
    # total batch index across all epochs
    self.total_batch_idx: int = 0
    self.is_last_batch: Optional[bool] = None

    # progress trackers
    self.batch_progress = Progress()
    self.scheduler_progress = SchedulerProgress()

    # child loops start out unset
    self.batch_loop: Optional[TrainingBatchLoop] = None
    self.val_loop: Optional["loops.EvaluationLoop"] = None

    # private state
    self._results = ResultCollection(training=True)
    self._dataloader_idx: Optional[int] = None
    self._warning_cache: WarningCache = WarningCache()
    self._epoch_output: Optional[List[List[STEP_OUTPUT]]] = None
def __init__(
    self,
    trainer,
    multiple_trainloader_mode: str,
    max_epochs: Optional[int],
    min_epochs: Optional[int],
    max_steps: Optional[int],
    min_steps: Optional[int],
    num_sanity_val_steps: int,
):
    """Store the trainer, reset the training state and resolve the epoch/step limits.

    Args:
        trainer: The trainer object kept on ``self.trainer``; several of its attributes are reset here.
        multiple_trainloader_mode: Stored on this object and also written to
            ``trainer._multiple_trainloader_mode``.
        max_epochs: Epoch limit; replaced by 1000 when both ``max_epochs`` and ``max_steps`` are ``None``.
        min_epochs: Minimum epochs; replaced by 1 when both ``min_epochs`` and ``min_steps`` are ``None``.
        max_steps: Stored unchanged as ``self.max_steps``.
        min_steps: Stored unchanged as ``self.min_steps``.
        num_sanity_val_steps: Written to the trainer; ``-1`` is translated to ``float("inf")``.
    """
    self.trainer = trainer
    self.accumulated_loss = None
    self.warning_cache = WarningCache()
    self._teardown_already_run = False
    # running loss over a window of 20
    self.running_loss = TensorRunningAccum(window_length=20)
    self._curr_step_result = None
    self._cur_grad_norm_dict = None

    # the mode is recorded both here and on the trainer
    self._multiple_trainloader_mode = multiple_trainloader_mode
    self._skip_backward = False
    self.trainer._multiple_trainloader_mode = multiple_trainloader_mode
    self._optimizer_freq_cumsum = None

    # progress counters
    self.global_step = 0
    self.current_epoch = 0
    self.trainer.should_stop = False
    self.total_batch_idx = 0
    self.batch_idx = 0

    self.trainer.num_training_batches = 0
    self.trainer.train_dataloader = None

    # With neither max_epochs nor max_steps given, keep the existing default of max_epochs = 1000.
    no_upper_limit = max_epochs is None and max_steps is None
    if no_upper_limit:
        self.max_epochs = 1000
    else:
        self.max_epochs = max_epochs

    # With neither min_epochs nor min_steps given, keep the existing default of min_epochs = 1.
    no_lower_limit = min_epochs is None and min_steps is None
    if no_lower_limit:
        self.min_epochs = 1
    else:
        self.min_epochs = min_epochs

    self.max_steps = max_steps
    self.min_steps = min_steps

    # -1 is stored on the trainer as infinity
    self.trainer.num_sanity_val_steps = float("inf") if num_sanity_val_steps == -1 else num_sanity_val_steps
def __init__(
    self,
    name: Optional[str] = None,
    save_dir: Optional[str] = None,
    offline: Optional[bool] = False,
    id: Optional[str] = None,
    anonymous: Optional[bool] = False,
    version: Optional[str] = None,
    project: Optional[str] = None,
    log_model: Optional[bool] = False,
    experiment=None,
    prefix: Optional[str] = '',
    sync_step: Optional[bool] = True,
    **kwargs
):
    """Check the configuration and store the `wandb` logger settings.

    Args:
        name: Stored as ``self._name``.
        save_dir: Stored as ``self._save_dir``.
        offline: Stored as ``self._offline``; cannot be combined with ``log_model``.
        id: Used for ``self._id`` when ``version`` is falsy.
        anonymous: When truthy, ``self._anonymous`` becomes ``'allow'``, otherwise ``None``.
        version: Takes precedence over ``id`` for ``self._id``.
        project: Stored as ``self._project``.
        log_model: Stored as ``self._log_model``; cannot be combined with ``offline``.
        experiment: Stored as ``self._experiment``.
        prefix: Stored as ``self._prefix``.
        sync_step: Stored as ``self._sync_step``.
        **kwargs: Extra keyword arguments, stored as ``self._kwargs``.

    Raises:
        ImportError: If the module-level ``wandb`` name is ``None``.
        MisconfigurationException: If both ``offline`` and ``log_model`` are truthy.
    """
    if wandb is None:
        raise ImportError(
            'You want to use `wandb` logger which is not installed yet,'  # pragma: no-cover
            ' install it with `pip install wandb`.'
        )

    # model checkpoints cannot be uploaded without a connection
    if offline and log_model:
        message = (
            f'Providing log_model={log_model} and offline={offline} is an invalid configuration'
            ' since model checkpoints cannot be uploaded in offline mode.\n'
            'Hint: Set `offline=False` to log your model.'
        )
        raise MisconfigurationException(message)

    super().__init__()

    self._name = name
    self._save_dir = save_dir
    self._offline = offline
    # ``version`` wins over ``id`` whenever it is truthy
    self._id = version or id
    if anonymous:
        self._anonymous = 'allow'
    else:
        self._anonymous = None
    self._project = project
    self._log_model = log_model
    self._prefix = prefix
    self._sync_step = sync_step
    self._experiment = experiment
    self._kwargs = kwargs

    # logging multiple Trainer on a single W&B run (k-fold, resuming, etc)
    self._step_offset = 0
    self.warning_cache = WarningCache()
def __init__(self, min_steps: int, max_steps: int):
    """Record the step limits and create the progress trackers and caches.

    Args:
        min_steps: Stored unchanged as ``self.min_steps``.
        max_steps: Stored unchanged as ``self.max_steps``.
    """
    super().__init__()

    # step limits
    self.min_steps: int = min_steps
    self.max_steps: int = max_steps

    # counters
    self.global_step: int = 0
    # total batch index across all epochs
    self.total_batch_idx: int = 0
    # current split index when a batch is split into chunks for truncated
    # backprop through time
    self.split_idx: Optional[int] = None
    # number of batches seen this run, updated immediately after batch_loop.run()
    # TODO: replace by progress tracking
    self.batches_seen: int = 0
    self.is_last_batch: Optional[bool] = None

    # progress trackers
    self.batch_progress = Progress()
    self.scheduler_progress = SchedulerProgress()

    # child loops start out unset
    self.batch_loop: Optional[TrainingBatchLoop] = None
    self.val_loop: Optional["loops.EvaluationLoop"] = None

    # private state
    self._results = ResultCollection(training=True)
    self._dataloader_idx: Optional[int] = None
    self._warning_cache: WarningCache = WarningCache()
    self._epoch_output: Optional[List[List[STEP_OUTPUT]]] = None
def __init__(self, min_steps: Optional[int] = None, max_steps: int = -1) -> None:
    """Validate ``max_steps`` and create the child loops, progress trackers and caches.

    Args:
        min_steps: Stored unchanged as ``self.min_steps``.
        max_steps: Step limit, where ``-1`` stands for infinite steps.

    Raises:
        MisconfigurationException: If ``max_steps`` is smaller than ``-1``.
    """
    super().__init__()

    if max_steps < -1:
        message = f"`max_steps` must be a non-negative integer or -1 (infinite steps). You passed in {max_steps}."
        raise MisconfigurationException(message)

    # step limits
    self.min_steps = min_steps
    self.max_steps = max_steps

    # progress trackers
    self.batch_progress = BatchProgress()
    self.scheduler_progress = SchedulerProgress()

    # child loops
    self.batch_loop = TrainingBatchLoop()
    self.val_loop = loops.EvaluationLoop(verbose=False)

    # private state
    self._results = _ResultCollection(training=True)
    self._outputs: _OUTPUTS_TYPE = []
    self._warning_cache = WarningCache()
    # holds a loaded dataloader state until the dataloader objects exist
    self._dataloader_state_dict: Dict[str, Any] = {}
    self._batches_that_stepped: int = 0
from pytorch_lightning.utilities.distributed import (
    _get_process_group_backend_from_env,
    get_default_process_group_backend_for_device,
    log,
)
from pytorch_lightning.utilities.enums import AMPType, PrecisionType
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _RequirementAvailable
from pytorch_lightning.utilities.model_helpers import is_overridden
from pytorch_lightning.utilities.optimizer import optimizers_to_device
from pytorch_lightning.utilities.rank_zero import rank_zero_info
from pytorch_lightning.utilities.seed import reset_seed
from pytorch_lightning.utilities.types import _PATH, LRSchedulerConfig, LRSchedulerTypeUnion, STEP_OUTPUT
from pytorch_lightning.utilities.warnings import rank_zero_warn, WarningCache

warning_cache = WarningCache()

_DEEPSPEED_AVAILABLE: bool = _RequirementAvailable("deepspeed")
if _DEEPSPEED_AVAILABLE:
    import deepspeed


def remove_module_hooks(model: torch.nn.Module) -> None:
    """Reset the hook containers on ``model`` and on every module yielded by ``model.modules()``.

    Each hook attribute is replaced with a fresh, empty ``OrderedDict`` and
    ``_is_full_backward_hook`` is set to ``None``.

    Args:
        model: The module whose ``modules()`` are visited.
    """
    # todo (tchaton) awaiting this feature to move upstream to DeepSpeed
    remaining_hook_attrs = (
        "_forward_hooks",
        "_forward_pre_hooks",
        "_state_dict_hooks",
        "_load_state_dict_pre_hooks",
    )
    for submodule in model.modules():
        submodule._backward_hooks = OrderedDict()
        submodule._is_full_backward_hook = None
        # every remaining container gets its own empty dict
        for attr_name in remaining_hook_attrs:
            setattr(submodule, attr_name, OrderedDict())
def __init__(self, trainer):
    """Store the trainer reference and leave the dataloader limits unset.

    Args:
        trainer: The trainer object kept on ``self.trainer``.
    """
    self.trainer = trainer

    # dataloader-related limits start out unset
    self.max_batches = None
    self.num_dataloaders = None

    self.warning_cache = WarningCache()