Code example #1
    def __init__(self, min_steps: Optional[int] = 0, max_steps: int = -1) -> None:
        super().__init__()
        if max_steps is None:
            rank_zero_deprecation(
                "Setting `max_steps = None` is deprecated in v1.5 and will no longer be supported in v1.7."
                " Use `max_steps = -1` instead."
            )
            max_steps = -1
        elif max_steps < -1:
            raise MisconfigurationException(
                f"`max_steps` must be a non-negative integer or -1 (infinite steps). You passed in {max_steps}."
            )
        self.min_steps = min_steps
        self.max_steps = max_steps

        self.global_step: int = 0
        self.batch_progress = BatchProgress()
        self.scheduler_progress = SchedulerProgress()

        self.batch_loop: Optional[TrainingBatchLoop] = None
        self.val_loop: Optional["loops.EvaluationLoop"] = None

        self._results = ResultCollection(training=True)
        self._outputs: _OUTPUTS_TYPE = []
        self._warning_cache = WarningCache()
        self._dataloader_iter: Optional[Iterator] = None
        # caches the loaded dataloader state until dataloader objects are available
        self._dataloader_state_dict: Dict[str, Any] = {}
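The `max_steps` handling above (the deprecation of `None` plus the `< -1` check) can be restated as a standalone helper. The sketch below is illustrative only; `normalize_max_steps` is a hypothetical name, not Lightning's API.

from typing import Optional

def normalize_max_steps(max_steps: Optional[int]) -> int:
    # hypothetical restatement of the validation in the snippet above
    if max_steps is None:
        # deprecated spelling of "no limit"; coerced to -1, as above
        return -1
    if max_steps < -1:
        raise ValueError(
            f"`max_steps` must be a non-negative integer or -1 (infinite steps). You passed in {max_steps}."
        )
    return max_steps

assert normalize_max_steps(None) == -1    # deprecated, treated as unlimited
assert normalize_max_steps(-1) == -1      # run until another stopping condition triggers
assert normalize_max_steps(1000) == 1000  # cap the number of steps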
Code example #2
    def __init__(self, trainer: 'pl.Trainer'):
        self.trainer: 'pl.Trainer' = trainer
        self.outputs: EPOCH_OUTPUT = []
        self.predictions: Optional[PredictionCollection] = None
        self.max_batches: Optional[List[Union[int, float]]] = None
        self.warning_cache = WarningCache()
        self.num_dataloaders: Optional[int] = None
Code example #3
    def __init__(self, min_steps: int, max_steps: int):
        super().__init__()
        self.min_steps: int = min_steps
        self.max_steps: int = max_steps

        self.global_step: int = 0

        # the total batch index across all epochs
        self.total_batch_idx: int = 0
        # the current batch index in the loop that runs over the dataloader(s)
        self.iteration_count: int = 0
        # the current split index when the batch gets split into chunks in truncated backprop through time
        self.split_idx: Optional[int] = None

        self._dataloader_idx: Optional[int] = None
        self._should_stop: bool = False

        self.is_last_batch: Optional[bool] = None
        self.batches_seen: int = 0
        self.warning_cache: WarningCache = WarningCache()
        self.epoch_output: Optional[List[List[STEP_OUTPUT]]] = None

        self.batch_loop: Optional[TrainingBatchLoop] = None

        self._results = ResultCollection(training=True)
Code example #4
class Closure(AbstractClosure[ClosureResult]):
    """An implementation of a :class:`AbstractClosure` for automatic optimization in Lightning that combines three
    elementary closures into one: ``training_step``, ``backward`` and ``zero_grad``.

    The Closure gets created by the training loop(s) and is then passed to the
    :meth:`torch.optim.Optimizer.step` method. The optimizer is responsible for calling the closure and optionally
    doing something with the output.

    Args:
        step_fn: This is typically the :meth:`pytorch_lightning.core.lightning.LightningModule.training_step`
            wrapped with processing for its outputs.
        backward_fn: A function that takes a loss value as input, performs back-propagation and returns the loss value.
            Can be set to ``None`` to skip the backward operation.
        zero_grad_fn: A function that zeroes the gradients. Can be set to ``None`` to skip zero_grad, for example
            when accumulating gradients.
        profiler: A profiler for profiling the actions of the passed in closure functions.

    Example:

        closure = Closure()
        optimizer = torch.optim.Adam(...)
        optimizer.step(closure)
    """

    warning_cache = WarningCache()

    def __init__(
        self,
        step_fn: Callable[[], ClosureResult],
        backward_fn: Optional[Callable[[Tensor], None]] = None,
        zero_grad_fn: Optional[Callable[[], None]] = None,
        profiler: Optional[BaseProfiler] = None,
    ):
        super().__init__()
        self._step_fn = step_fn
        self._backward_fn = backward_fn
        self._zero_grad_fn = zero_grad_fn
        self._profiler = PassThroughProfiler() if profiler is None else profiler

    def closure(self, *args: Any, **kwargs: Any) -> ClosureResult:
        with self._profiler.profile("training_step_and_backward"):
            step_output = self._step_fn()

            if step_output.closure_loss is None:
                self.warning_cache.warn(
                    "`training_step` returned `None`. If this was on purpose, ignore this warning..."
                )

            if self._zero_grad_fn is not None:
                self._zero_grad_fn()

            if self._backward_fn is not None and step_output.closure_loss is not None:
                self._backward_fn(step_output.closure_loss)

        return step_output

    def __call__(self, *args: Any, **kwargs: Any) -> Optional[Tensor]:
        self._result = self.closure(*args, **kwargs)
        return self._result.loss
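The docstring above says the closure is created by the training loop and handed to `torch.optim.Optimizer.step`. A minimal, self-contained sketch of that pattern in plain PyTorch (the model, data, and `step_fn` below are made up for illustration and are not Lightning code):

import torch
import torch.nn.functional as F

model = torch.nn.Linear(4, 1)
optimizer = torch.optim.SGD(model.parameters(), lr=0.1)
x, y = torch.randn(8, 4), torch.randn(8, 1)

def step_fn() -> torch.Tensor:
    # stands in for `training_step` plus output processing
    return F.mse_loss(model(x), y)

def closure() -> torch.Tensor:
    # mirrors the order in `Closure.closure()` above: forward step, zero_grad, backward
    loss = step_fn()
    optimizer.zero_grad()
    loss.backward()
    return loss

# the optimizer is responsible for calling the closure
optimizer.step(closure)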
Code example #5
    def __init__(self, trainer):
        self.trainer = trainer
        self.outputs = []
        self.step_metrics = []
        self.predictions = None
        self.max_batches = None
        self.warning_cache = WarningCache()
        self.num_dataloaders = None
Code example #6
    def __init__(self) -> None:
        super().__init__()
        self.return_predictions: bool = False
        self.predictions: List[Any] = []
        self.current_batch_indices: List[int] = []
        self._dl_max_batches: Optional[int] = None
        self._num_dataloaders: Optional[int] = None
        self._warning_cache = WarningCache()
        self._all_batch_indices: List[int] = []
Code example #7
    def __init__(self, trainer):
        self.trainer = trainer
        self.max_batches = None
        self.num_dataloaders = None
        self.warning_cache = WarningCache()
        self.batch_indices: Optional[List[int]] = None
        self.epoch_batch_indices: Optional[List[List[int]]] = None
        # `DDPSpawnPlugin` plugins and derivatives don't support returning predictions.
        self._return_predictions: Optional[bool] = None
        self._previous_grad_status: Optional[bool] = None
Code example #8
    def __init__(self, trainer, multiple_trainloader_mode: str):
        self.trainer = trainer
        self.accumulated_loss = None
        self.warning_cache = WarningCache()
        self._teardown_already_run = False
        self.running_loss = TensorRunningAccum(window_length=20)
        self._curr_step_result = None
        self._cur_grad_norm_dict = None
        self._multiple_trainloader_mode = multiple_trainloader_mode
        self._skip_backward = False
        self.trainer._multiple_trainloader_mode = multiple_trainloader_mode
Code example #9
    def __init__(self) -> None:
        super().__init__()
        self.return_predictions = False
        self.predictions: List[Any] = []
        self.current_batch_indices: List[int] = []
        self.batch_progress = Progress()

        self._dl_max_batches = 0
        self._num_dataloaders = 0
        self._warning_cache = WarningCache()
        self._seen_batch_indices: List[List[int]] = []
Code example #10
    def __init__(self) -> None:
        super().__init__()
        self.accumulated_loss: Optional[Tensor] = None
        self.batch_outputs: Optional[List[List[STEP_OUTPUT]]] = None
        self.running_loss: TensorRunningAccum = TensorRunningAccum(window_length=20)
        self.batch_idx: int = 0
        self.split_idx: Optional[int] = None
        self.warning_cache: WarningCache = WarningCache()

        self._hiddens: Optional[Tensor] = None
        self._optimizer_freq_cumsum: Optional[int] = None
        self._remaining_splits: Optional[List[Any]] = None
        self._skip_backward: bool = False
Code example #11
    def __init__(self, trainer, multiple_trainloader_mode):
        self.trainer = trainer
        self.early_stopping_accumulator = None
        self.checkpoint_accumulator = None
        self.accumulated_loss = None
        self.warning_cache = WarningCache()
        self._teardown_already_run = False
        self.running_loss = TensorRunningAccum(window_length=20)
        self.automatic_optimization = True
        self._curr_step_result = None
        self._cur_grad_norm_dict = None
        self._multiple_trainloader_mode = multiple_trainloader_mode
        self._skip_backward = False
        self.trainer._multiple_trainloader_mode = multiple_trainloader_mode
Code example #12
    def __init__(self) -> None:
        super().__init__()
        self.accumulated_loss: Optional[Tensor] = None
        self.batch_outputs: Optional[List[List[STEP_OUTPUT]]] = None
        self.running_loss: TensorRunningAccum = TensorRunningAccum(window_length=20)
        # the current split index when the batch gets split into chunks in truncated backprop through time
        self.split_idx: Optional[int] = None
        self.optimizer_loop = OptimizerLoop()

        self._warning_cache: WarningCache = WarningCache()
        self._hiddens: Optional[Tensor] = None
        self._optimizer_freq_cumsum: Optional[int] = None
        self._remaining_splits: Optional[List[Any]] = None
Code example #13
    def __init__(
        self,
        trainer,
        max_epochs: Optional[int],
        min_epochs: Optional[int],
        max_steps: Optional[int],
        min_steps: Optional[int],
        num_sanity_val_steps: int,
    ):
        self.trainer = trainer
        self.accumulated_loss = None
        self.warning_cache = WarningCache()
        self._teardown_already_run = False
        self.running_loss = TensorRunningAccum(window_length=20)
        self._skip_backward = False
        self._optimizer_freq_cumsum = None
        self._hiddens = None

        self.global_step = 0
        self.current_epoch = 0
        self.trainer.should_stop = False

        # the total batch index across all epochs
        self.total_batch_idx = 0
        # the current batch index in the loop that runs over the dataloader(s)
        self.batch_idx = 0
        # the current split index when the batch gets split into chunks in truncated backprop through time
        self.split_idx = None

        self.trainer.num_training_batches = 0
        self.trainer.train_dataloader = None

        # If neither max_epochs nor max_steps is set, use the existing default of max_epochs = 1000
        self.max_epochs = 1000 if (max_epochs is None and max_steps is None) else max_epochs
        # If neither min_epochs nor min_steps is set, use the existing default of min_epochs = 1
        self.min_epochs = 1 if (min_epochs is None and min_steps is None) else min_epochs
        self.max_steps = max_steps
        self.min_steps = min_steps

        if num_sanity_val_steps == -1:
            self.trainer.num_sanity_val_steps = float("inf")
        else:
            self.trainer.num_sanity_val_steps = num_sanity_val_steps
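The default resolution in the commented block above has a subtle case: when only `max_steps` is given, `max_epochs` stays `None` (no epoch limit) instead of falling back to 1000. A standalone restatement, for illustration only (`resolve_max_epochs` is a hypothetical name):

from typing import Optional

def resolve_max_epochs(max_epochs: Optional[int], max_steps: Optional[int]) -> Optional[int]:
    # the 1000-epoch default only applies when *both* limits are unset
    return 1000 if (max_epochs is None and max_steps is None) else max_epochs

assert resolve_max_epochs(None, None) == 1000  # neither limit given: default applies
assert resolve_max_epochs(None, 500) is None   # step-limited run, epochs stay unlimited
assert resolve_max_epochs(5, None) == 5        # explicit epoch limit wins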
Code example #14
    def __init__(self, min_steps: int, max_steps: int):
        super().__init__()
        self.min_steps: int = min_steps
        self.max_steps: int = max_steps
        self.global_step: int = 0
        # the total batch index across all epochs
        self.total_batch_idx: int = 0
        self.is_last_batch: Optional[bool] = None
        self.batch_progress = Progress()
        self.scheduler_progress = SchedulerProgress()

        self.batch_loop: Optional[TrainingBatchLoop] = None
        self.val_loop: Optional["loops.EvaluationLoop"] = None

        self._results = ResultCollection(training=True)
        self._dataloader_idx: Optional[int] = None
        self._warning_cache: WarningCache = WarningCache()
        self._epoch_output: Optional[List[List[STEP_OUTPUT]]] = None
Code example #15
    def __init__(
        self,
        trainer,
        multiple_trainloader_mode: str,
        max_epochs: Optional[int],
        min_epochs: Optional[int],
        max_steps: Optional[int],
        min_steps: Optional[int],
        num_sanity_val_steps: int,
    ):
        self.trainer = trainer
        self.accumulated_loss = None
        self.warning_cache = WarningCache()
        self._teardown_already_run = False
        self.running_loss = TensorRunningAccum(window_length=20)
        self._curr_step_result = None
        self._cur_grad_norm_dict = None
        self._multiple_trainloader_mode = multiple_trainloader_mode
        self._skip_backward = False
        self.trainer._multiple_trainloader_mode = multiple_trainloader_mode
        self._optimizer_freq_cumsum = None

        self.global_step = 0
        self.current_epoch = 0
        self.trainer.should_stop = False

        self.total_batch_idx = 0
        self.batch_idx = 0
        self.trainer.num_training_batches = 0
        self.trainer.train_dataloader = None

        # If neither max_epochs nor max_steps is set, use the existing default of max_epochs = 1000
        self.max_epochs = 1000 if (max_epochs is None and max_steps is None) else max_epochs
        # If neither min_epochs nor min_steps is set, use the existing default of min_epochs = 1
        self.min_epochs = 1 if (min_epochs is None and min_steps is None) else min_epochs
        self.max_steps = max_steps
        self.min_steps = min_steps

        if num_sanity_val_steps == -1:
            self.trainer.num_sanity_val_steps = float("inf")
        else:
            self.trainer.num_sanity_val_steps = num_sanity_val_steps
Code example #16
    def __init__(self,
                 name: Optional[str] = None,
                 save_dir: Optional[str] = None,
                 offline: Optional[bool] = False,
                 id: Optional[str] = None,
                 anonymous: Optional[bool] = False,
                 version: Optional[str] = None,
                 project: Optional[str] = None,
                 log_model: Optional[bool] = False,
                 experiment=None,
                 prefix: Optional[str] = '',
                 sync_step: Optional[bool] = True,
                 **kwargs):
        if wandb is None:
            raise ImportError(
                'You want to use `wandb` logger which is not installed yet,'  # pragma: no-cover
                ' install it with `pip install wandb`.')

        if offline and log_model:
            raise MisconfigurationException(
                f'Providing log_model={log_model} and offline={offline} is an invalid configuration'
                ' since model checkpoints cannot be uploaded in offline mode.\n'
                'Hint: Set `offline=False` to log your model.')

        super().__init__()
        self._name = name
        self._save_dir = save_dir
        self._offline = offline
        self._id = version or id
        self._anonymous = 'allow' if anonymous else None
        self._project = project
        self._log_model = log_model
        self._prefix = prefix
        self._sync_step = sync_step
        self._experiment = experiment
        self._kwargs = kwargs
        # logging multiple Trainer on a single W&B run (k-fold, resuming, etc)
        self._step_offset = 0
        self.warning_cache = WarningCache()
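Assuming this constructor belongs to Lightning's `WandbLogger` (the class statement is not part of the snippet), typical constructions against the checks above might look like this; the project name is made up and `wandb` must be installed:

from pytorch_lightning.loggers import WandbLogger

logger = WandbLogger(project="my-project", offline=True)    # fine: offline run, no checkpoint upload
logger = WandbLogger(project="my-project", log_model=True)  # fine: uploads checkpoints to W&B
# WandbLogger(offline=True, log_model=True) raises MisconfigurationException,
# since checkpoints cannot be uploaded in offline mode (see the check above).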
Code example #17
    def __init__(self, min_steps: int, max_steps: int):
        super().__init__()
        self.min_steps: int = min_steps
        self.max_steps: int = max_steps
        self.global_step: int = 0
        # the total batch index across all epochs
        self.total_batch_idx: int = 0
        # the current split index when the batch gets split into chunks in truncated backprop through time
        self.split_idx: Optional[int] = None
        # the number of batches seen this run, updates immediately after batch_loop.run()
        # TODO: replace by progress tracking
        self.batches_seen: int = 0
        self.is_last_batch: Optional[bool] = None
        self.batch_progress = Progress()
        self.scheduler_progress = SchedulerProgress()

        self.batch_loop: Optional[TrainingBatchLoop] = None
        self.val_loop: Optional["loops.EvaluationLoop"] = None

        self._results = ResultCollection(training=True)
        self._dataloader_idx: Optional[int] = None
        self._warning_cache: WarningCache = WarningCache()
        self._epoch_output: Optional[List[List[STEP_OUTPUT]]] = None
Code example #18
    def __init__(self,
                 min_steps: Optional[int] = None,
                 max_steps: int = -1) -> None:
        super().__init__()
        if max_steps < -1:
            raise MisconfigurationException(
                f"`max_steps` must be a non-negative integer or -1 (infinite steps). You passed in {max_steps}."
            )
        self.min_steps = min_steps
        self.max_steps = max_steps

        self.batch_progress = BatchProgress()
        self.scheduler_progress = SchedulerProgress()

        self.batch_loop = TrainingBatchLoop()
        self.val_loop = loops.EvaluationLoop(verbose=False)

        self._results = _ResultCollection(training=True)
        self._outputs: _OUTPUTS_TYPE = []
        self._warning_cache = WarningCache()
        # caches the loaded dataloader state until dataloader objects are available
        self._dataloader_state_dict: Dict[str, Any] = {}
        self._batches_that_stepped: int = 0
Code example #19
from pytorch_lightning.utilities.distributed import (
    _get_process_group_backend_from_env,
    get_default_process_group_backend_for_device,
    log,
)
from pytorch_lightning.utilities.enums import AMPType, PrecisionType
from pytorch_lightning.utilities.exceptions import MisconfigurationException
from pytorch_lightning.utilities.imports import _RequirementAvailable
from pytorch_lightning.utilities.model_helpers import is_overridden
from pytorch_lightning.utilities.optimizer import optimizers_to_device
from pytorch_lightning.utilities.rank_zero import rank_zero_info
from pytorch_lightning.utilities.seed import reset_seed
from pytorch_lightning.utilities.types import _PATH, LRSchedulerConfig, LRSchedulerTypeUnion, STEP_OUTPUT
from pytorch_lightning.utilities.warnings import rank_zero_warn, WarningCache

warning_cache = WarningCache()

_DEEPSPEED_AVAILABLE: bool = _RequirementAvailable("deepspeed")
if _DEEPSPEED_AVAILABLE:
    import deepspeed


def remove_module_hooks(model: torch.nn.Module) -> None:
    # todo (tchaton) awaiting this feature to move upstream to DeepSpeed
    for module in model.modules():
        module._backward_hooks = OrderedDict()
        module._is_full_backward_hook = None
        module._forward_hooks = OrderedDict()
        module._forward_pre_hooks = OrderedDict()
        module._state_dict_hooks = OrderedDict()
        module._load_state_dict_pre_hooks = OrderedDict()
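The `_RequirementAvailable("deepspeed")` guard above follows the usual optional-dependency pattern: test for the package first, import it only if present. A standard-library-only sketch of the same idea (not Lightning's helper):

import importlib.util

# True only if the `deepspeed` package can be located, without importing it
_DEEPSPEED_AVAILABLE: bool = importlib.util.find_spec("deepspeed") is not None

if _DEEPSPEED_AVAILABLE:
    import deepspeed  # safe: only imported when the package is installed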
Code example #20
    def __init__(self, trainer):
        self.trainer = trainer
        self.max_batches = None
        self.num_dataloaders = None
        self.warning_cache = WarningCache()