def advance(self, batch, batch_idx):
    """Process the next pending batch split, optimizing on it where required.

    Args:
        batch: the full batch being trained on (not the split); it is only
            handed to ``void`` here
        batch_idx: the index of the current batch
    """
    void(batch)

    # take the next (index, split) pair from the front of the pending list
    split_idx, split_batch = self._remaining_splits.pop(0)
    self.split_idx = split_idx

    # lets the logger connector pick up the current batch size
    self.trainer.logger_connector.on_train_split_start(batch_idx, split_idx, split_batch)

    if not self.trainer.lightning_module.automatic_optimization:
        # manual optimization: a single call, no iteration over optimizers
        result = self._run_optimization(batch_idx, split_batch)
        if result:
            self.batch_outputs[0].append(deepcopy(result.result_collection))
        return

    # automatic optimization: execution is delegated to the OptimizerLoop
    active = self.get_active_optimizers(batch_idx)
    optimizers = [optimizer for _, optimizer in active]
    batch_outputs, self._hiddens = self.optimizer_loop.run(
        split_batch, self._hiddens, optimizers, batch_idx
    )

    # merge what each optimizer produced into the accumulated outputs
    for position in range(len(batch_outputs)):
        self.batch_outputs[position].extend(batch_outputs[position])
def advance(  # type: ignore[override]
    self, data_fetcher: AbstractDataFetcher, dataloader_idx: Optional[int], dl_max_batches: int
) -> None:
    """Run one evaluation step with its hooks and update progress and logging state.

    Args:
        data_fetcher: iterator over the dataloader (not read directly in this method;
            the batch is pulled from ``self._dataloader_iter``)
        dataloader_idx: index of the current dataloader
        dl_max_batches: maximum number of batches the dataloader can produce

    Raises:
        StopIteration: If the fetched batch is ``None``
    """
    void(dl_max_batches)

    assert self._dataloader_iter is not None
    batch, is_last = next(self._dataloader_iter)
    self.batch_progress.is_last_batch = is_last

    if batch is None:
        raise StopIteration

    # build the keyword arguments shared by the hooks and the step
    # TODO: each loop should construct its own kwargs, so we avoid the dataloader_idx reference here
    step_kwargs = self._build_kwargs(batch, self.batch_progress.current.ready, dataloader_idx)

    self.batch_progress.increment_ready()

    # batch-start hook
    self._on_evaluation_batch_start(**step_kwargs)

    self.batch_progress.increment_started()

    # step followed by step-end
    step_output = self._evaluation_step_end(self._evaluation_step(**step_kwargs))

    self.batch_progress.increment_processed()

    # batch-end hook
    self._on_evaluation_batch_end(step_output, **step_kwargs)

    self.batch_progress.increment_completed()

    # log batch metrics
    self.trainer.logger_connector.update_eval_step_metrics()

    # keep the output for epoch end only when tracking is requested and there is one
    if self._should_track_batch_outputs_for_epoch_end():
        if step_output is not None:
            self._outputs.append(step_output)

    if self.trainer.move_metrics_to_cpu:
        # only the results are moved; the step output is not considered "metrics"
        assert self.trainer._results is not None
        self.trainer._results.cpu()

    if not self.batch_progress.is_last_batch:
        # if fault tolerant is enabled and the process has been notified, exit
        self.trainer._exit_gracefully_on_signal()
def advance(self, batch, batch_idx, dataloader_idx):
    """Process the next pending batch split, optimizing on it where required.

    Args:
        batch: the full batch being trained on (not the split); only handed to ``void``
        batch_idx: the index of the current batch
        dataloader_idx: the index of the dataloader that produced the batch; only
            handed to ``void``
    """
    void(batch, dataloader_idx)

    # take the next (index, split) pair from the front of the pending list
    split_idx, split_batch = self._remaining_splits.pop(0)
    self.batch_idx = batch_idx
    self.split_idx = split_idx

    # lets the logger connector pick up the current batch size
    self.trainer.logger_connector.on_train_split_start(batch_idx, split_idx, split_batch)

    if not self.trainer.lightning_module.automatic_optimization:
        # manual optimization: a single call, no iteration over optimizers
        result = self._run_optimization(batch_idx, split_batch)
        if result:
            self.batch_outputs[0].append(result.training_step_output)
        return

    # automatic optimization: one optimization run per active optimizer
    for opt_idx, optimizer in self.get_active_optimizers(batch_idx):
        result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
        if result:
            self.batch_outputs[opt_idx].append(result.training_step_output)
def on_run_start(  # type: ignore[override]
    self, data_fetcher: AbstractDataFetcher, dl_max_batches: int, kwargs: OrderedDict
) -> None:
    """Store run arguments on the loop and wire up the fetcher's profiling callbacks.

    Args:
        data_fetcher: the current data_fetcher wrapping the dataloader
        dl_max_batches: maximum number of batches the dataloader can produce
        kwargs: the kwargs passed down to the hooks; ``dataloader_idx`` is read from
            it (default ``0``) to name the profiler action
    """
    void(kwargs)

    self._dl_max_batches = dl_max_batches
    self._reload_dataloader_state_dict(data_fetcher)

    # `iter` creates the iterator inside the fetcher but hands back the fetcher itself
    self._data_fetcher = iter(data_fetcher)

    # carry over the batches already marked ready so `is_last_batch` is tracked
    # properly when there is no prefetching
    data_fetcher.fetched += self.batch_progress.current.ready

    stage = self.trainer.state.stage
    assert stage is not None
    stage = stage.dataloader_prefix
    self._profiler_fetch_action = (
        f"[{self.__class__.__name__}].{stage}_dataloader_idx_{kwargs.get('dataloader_idx', 0)}_next"
    )

    # the fetcher calls these around each fetch
    data_fetcher._start_profiler = self._on_before_fetch
    data_fetcher._stop_profiler = self._on_after_fetch
def on_run_start(self, *args: Any, **kwargs: Any) -> None:
    """Hook invoked first thing after entering :attr:`run` (apart from the state reset).

    Receives every argument that was passed to :attr:`run`; this implementation only
    forwards them to ``void``.
    """
    void(*args, **kwargs)
def advance(self, batch: Any, batch_idx: int) -> None:  # type: ignore[override]
    """Process the next pending batch split with the automatic or the manual loop.

    Args:
        batch: the full batch being trained on (not the split); only handed to ``void``
        batch_idx: the index of the current batch
    """
    void(batch)

    # take the next (index, split) pair from the front of the pending list
    self.split_idx, split_batch = self._remaining_splits.pop(0)

    self.trainer._logger_connector.on_train_split_start(self.split_idx)

    outputs: Optional[Union[_OPTIMIZER_LOOP_OUTPUTS_TYPE, _MANUAL_LOOP_OUTPUTS_TYPE]] = None  # for mypy

    # pick the loop that performs the optimization
    if not self.trainer.lightning_module.automatic_optimization:
        outputs = self.manual_loop.run(split_batch, batch_idx)
    else:
        optimizers = _get_active_optimizers(
            self.trainer.optimizers, self.trainer.optimizer_frequencies, batch_idx
        )
        outputs = self.optimizer_loop.run(split_batch, optimizers, batch_idx)

    # Nothing is stored for falsy outputs:
    # automatic: can be empty if all optimizers skip their batches
    # manual: #9052 added support for raising `StopIteration` in the `training_step`. If that happens,
    # then `advance` doesn't finish and an empty dict is returned
    if outputs:
        self._outputs.append(outputs)
def advance(self, batch, batch_idx):
    """Process the next pending batch split, skipping optimizers already handled on restart.

    Args:
        batch: the full batch being trained on (not the split); only handed to ``void``
        batch_idx: the index of the current batch
    """
    void(batch)

    # take the next (index, split) pair from the front of the pending list
    split_idx, split_batch = self._remaining_splits.pop(0)
    self.split_idx = split_idx

    # lets the logger connector pick up the current batch size
    self.trainer.logger_connector.on_train_split_start(batch_idx, split_idx, split_batch)

    if not self.trainer.lightning_module.automatic_optimization:
        # manual optimization: a single call, no iteration over optimizers
        result = self._run_optimization(batch_idx, split_batch)
        if result:
            self.batch_outputs[0].append(deepcopy(result.result_collection))
        return

    for opt_idx, optimizer in self.get_active_optimizers(batch_idx):
        # on restart, skip optimizers that come before the recorded optimizer index
        if self.restarting and opt_idx < self.optim_progress.optimizer_idx:
            continue

        self.optim_progress.optimizer_idx = opt_idx

        result = self._run_optimization(batch_idx, split_batch, opt_idx, optimizer)
        if result:
            self.batch_outputs[opt_idx].append(deepcopy(result.result_collection))
def connect(self, trainer: 'pl.Trainer', *args: Any, **kwargs: Any) -> None:
    """Attach the trainer to this loop and to its inner training loop.

    Args:
        trainer: the trainer to store on this loop and pass to ``self.training_loop``
        *args: extra positional arguments; only handed to ``void``
        **kwargs: extra keyword arguments; only handed to ``void``
    """
    # TODO(@justusschock): Do we want to forward *args and **kwargs to the inner loop here?
    # TODO(@justusschock): Can we make the trainer a weakref/proxy?
    void(*args, **kwargs)

    self.trainer = trainer
    self.training_loop.connect(trainer)
def advance(
    self, data_fetcher: AbstractDataFetcher, dataloader_idx: int, dl_max_batches: int, num_dataloaders: int
) -> None:
    """Run one evaluation step with its hooks and update progress and logging state.

    Args:
        data_fetcher: iterator over the dataloader; only handed to ``void`` (the
            batch is pulled from ``self._dataloader_iter``)
        dataloader_idx: index of the current dataloader
        dl_max_batches: maximum number of batches the dataloader can produce; only
            handed to ``void``
        num_dataloaders: the total number of dataloaders; only handed to ``void``

    Raises:
        StopIteration: If the fetched batch is ``None``
    """
    void(data_fetcher, dl_max_batches, num_dataloaders)

    batch_idx, (batch, is_last) = next(self._dataloader_iter)
    self.batch_progress.is_last_batch = is_last

    if batch is None:
        raise StopIteration

    # transfer the batch here unless the evaluation fetcher already stores it on device
    if not self.trainer._data_connector.evaluation_data_fetcher.store_on_device:
        with self.trainer.profiler.profile("evaluation_batch_to_device"):
            batch = self.trainer.accelerator.batch_to_device(batch, dataloader_idx=dataloader_idx)

    self.batch_progress.increment_ready()

    # batch-start hook
    self._on_evaluation_batch_start(batch, batch_idx, dataloader_idx)

    self.batch_progress.increment_started()

    # step followed by step-end, profiled together
    with self.trainer.profiler.profile("evaluation_step_and_end"):
        step_output = self._evaluation_step(batch, batch_idx, dataloader_idx)
        step_output = self._evaluation_step_end(step_output)

    self.batch_progress.increment_processed()

    # batch-end hook
    self._on_evaluation_batch_end(step_output, batch, batch_idx, dataloader_idx)

    self.batch_progress.increment_completed()

    # log batch metrics
    self.trainer.logger_connector.update_eval_step_metrics()

    # detach the output and keep it for epoch end when tracking is requested
    if self._should_track_batch_outputs_for_epoch_end():
        step_output = recursive_detach(step_output, to_cpu=self.trainer.move_metrics_to_cpu)
        if step_output is not None:
            self.outputs.append(step_output)

    if not self.batch_progress.is_last_batch:
        # if fault tolerant is enabled and the process has been notified, exit
        self.trainer._exit_gracefully_on_signal()
def on_run_start(self, batch: Any, batch_idx: int, dataloader_idx: int):
    """Split the batch into tbptt splits and queue them as ``(index, split)`` pairs.

    Args:
        batch: the current batch to run the train step on
        batch_idx: the index of the current batch; only handed to ``void``
        dataloader_idx: the index of the dataloader producing the current batch;
            only handed to ``void``
    """
    void(batch_idx, dataloader_idx)

    splits = self.tbptt_split_batch(batch)
    self._remaining_splits = list(enumerate(splits))
def on_run_start(self, batch: Any, batch_idx: int) -> None:  # type: ignore[override]
    """Split the batch into tbptt splits and queue them as ``(index, split)`` pairs.

    Args:
        batch: the current batch to run the train step on
        batch_idx: the index of the current batch; only handed to ``void``
    """
    void(batch_idx)

    splits = self._tbptt_split_batch(batch)
    self._remaining_splits = list(enumerate(splits))
def advance(
    self,
    dataloader_iter: Iterator,
    dataloader_idx: int,
    dl_max_batches: int,
    num_dataloaders: int,
) -> None:
    """Run one evaluation step with its hooks and update progress and logging state.

    Args:
        dataloader_iter: iterator over the dataloader, yielding ``(batch_idx, batch)``
        dataloader_idx: index of the current dataloader
        dl_max_batches: maximum number of batches the dataloader can produce; only
            handed to ``void``
        num_dataloaders: the total number of dataloaders; only handed to ``void``

    Raises:
        StopIteration: If the fetched batch is ``None``
    """
    void(dl_max_batches, num_dataloaders)

    batch_idx, batch = next(dataloader_iter)
    if batch is None:
        raise StopIteration

    # device transfer, profiled on its own
    with self.trainer.profiler.profile("evaluation_batch_to_device"):
        batch = self.trainer.accelerator.batch_to_device(batch, dataloader_idx=dataloader_idx)

    self.batch_progress.increment_ready()

    # batch-start hook
    self.on_evaluation_batch_start(batch, batch_idx, dataloader_idx)

    self.batch_progress.increment_started()

    # step followed by step-end, profiled together
    with self.trainer.profiler.profile("evaluation_step_and_end"):
        step_output = self.evaluation_step(batch, batch_idx, dataloader_idx)
        step_output = self.evaluation_step_end(step_output)

    self.batch_progress.increment_processed()

    # batch-end hook + store predictions
    self.on_evaluation_batch_end(step_output, batch, batch_idx, dataloader_idx)

    self.batch_progress.increment_completed()

    # log batch metrics
    self.trainer.logger_connector.update_eval_step_metrics()

    # fold this step's output into the epoch-level outputs
    self.outputs = self._track_output_for_epoch_end(self.outputs, step_output)
def on_run_start(
    self,
    dataloader_iter: Iterator,
    dataloader_idx: int,
    dl_max_batches: int,
    num_dataloaders: int,
) -> None:
    """Record the batch limit and the dataloader count on the loop.

    Args:
        dataloader_iter: iterator over the dataloader; only handed to ``void``
        dataloader_idx: index of the current dataloader; only handed to ``void``
        dl_max_batches: maximum number of batches the dataloader can produce
        num_dataloaders: the total number of dataloaders
    """
    void(dataloader_iter, dataloader_idx)

    self._dl_max_batches = dl_max_batches
    self._num_dataloaders = num_dataloaders
def to_onehot(label_tensor: torch.Tensor, num_classes: Optional[int] = None) -> torch.Tensor:
    """Deprecated shim that passes both arguments to ``void`` and returns its result.

    .. deprecated::
        Use :func:`torchmetrics.utilities.data.to_onehot`. Will be removed in v1.5.0.
    """
    outcome = void(label_tensor, num_classes)
    return outcome
def on_run_start(  # type: ignore[override]
    self, data_fetcher: AbstractDataFetcher, dl_max_batches: int, kwargs: OrderedDict
) -> None:
    """Store the run arguments on the loop and create the dataloader iterator.

    Args:
        data_fetcher: the current data_fetcher wrapping the dataloader
        dl_max_batches: maximum number of batches the dataloader can produce
        kwargs: the kwargs passed down to the hooks; only handed to ``void`` here
    """
    void(kwargs)

    self._dl_max_batches = dl_max_batches
    self._data_fetcher = data_fetcher

    # reload the dataloader state dict before the iterator is created
    self._reload_dataloader_state_dict(data_fetcher)
    self._dataloader_iter = iter(data_fetcher)
def on_run_start(  # type: ignore[override]
    self, data_fetcher: AbstractDataFetcher, dataloader_idx: Optional[int], dl_max_batches: int
) -> None:
    """Store the run arguments on the loop and create the dataloader iterator.

    Args:
        data_fetcher: the current data_fetcher wrapping the dataloader
        dataloader_idx: index of the current dataloader; only handed to ``void`` here
        dl_max_batches: maximum number of batches the dataloader can produce
    """
    void(dataloader_idx)

    self._dl_max_batches = dl_max_batches
    self._data_fetcher = data_fetcher

    # reload the dataloader state dict before the iterator is created
    self._reload_dataloader_state_dict(data_fetcher)
    self._dataloader_iter = iter(data_fetcher)
def on_run_start(  # type: ignore[override]
    self, data_fetcher: AbstractDataFetcher, dl_max_batches: int, kwargs: OrderedDict
) -> None:
    """Store the run arguments on the loop and prime the data fetcher.

    Args:
        data_fetcher: the current data_fetcher wrapping the dataloader
        dl_max_batches: maximum number of batches the dataloader can produce
        kwargs: the kwargs passed down to the hooks; only handed to ``void`` here
    """
    void(kwargs)

    self._dl_max_batches = dl_max_batches
    self._reload_dataloader_state_dict(data_fetcher)

    # `iter` creates the iterator inside the fetcher but hands back the fetcher itself
    self._data_fetcher = iter(data_fetcher)

    # carry over the batches already marked ready so `is_last_batch` is tracked
    # properly when there is no prefetching
    data_fetcher.fetched += self.batch_progress.current.ready
def get_num_classes(
    pred: torch.Tensor,
    target: torch.Tensor,
    num_classes: Optional[int] = None,
) -> int:
    """Deprecated shim that passes all arguments to ``void`` and returns its result.

    .. deprecated::
        Use :func:`torchmetrics.utilities.data.get_num_classes`. Will be removed in v1.5.0.
    """
    outcome = void(pred, target, num_classes)
    return outcome
def on_run_start(
    self,
    data_fetcher: AbstractDataFetcher,
    dataloader_idx: int,
    dl_max_batches: int,
    num_dataloaders: int,
) -> None:
    """Store the run arguments on the loop and prepare the dataloader iterator.

    Args:
        data_fetcher: the current data_fetcher wrapping the dataloader
        dataloader_idx: index of the current dataloader; only handed to ``void`` here
        dl_max_batches: maximum number of batches the dataloader can produce
        num_dataloaders: the total number of dataloaders
    """
    void(dataloader_idx)

    self._dl_max_batches = dl_max_batches
    self._num_dataloaders = num_dataloaders

    # the iterator is built from the fetcher and the current "ready" batch count
    self.dataloader_iter = _prepare_dataloader_iter(
        data_fetcher,
        self.batch_progress.current.ready,
    )
def select_topk(prob_tensor: torch.Tensor, topk: int = 1, dim: int = 1) -> torch.Tensor:
    """Deprecated shim that passes all arguments to ``void`` and returns its result.

    .. deprecated::
        Use :func:`torchmetrics.utilities.data.select_topk`. Will be removed in v1.5.0.
    """
    outcome = void(prob_tensor, topk, dim)
    return outcome
def class_reduce(
    num: torch.Tensor,
    denom: torch.Tensor,
    weights: torch.Tensor,
    class_reduction: str = "none",
) -> torch.Tensor:
    """Deprecated shim that passes all arguments to ``void`` and returns its result.

    .. deprecated::
        Use :func:`torchmetrics.utilities.class_reduce`. Will be removed in v1.5.0.
    """
    outcome = void(num, denom, weights, class_reduction)
    return outcome
def on_run_start(
    self,
    dataloader_iter: Iterator,
    dataloader_idx: int,
    dl_max_batches: int,
    num_dataloaders: int,
    return_predictions: bool = False,
) -> None:
    """Set up the loop's internal state for a run.

    Args:
        dataloader_iter: the iterator over the current dataloader; only handed to ``void``
        dataloader_idx: the index of the current dataloader; only handed to ``void``
        dl_max_batches: the maximum number of batches the current loader can produce
        num_dataloaders: the total number of dataloaders
        return_predictions: whether to return the obtained predictions
    """
    void(dataloader_iter, dataloader_idx)

    self._dl_max_batches = dl_max_batches
    self._num_dataloaders = num_dataloaders
    self.return_predictions = return_predictions
def on_run_start(  # type: ignore[override]
    self,
    dataloader_iter: Iterator,
    dataloader_idx: int,
    dl_max_batches: int,
    num_dataloaders: int,
) -> None:
    """Set up the loop's internal state for a run.

    Args:
        dataloader_iter: the iterator over the current dataloader; only handed to ``void``
        dataloader_idx: the index of the current dataloader
        dl_max_batches: the maximum number of batches the current loader can produce
        num_dataloaders: the total number of dataloaders
    """
    void(dataloader_iter, dataloader_idx)

    self._dl_max_batches = dl_max_batches
    self._num_dataloaders = num_dataloaders

    # this check requires that `self.return_predictions` is set
    if self.should_store_predictions:
        seen_indices = self._get_batch_indices(dataloader_idx)
    else:
        seen_indices = []
    self._seen_batch_indices = seen_indices
def dim_zero_sum(x):
    """Pass ``x`` to ``void`` and return whatever it returns."""
    outcome = void(x)
    return outcome
def on_advance_start(self, *args: Any, **kwargs: Any) -> None:
    """Hook invoked each time right before :attr:`advance` is called.

    Receives every argument that was passed to :attr:`run`; this implementation only
    forwards them to ``void``.
    """
    void(*args, **kwargs)
def dim_zero_mean(x):
    """Pass ``x`` to ``void`` and return whatever it returns."""
    outcome = void(x)
    return outcome
def to_categorical(tensor: torch.Tensor, argmax_dim: int = 1) -> torch.Tensor:
    """Deprecated shim that passes both arguments to ``void`` and returns its result.

    .. deprecated::
        Use :func:`torchmetrics.utilities.data.to_categorical`. Will be removed in v1.5.0.
    """
    outcome = void(tensor, argmax_dim)
    return outcome
def advance(  # type: ignore[override]
    self,
    data_fetcher: AbstractDataFetcher,
    dl_max_batches: int,
    kwargs: OrderedDict,
) -> None:
    """Run one evaluation step with its hooks and update progress and logging state.

    Args:
        data_fetcher: iterator over the dataloader
        dl_max_batches: maximum number of batches the dataloader can produce; only
            handed to ``void`` here
        kwargs: the kwargs passed down to the hooks.

    Raises:
        StopIteration: not raised explicitly in this method; presumably propagates
            from ``next(data_fetcher)`` once the fetcher is exhausted
    """
    void(dl_max_batches)

    if isinstance(data_fetcher, DataLoaderIterDataFetcher):
        # this fetcher type yields the batch index together with the batch
        batch_idx, batch = next(data_fetcher)
    else:
        # otherwise the index is the number of batches marked ready so far
        batch_idx = self.batch_progress.current.ready
        batch = next(data_fetcher)
    self.batch_progress.is_last_batch = data_fetcher.done

    # build the keyword arguments shared by the hooks and the step
    kwargs = self._build_kwargs(kwargs, batch, batch_idx)

    self.batch_progress.increment_ready()

    # batch-start hook
    self._on_evaluation_batch_start(**kwargs)

    self.batch_progress.increment_started()

    # step followed by step-end
    step_output = self._evaluation_step_end(self._evaluation_step(**kwargs))

    self.batch_progress.increment_processed()

    # batch-end hook
    self._on_evaluation_batch_end(step_output, **kwargs)

    self.batch_progress.increment_completed()

    # log batch metrics
    self.trainer._logger_connector.update_eval_step_metrics()

    # keep the output for epoch end only when tracking is requested and there is one
    if self._should_track_batch_outputs_for_epoch_end():
        if step_output is not None:
            self._outputs.append(step_output)

    if self.trainer.move_metrics_to_cpu:
        # only the results are moved; the step output is not considered "metrics"
        assert self.trainer._results is not None
        self.trainer._results.cpu()

    if not self.batch_progress.is_last_batch:
        # if fault tolerant is enabled and the process has been notified, exit
        self.trainer._exit_gracefully_on_signal()
def reduce(to_reduce: torch.Tensor, reduction: str) -> torch.Tensor:
    """Deprecated shim that passes both arguments to ``void`` and returns its result.

    .. deprecated::
        Use :func:`torchmetrics.utilities.reduce`. Will be removed in v1.5.0.
    """
    outcome = void(to_reduce, reduction)
    return outcome
def connect(self, trainer: 'pl.Trainer', *args: Any, **kwargs: Any) -> None:
    """Attach the trainer to this loop.

    Args:
        trainer: the trainer to store on ``self.trainer``
        *args: extra positional arguments; only handed to ``void``
        **kwargs: extra keyword arguments; only handed to ``void``
    """
    # TODO(@justusschock): can we make this a weakref/proxy?
    void(*args, **kwargs)

    self.trainer = trainer