Example 1
def configure_ddp(self) -> None:
    self._wrap_optimizers()
    self._model = ShardedDataParallel(
        LightningShardedDataParallel(self.model),
        sharded_optimizer=self.lightning_module.trainer.optimizers,
        **self._ddp_kwargs
    )
    setattr(self._model, "require_backward_grad_sync", False)
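Each of these snippets is a Lightning plugin hook, so it leans on plugin state such as `self._wrap_optimizers()`, `self._ddp_kwargs`, and the trainer's optimizers. For context, here is a minimal standalone sketch of the same kind of wrapping done directly with fairscale; the toy model, the SGD settings, and the single-process "gloo" process group are illustrative assumptions, not part of the original code.

import os

import torch
from fairscale.nn.data_parallel import ShardedDataParallel
from fairscale.optim import OSS

# Illustrative single-process group; real jobs get this from torchrun/Lightning.
os.environ.setdefault("MASTER_ADDR", "127.0.0.1")
os.environ.setdefault("MASTER_PORT", "29500")
torch.distributed.init_process_group(backend="gloo", rank=0, world_size=1)

model = torch.nn.Linear(32, 2)  # stand-in for self.model
# OSS shards the optimizer state across ranks (ZeRO-style).
optimizer = OSS(params=model.parameters(), optim=torch.optim.SGD, lr=0.01)
# ShardedDataParallel reduces each gradient only to the rank that owns its shard.
model = ShardedDataParallel(model, sharded_optimizer=optimizer)

loss = model(torch.randn(4, 32)).sum()
loss.backward()
optimizer.step()
torch.distributed.destroy_process_group()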
Example 2
def configure_ddp(self):
    self._wrap_optimizers()
    self._model = ShardedDataParallel(
        LightningShardedDataParallel(self.model),
        sharded_optimizer=self.lightning_module.trainer.optimizers,
        # For multi-node training, enabling bucketing will improve performance.
        reduce_buffer_size=self._REDUCE_BUFFER_SIZE_DEFAULT if self.num_nodes > 1 else 0,
    )
    setattr(self._model, "require_backward_grad_sync", False)
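Relative to Example 1, the only change is the explicit `reduce_buffer_size` argument: fairscale's `ShardedDataParallel` uses it to bucket small gradient tensors into larger reduction buffers, and a value of 0 disables bucketing, so the buffer is enabled only when training spans more than one node, where fewer, larger reductions are cheaper over the network.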
Example 3
    def _setup_model_and_optimizers(self, model: Module, optimizers: List[Optimizer]) -> Tuple[Module, List[Optimizer]]:
        """Wraps the model and optimizers with fairscale components.

        Return:
            The model wrapped into a :class:`~fairscale.nn.data_parallel.ShardedDataParallel` module
            and a list of optimizers wrapped in :class:`~fairscale.optim.OSS`.
        """
        optimizers = self._wrap_optimizers(optimizers)
        model = ShardedDataParallel(model, sharded_optimizer=optimizers, **self._ddp_kwargs)
        return model, optimizers
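All four examples delegate the optimizer side to `_wrap_optimizers`, which per the docstring above must yield optimizers wrapped in `fairscale.optim.OSS`. Below is a minimal sketch of that step, assuming each plain torch optimizer is simply re-created as an `OSS` instance over the same parameter groups and defaults; the helper name `wrap_optimizers_with_oss` is made up for the sketch, and this is an illustration of the idea rather than the exact Lightning implementation.

from typing import List

from fairscale.optim import OSS
from torch.optim import Optimizer


def wrap_optimizers_with_oss(optimizers: List[Optimizer]) -> List[Optimizer]:
    # Sketch only: re-create every plain optimizer as a fairscale OSS optimizer
    # so its state is sharded across ranks; already-wrapped ones pass through.
    wrapped: List[Optimizer] = []
    for optimizer in optimizers:
        if isinstance(optimizer, OSS):
            wrapped.append(optimizer)
            continue
        wrapped.append(
            OSS(params=optimizer.param_groups, optim=type(optimizer), **optimizer.defaults)
        )
    return wrapped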
Example 4
def configure_ddp(self):
    self._wrap_optimizers()
    self._model = ShardedDataParallel(
        LightningShardedDataParallel(self.model),
        sharded_optimizer=self.lightning_module.trainer.optimizers)