def model(self):
    if self._wrapped_model is None:
        # Lazily wrap the bare model for data-parallel training on first
        # access; BMUF and TPU runs skip the wrapper, as does training with
        # a single data-parallel worker.
        if (
            self.data_parallel_world_size > 1
            and not self.args.use_bmuf
            and not self.tpu
        ):
            self._wrapped_model = models.DistributedFairseqModel(
                self.args,
                self._model,
                process_group=self.data_parallel_process_group,
            )
        else:
            self._wrapped_model = self._model
    return self._wrapped_model

def criterion(self):
    if self._wrapped_criterion is None:
        # Wrap the criterion only when it owns parameters of its own that
        # need gradient synchronization; parameter-free losses stay bare.
        if (
            utils.has_parameters(self._criterion)
            and self.data_parallel_world_size > 1
            and not self.args.use_bmuf
        ):
            self._wrapped_criterion = models.DistributedFairseqModel(
                self.args,
                self._criterion,
                process_group=self.data_parallel_process_group,
            )
        else:
            self._wrapped_criterion = self._criterion
    return self._wrapped_criterion

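The utils.has_parameters guard in the criterion accessors matters: most criteria are parameter-free and have no gradients to synchronize, so wrapping them would be wasted work, while a few criteria do own trainable weights. A minimal sketch of such a check, illustrative of what a has_parameters helper can do (this is not the library function itself):

import torch.nn as nn


def has_parameters(module: nn.Module) -> bool:
    # True if the module owns at least one parameter tensor.
    try:
        next(module.parameters())
        return True
    except StopIteration:
        return False
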
def model(self):
    if self._wrapped_model is None:
        if self.args.distributed_world_size > 1:
            self._wrapped_model = models.DistributedFairseqModel(
                self.args,
                self._model,
            )
        else:
            self._wrapped_model = self._model
        if self.args.kd:
            # Knowledge distillation: move every teacher model onto the
            # same device as the (wrapped) student encoder's parameters.
            for value in self._wrapped_model.distill_models.values():
                value.to(next(self._wrapped_model.encoder.parameters()).device)
    return self._wrapped_model

def model(self):
    if self._wrapped_model is None:
        # Same lazy wrapping as above, but reading the structured config
        # (cfg.optimization, cfg.distributed_training) instead of flat args.
        if (
            self.data_parallel_world_size > 1
            and not self.cfg.optimization.use_bmuf
            and not self.tpu
        ):
            self._wrapped_model = models.DistributedFairseqModel(
                self.cfg.distributed_training,
                self._model,
                process_group=self.data_parallel_process_group,
            )
        else:
            self._wrapped_model = self._model
    return self._wrapped_model

def criterion(self):
    if self._wrapped_criterion is None:
        # Wrap only parameterized criteria, and only when the trainer opts
        # into the distributed wrapper via a single use_distributed_wrapper
        # flag rather than repeating the world-size checks inline.
        if (
            utils.has_parameters(self._criterion)
            and self.use_distributed_wrapper
        ):
            self._wrapped_criterion = models.DistributedFairseqModel(
                self.cfg.distributed_training,
                self._criterion,
                process_group=self.data_parallel_process_group,
                device=self.device,
            )
        else:
            self._wrapped_criterion = self._criterion
    return self._wrapped_criterion

def criterion(self):
    if self._wrapped_criterion is None:
        # Config-based variant of the criterion accessor: wrap only
        # parameterized criteria, skipping BMUF and TPU runs.
        if (
            utils.has_parameters(self._criterion)
            and self.data_parallel_world_size > 1
            and not self.cfg.optimization.use_bmuf
            and not self.tpu
        ):
            self._wrapped_criterion = models.DistributedFairseqModel(
                self.cfg.distributed_training,
                self._criterion,
                process_group=self.data_parallel_process_group,
            )
        else:
            self._wrapped_criterion = self._criterion
    return self._wrapped_criterion

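All six variants share one pattern: build the distributed wrapper lazily, on first access, cache it, and fall back to the bare module whenever there is nothing to synchronize. A minimal, self-contained sketch of that pattern using plain PyTorch DDP; SimpleTrainer and its fields are hypothetical stand-ins for illustration, not fairseq API:

import torch
import torch.nn as nn


class SimpleTrainer:
    def __init__(self, model: nn.Module, world_size: int):
        self._model = model
        self._wrapped_model = None  # cache so the wrapper is built at most once
        self.world_size = world_size

    @property
    def model(self) -> nn.Module:
        # Wrap lazily: single-process runs never pay DDP overhead, and the
        # wrapper is only constructed after the process group exists.
        if self._wrapped_model is None:
            if self.world_size > 1 and torch.distributed.is_initialized():
                self._wrapped_model = nn.parallel.DistributedDataParallel(
                    self._model
                )
            else:
                self._wrapped_model = self._model
        return self._wrapped_model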