def batch_loss(self, batch_group: List[TensorDict], for_training: bool) -> torch.Tensor:
    """
    Does a forward pass on the given batches and returns the ``loss`` value in the result.
    If ``for_training`` is `True`, also applies the regularization penalty.
    """
    if self._multiple_gpu:
        output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
    else:
        assert len(batch_group) == 1
        batch = batch_group[0]
        batch = nn_util.move_to_device(batch, self._cuda_devices[0])
        output_dict = self.model(**batch)

    try:
        loss = output_dict["loss"]
        if for_training:
            loss += self.model.get_regularization_penalty()
    except KeyError:
        if for_training:
            raise RuntimeError(
                "The model you are trying to optimize does not contain a"
                " 'loss' key in the output of model.forward(inputs)."
            )
        loss = None

    return loss
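# Hedged usage sketch (not part of the original trainer): how ``batch_loss``
# typically plugs into a single training step in an AllenNLP-style loop.
# ``_train_step`` is a hypothetical helper; ``self`` is assumed to expose
# ``batch_loss`` and ``optimizer`` as above.
def _train_step(self, batch_group: List[TensorDict]) -> float:
    self.optimizer.zero_grad()
    loss = self.batch_loss(batch_group, for_training=True)
    if torch.isnan(loss):
        raise ValueError("nan loss encountered")
    loss.backward()
    self.optimizer.step()
    return loss.item()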
def _run_model(self, batch_group):
    if self.n_gpu_use > 1:
        output_dict = training_util.data_parallel(batch_group, self.model, self.device)
    else:
        assert len(batch_group) == 1
        batch = batch_group[0]
        batch = nn_util.move_to_device(batch, self.device[0])
        output_dict = self.model(**batch)
    return output_dict
def get_output_dict(self, batch_group: List[TensorDict], for_training: bool) -> Dict[str, torch.Tensor]:
    """
    Does a forward pass on the given batches and returns the model's full output
    dictionary. Unlike ``batch_loss``, loss extraction (and any regularization
    penalty) is left to the caller; ``for_training`` is kept for interface
    compatibility.
    """
    if self._multiple_gpu:
        output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
    else:
        assert len(batch_group) == 1
        batch = batch_group[0]
        batch = nn_util.move_to_device(batch, self._cuda_devices[0])
        output_dict = self.model(**batch)
    return output_dict
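# Hedged sketch of the loss extraction that ``get_output_dict`` leaves to the
# caller, mirroring the logic in ``batch_loss`` above. ``_extract_loss`` is a
# hypothetical helper, not part of the original code; assumes
# ``from typing import Dict, Optional``.
def _extract_loss(self, output_dict: Dict[str, torch.Tensor],
                  for_training: bool) -> Optional[torch.Tensor]:
    try:
        loss = output_dict["loss"]
        if for_training:
            # Add the regularization penalty, as the original batch_loss does.
            loss = loss + self.model.get_regularization_penalty()
    except KeyError:
        if for_training:
            raise RuntimeError(
                "The model you are trying to optimize does not contain a"
                " 'loss' key in the output of model.forward(inputs)."
            )
        loss = None
    return loss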
def batch_loss(self, batch_group: List[TensorDict], for_training: bool,
               eval_metric: bool = True) -> Dict[str, torch.Tensor]:
    """
    Does a forward pass on the given batches and returns the model's output
    dictionary. If ``for_training`` and ``eval_metric`` are both `True`, the
    regularization penalty is added under the ``'regularization_penalty'`` key.
    """
    if self.trainer._multiple_gpu:
        output_dict = training_util.data_parallel(
            batch_group, self.trainer.model, self.trainer._cuda_devices)
    else:
        assert len(batch_group) == 1
        batch = batch_group[0]
        batch = nn_util.move_to_device(batch, self.trainer._cuda_devices[0])
        output_dict = self.trainer.model(**batch, eval_metric=eval_metric)

    if for_training and eval_metric:
        output_dict['regularization_penalty'] = self.trainer.model.get_regularization_penalty()

    return output_dict
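# Since this variant returns the regularization penalty in the output dict
# rather than folding it into the loss, the caller is presumably expected to
# combine the two. A minimal hedged sketch; ``compute_total_loss`` is a
# hypothetical helper invented for illustration.
def compute_total_loss(output_dict: Dict[str, torch.Tensor]) -> torch.Tensor:
    loss = output_dict["loss"]
    penalty = output_dict.get("regularization_penalty")
    if penalty is not None:
        loss = loss + penalty
    return loss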
def batch_loss(self, batch_group: List[TensorDict], for_training: bool) -> torch.Tensor:
    """
    Does a forward pass on the given batches and returns the ``loss`` value in the result.
    If ``for_training`` is `True`, also applies the regularization penalty.
    """
    if self._multiple_gpu:
        output_dict = training_util.data_parallel(batch_group, self.model, self._cuda_devices)
    else:
        # if self._num_gradient_accumulation_steps == 1:
        assert len(batch_group) == 1
        batch = batch_group[0]
        batch = nn_util.move_to_device(batch, self._cuda_devices[0])
        output_dict = self.model(**batch)
        # Earlier experiment with activation checkpointing, kept for reference:
        # try:
        #     output_dict = self.model(**batch)
        #     wrappedmodel = ModelWrapper(self.model)
        #     processed_inputs = wrappedmodel.process_inputs(batch)
        #     output_dict = checkpoint(wrappedmodel, processed_inputs)
        # except RuntimeError:
        #     print("Probably CUDA out of memory")
        #     return None

    try:
        loss = output_dict["loss"]
        if for_training:
            loss += self.model.get_regularization_penalty()
    except KeyError:
        if for_training:
            raise RuntimeError(
                "The model you are trying to optimize does not contain a"
                " 'loss' key in the output of model.forward(inputs)."
            )
        loss = None

    return loss
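# The commented-out block above hints at activation (gradient) checkpointing.
# A minimal hedged sketch of that idea using ``torch.utils.checkpoint``,
# assuming a recent PyTorch (``use_reentrant`` is available from ~1.11).
# ``TwoLayerNet`` is a toy model invented for illustration, not the original
# ``ModelWrapper``.
import torch
from torch.utils.checkpoint import checkpoint

class TwoLayerNet(torch.nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.layer1 = torch.nn.Linear(16, 16)
        self.layer2 = torch.nn.Linear(16, 4)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        # Recompute layer1's activations during backward instead of storing
        # them, trading extra compute for lower peak memory.
        hidden = checkpoint(self.layer1, x, use_reentrant=False)
        return self.layer2(torch.relu(hidden))

# requires_grad ensures gradients flow through the checkpointed segment.
x = torch.randn(8, 16, requires_grad=True)
TwoLayerNet()(x).sum().backward()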