def flatten_parameters(self):
    """Resets parameter data pointers so that they can use faster code paths.

    Right now, this works only if the module is on the GPU and cuDNN is
    enabled. Otherwise, it's a no-op.
    """
    any_param = next(self.parameters()).data
    if not any_param.is_cuda or not torch.backends.cudnn.is_acceptable(any_param):
        return

    # If any parameters alias, we fall back to the slower, copying code path. This is
    # a sufficient check, because overlapping parameter buffers that don't completely
    # alias would break the assumptions of the uniqueness check in
    # Module.named_parameters().
    all_weights = self._flat_weights
    unique_data_ptrs = set(p.data_ptr() for p in all_weights)
    if len(unique_data_ptrs) != len(all_weights):
        return

    with torch.cuda.device_of(any_param):
        import torch.backends.cudnn.rnn as rnn

        # NB: This is a temporary hack while we still don't have Tensor
        # bindings for ATen functions
        with torch.no_grad():
            # NB: this is an INPLACE function on all_weights, that's why the
            # no_grad() is necessary.
            torch._cudnn_rnn_flatten_weight(
                all_weights, (4 if self.bias else 2), self.input_size,
                rnn.get_cudnn_mode(self.mode), self.hidden_size, self.num_layers,
                self.batch_first, bool(self.bidirectional))
def flatten_parameters(self):
    """Resets parameter data pointers so that they can use faster code paths.

    Right now, this works only if the module is on the GPU and cuDNN is
    enabled. Otherwise, it's a no-op.
    """
    any_param = next(self.parameters()).data
    if not any_param.is_cuda or not torch.backends.cudnn.is_acceptable(any_param):
        return

    all_weights = self.all_weights
    unique_data_ptrs = set(p.data_ptr() for p in all_weights)
    if len(unique_data_ptrs) != len(all_weights):
        return

    with torch.cuda.device_of(any_param):
        import torch.backends.cudnn.rnn as rnn

        # NB: This is a temporary hack while we still don't have Tensor
        # bindings for ATen functions
        with torch.no_grad():
            # NB: this is an INPLACE function on all_weights, that's why the
            # no_grad() is necessary.
            torch._cudnn_rnn_flatten_weight(
                all_weights, (4 if self.bias else 2), self.input_size,
                rnn.get_cudnn_mode(self.mode), self.hidden_size, self.num_layers,
                self.batch_first, bool(self.bidirectional))
def flatten_parameters(self):
    """Resets parameter data pointers so that they can use faster code paths.

    Right now, this works only if the module is on the GPU and cuDNN is
    enabled. Otherwise, it's a no-op.
    """
    # Short-circuits if _flat_weights is only partially instantiated
    if len(self._flat_weights) != len(self._flat_weights_names):
        return

    for w in self._flat_weights:
        if not torch.is_tensor(w):
            return

    # Short-circuits if any tensor in self._flat_weights is not acceptable to cuDNN
    # or the tensors in _flat_weights are of different dtypes
    first_fw = self._flat_weights[0]
    dtype = first_fw.dtype
    for fw in self._flat_weights:
        if (not torch.is_tensor(fw.data) or not (fw.data.dtype == dtype) or
                not fw.data.is_cuda or
                not torch.backends.cudnn.is_acceptable(fw.data)):
            return

    # If any parameters alias, we fall back to the slower, copying code path. This is
    # a sufficient check, because overlapping parameter buffers that don't completely
    # alias would break the assumptions of the uniqueness check in
    # Module.named_parameters().
    unique_data_ptrs = set(p.data_ptr() for p in self._flat_weights)
    if len(unique_data_ptrs) != len(self._flat_weights):
        return

    with torch.cuda.device_of(first_fw):
        import torch.backends.cudnn.rnn as rnn

        # Note: no_grad() is necessary since _cudnn_rnn_flatten_weight is
        # an inplace operation on self._flat_weights
        with torch.no_grad():
            torch._cudnn_rnn_flatten_weight(
                self._flat_weights, (4 if self.bias else 2), self.input_size,
                rnn.get_cudnn_mode(self.mode), self.hidden_size, self.num_layers,
                self.batch_first, bool(self.bidirectional))
def flatten_parameters(self):
    # Re-packs the weights into a single contiguous buffer so cuDNN can use its
    # fast fused kernels; a no-op unless the module is on the GPU with cuDNN enabled.
    any_param = next(self.parameters()).data
    if not any_param.is_cuda or not torch.backends.cudnn.is_acceptable(any_param):
        return

    # If any parameters alias, fall back to the slower, copying code path.
    all_weights = self._flat_weights
    unique_data_ptrs = set(p.data_ptr() for p in all_weights)
    if len(unique_data_ptrs) != len(all_weights):
        return

    with torch.cuda.device_of(any_param):
        import torch.backends.cudnn.rnn as rnn

        # no_grad() is necessary because _cudnn_rnn_flatten_weight modifies
        # all_weights in place. Note that the cuDNN mode is hardcoded to 'LSTM'
        # in this variant.
        with torch.no_grad():
            torch._cudnn_rnn_flatten_weight(
                all_weights, (4 if self.bias else 2), self.input_size,
                rnn.get_cudnn_mode('LSTM'), self.hidden_size, self.num_layers,
                self.batch_first, bool(self.bidirectional))
def flatten_parameters(self):
    """Resets parameter data pointers so that they can use faster code paths.

    Right now, this works only if the module is on the GPU and cuDNN is
    enabled. Otherwise, it's a no-op.
    """
    any_param = next(self.parameters()).data
    if not any_param.is_cuda or not torch.backends.cudnn.is_acceptable(any_param):
        self._data_ptrs = []
        return

    # If any parameters alias, we fall back to the slower, copying code path. This is
    # a sufficient check, because overlapping parameter buffers that don't completely
    # alias would break the assumptions of the uniqueness check in
    # Module.named_parameters().
    unique_data_ptrs = set(p.data_ptr() for l in self.all_weights for p in l)
    if len(unique_data_ptrs) != sum(len(l) for l in self.all_weights):
        self._data_ptrs = []
        return

    with torch.cuda.device_of(any_param):
        import torch.backends.cudnn.rnn as rnn

        weight_arr = list(itertools.chain.from_iterable(self.all_weights))
        weight_stride0 = len(self.all_weights[0])

        # NB: This is a temporary hack while we still don't have Tensor
        # bindings for ATen functions
        with torch.no_grad():
            # NB: this is an INPLACE function on weight_arr, that's why the
            # no_grad() is necessary.
            weight_buf = torch._cudnn_rnn_flatten_weight(
                weight_arr, weight_stride0, self.input_size,
                rnn.get_cudnn_mode(self.mode), self.hidden_size, self.num_layers,
                self.batch_first, bool(self.bidirectional))

    self._param_buf_size = weight_buf.size(0)
    self._data_ptrs = list(p.data.data_ptr() for p in self.parameters())
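
# Minimal usage sketch (not part of the variants above): flatten_parameters()
# is a public method on torch.nn.RNN/LSTM/GRU, and every variant above
# implements the same idea. The sketch assumes a plain torch.nn.LSTM and simply
# calls the method after moving the module to the GPU, the case the docstrings
# describe; the sizes and variable names are illustrative only.
import torch
import torch.nn as nn

lstm = nn.LSTM(input_size=16, hidden_size=32, num_layers=2, batch_first=True)
if torch.cuda.is_available():
    lstm = lstm.cuda()
    # Re-packs the weights into one contiguous buffer so cuDNN can use its
    # fused kernels; on CPU (or without cuDNN) the call is a no-op.
    lstm.flatten_parameters()
    x = torch.randn(8, 10, 16, device="cuda")
    output, (h_n, c_n) = lstm(x)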