def backward_extended(self, grad_output, grad_hy):
    """Backward pass of the cuDNN RNN with an EB-specific renormalisation.

    Before the regular cuDNN backward calls, the forward pass is re-run with
    sign-filtered weights and a shifted input, and ``grad_output`` is divided
    in place by the result of that forward pass.

    Returns:
        ``(grad_input, grad_weight, grad_hx)``.  ``grad_weight`` has one tuple
        per entry of ``weight``; the tuples hold ``None`` when no gradient is
        needed for any input after the first.
    """
    input, hx, weight, output = self.saved_tensors
    input = input.contiguous()

    # --- EB-specific section: start ---
    # Keep only positive weights, or the magnitudes of the negative ones.
    if torch.use_pos_weights:
        weight = weight.clamp(min=0)
    else:
        weight = weight.clamp(max=0).abs()
    # Shift the input so that its minimum is zero.
    if input.data.min() < 0:
        input.data = input.data - input.data.min()
    normfactor = input.new()
    hy = hx.new() if torch.is_tensor(hx) else tuple(h.new() for h in hx)
    cudnn.rnn.forward(self, input, hx, weight, normfactor, hy)
    # The small constant avoids dividing by an exact zero.
    grad_output /= normfactor + 1e-10
    # --- EB-specific section: stop ---

    assert cudnn.is_acceptable(input)

    grad_input = input.new()
    grad_hx = (input.new() if torch.is_tensor(hx)
               else tuple(h.new() for h in hx))

    # Snapshot the reserve buffer so it can be restored after the backward
    # calls when variables are retained.
    if self.retain_variables:
        self._reserve_clone = self.reserve.clone()

    cudnn.rnn.backward_grad(
        self, input, hx, weight, output,
        grad_output, grad_hy, grad_input, grad_hx)

    if not any(self.needs_input_grad[1:]):
        grad_weight = [(None,) * len(layer) for layer in weight]
    else:
        grad_weight = [
            tuple(param.new().resize_as_(param) for param in layer)
            for layer in weight
        ]
        cudnn.rnn.backward_weight(
            self, input, hx, output, weight, grad_weight)

    if self.retain_variables:
        self.reserve = self._reserve_clone
        del self._reserve_clone

    return grad_input, grad_weight, grad_hx
def forward(input, *fargs, **fkwargs):
    """Run the RNN on ``input`` with the cuDNN or the autograd implementation.

    ``args`` and ``kwargs`` are free variables (presumably captured from the
    enclosing factory function) that configure the chosen implementation.
    While the JIT is tracing, the function is wrapped with a symbolic
    override before being called.
    """
    rnn_cls = CudnnRNN if cudnn.is_acceptable(input.data) else AutogradRNN
    func = rnn_cls(*args, **kwargs)

    # Hack for the tracer: lets an RNN be represented as a single node and
    # exported to ONNX in that form.  The first argument is checked by hand
    # because forward() is rebuilt on every RNN() invocation and the cost of
    # applying the decorator each time should be avoided.
    import torch
    if torch._C._jit_is_tracing(input):
        import torch.onnx.symbolic
        sym = torch.onnx.symbolic.RNN_symbolic_builder(*args, **kwargs)
        cell_type = args[0]
        bound_symbolic = partial(
            torch.onnx.symbolic.rnn_trace_override_symbolic,
            cell_type, func, sym)
        decorator = torch.onnx.symbolic_override_first_arg_based(
            bound_symbolic)
        func = decorator(func)

    return func(input, *fargs, **fkwargs)
def forward(self, weight, bias, input):
    """Run the cuDNN batch-norm forward kernel into a tensor on ``self.storage``.

    Raises:
        Exception: if any argument that is not ``None`` is rejected by
            ``cudnn.is_acceptable``.

    Returns:
        The output tensor, built on ``self.storage`` and resized like
        ``input``.
    """
    # cuDNN is mandatory for every tensor that was actually supplied.
    if any(t is not None and not cudnn.is_acceptable(t)
           for t in (weight, bias, input)):
        raise Exception(
            'You must be using CUDNN to use _EfficientBatchNorm')

    # Buffers handed to the kernel alongside the running statistics.
    self.save_mean = self.running_mean.new().resize_as_(self.running_mean)
    self.save_var = self.running_var.new().resize_as_(self.running_var)

    # The result lives in the pre-existing storage rather than fresh memory.
    res = type(input)(self.storage)
    res.resize_as_(input)
    torch._C._cudnn_batch_norm_forward(
        input, res, weight, bias,
        self.running_mean, self.running_var,
        self.save_mean, self.save_var,
        self.training, self.momentum, self.eps)
    return res
def affine_grid_generator(theta, size):
    """Build an affine sampling grid, using cuDNN when it is applicable.

    The cuDNN kernel is used only when ``theta`` is a CUDA tensor, cuDNN is
    enabled and accepts it, and ``size`` has four entries; every other case
    goes through ``AffineGridGenerator.apply``.
    """
    use_cudnn = (theta.data.is_cuda
                 and cudnn.enabled
                 and cudnn.is_acceptable(theta.data)
                 and len(size) == 4)
    if not use_cudnn:
        return AffineGridGenerator.apply(theta, size)

    N, C, H, W = size
    return torch.cudnn_affine_grid_generator(theta, N, C, H, W)
def forward(self, input, weight=None, bias=None):
    """Batch-normalise ``input`` with cuDNN when possible, else the type backend.

    Saves the inputs for backward, records the backend choice in
    ``self.use_cudnn`` and allocates the ``_save_mean`` / ``_save_std``
    buffers that are passed to the kernel.

    Returns:
        A new tensor with the same size as ``input``.
    """
    self.save_for_backward(input, weight, bias)

    # cuDNN is skipped for half inputs: it requires float weight and bias
    # tensors, whereas THCUNN requires half tensors.
    self.use_cudnn = (cudnn.is_acceptable(input)
                      and weight is not None
                      and bias is not None
                      and not isinstance(input, torch.cuda.HalfTensor))

    # Temporary buffers used in forward and backward, one slot per entry
    # along dimension 1 of the input.
    num_features = input.size(1)
    self._save_mean = input.new(num_features)
    self._save_std = input.new(num_features)

    output = input.new(input.size())

    # Both kernels take the same trailing arguments.
    shared_args = (
        weight, bias,
        self.running_mean, self.running_var,
        self._save_mean, self._save_std,
        self.training, self.momentum, self.eps,
    )
    if self.use_cudnn:
        torch._C._cudnn_batch_norm_forward(input, output, *shared_args)
    else:
        backend = type2backend[type(input)]
        backend.BatchNormalization_updateOutput(
            backend.library_state, input, output, *shared_args)
    return output
def backward(ctx, grad_output):
    """Compute the grid-sampler gradients with respect to ``input`` and ``grid``.

    Uses the cuDNN kernel when ``input`` is acceptable to cuDNN and the
    padding mode recorded on ``ctx`` is the zero-padding mode; otherwise the
    bilinear sampler of the tensor-type backend is used.

    Returns:
        ``(grad_input, grad_grid, None)``.  The trailing ``None`` presumably
        stands for the non-differentiable ``padding_mode`` argument of the
        matching forward.
    """
    input, grid = ctx.saved_tensors
    padding_mode = ctx.padding_mode

    grad_input = input.new(input.size())
    grad_grid = grid.new(grid.size())

    # ``ctx.padding_mode`` is the mode constant that is handed to the backend
    # kernel below, not the user-facing string.  Comparing it with 'zeros'
    # never matched, so the cuDNN branch was unreachable; compare against the
    # module-level MODE_ZEROS constant instead.
    # NOTE(review): assumes MODE_ZEROS is defined in this module - confirm.
    if cudnn.is_acceptable(input) and padding_mode == MODE_ZEROS:
        grid = grid.contiguous()
        if 0 in input.stride():
            input = input.contiguous()
        # Sometimes grad_output is a scalar (like 1) expanded as a tensor.
        # cuDNN requires a tensor that has non-zero strides.
        if 0 in grad_output.stride():
            grad_output = grad_output.contiguous()
        torch._C._cudnn_grid_sampler_backward(
            input, grad_input, grid, grad_grid, grad_output)
    else:
        backend = type2backend[type(input)]
        backend.SpatialGridSamplerBilinear_updateGradInput(
            backend.library_state, input, grad_input,
            grid, grad_grid, grad_output, padding_mode)

    return grad_input, grad_grid, None
def forward(ctx, input, grid, padding_mode='zeros'):
    """Sample ``input`` at the locations in ``grid``.

    The padding mode is stored on ``ctx`` as ``MODE_ZEROS`` or
    ``MODE_BORDER``.  cuDNN is used only for cuDNN-acceptable input with
    ``'zeros'`` padding; otherwise the tensor-type backend runs the sampler.

    Raises:
        ValueError: if ``padding_mode`` is neither ``'zeros'`` nor
            ``'border'``.
    """
    ctx.save_for_backward(input, grid)

    if padding_mode == 'zeros':
        ctx.padding_mode = MODE_ZEROS
    elif padding_mode == 'border':
        ctx.padding_mode = MODE_BORDER
    else:
        message = "padding_mode needs to be 'zeros' or 'border', but got {}"
        raise ValueError(message.format(padding_mode))

    use_cudnn = cudnn.is_acceptable(input) and padding_mode == 'zeros'

    # Output size: (grid dim 0, input dim 1, grid dim 1, grid dim 2).
    grid_sz = grid.size()
    output = input.new(grid_sz[0], input.size(1), grid_sz[1], grid_sz[2])

    if use_cudnn:
        grid = grid.contiguous()
        # An input with a zero stride is made contiguous first.
        if 0 in input.stride():
            input = input.contiguous()
        torch._C._cudnn_grid_sampler_forward(input, grid, output)
    else:
        backend = type2backend[type(input)]
        backend.SpatialGridSamplerBilinear_updateOutput(
            backend.library_state, input, grid, output, ctx.padding_mode)

    return output
def backward_extended(self, grad_output, grad_hy):
    """Backward pass of the cuDNN RNN.

    Returns:
        ``(grad_input, grad_weight, grad_hx)``.  ``grad_weight`` is an empty
        tensor unless ``self.needs_input_grad[1]`` is set, in which case it
        is a list with one tuple of zero-initialised tensors per entry of
        ``weight``.
    """
    input, hx, weight, output = self.saved_tensors
    assert (cudnn.is_acceptable(input))

    grad_input = input.new()
    # Placeholder returned as-is when no weight gradient is requested.
    grad_weight = input.new()
    # The hidden state is either a single tensor or a tuple of tensors.
    if torch.is_tensor(hx):
        grad_hx = input.new()
    else:
        grad_hx = tuple(state.new() for state in hx)

    cudnn.rnn.backward_grad(
        self, input, hx, weight, output,
        grad_output, grad_hy, grad_input, grad_hx)

    if self.needs_input_grad[1]:
        grad_weight = [
            tuple(param.new().resize_as_(param).zero_() for param in layer)
            for layer in weight
        ]
        cudnn.rnn.backward_weight(
            self, input, hx, output, weight, grad_weight)

    return grad_input, grad_weight, grad_hx
def forward(self, input, weight, bias=None):
    """Convolve ``input`` with ``weight`` via cuDNN or the THNN backend.

    The inputs are saved for backward (``bias`` only when it is given).

    Raises:
        ValueError: on the THNN path when ``self.groups`` is not 1.
    """
    output = input.new(*self._output_size(input, weight))

    to_save = (input, weight) if bias is None else (input, weight, bias)
    self.save_for_backward(*to_save)

    if cudnn.is_acceptable(input):
        self._cudnn_info = torch._C._cudnn_convolution_forward(
            input, weight, bias, output,
            self.pad[0], self.pad[1],
            self.stride[0], self.stride[1],
            self.groups, cudnn.benchmark)
        return output

    # TODO: implement groups for THNN
    if self.groups != 1:
        raise ValueError('THNN does not support groups')

    backend = type2backend[type(input)]
    # Scratch buffers; they are passed to the THNN kernel and kept on self.
    self._finput = input.new()
    self._fgrad_input = input.new()
    backend.SpatialConvolutionMM_updateOutput(
        backend.library_state, input, output, weight, bias,
        self._finput, self._fgrad_input,
        weight.size(3), weight.size(2),
        self.stride[1], self.stride[0],
        self.pad[1], self.pad[0])
    return output
def forward(self, weight, bias, input):
    """Run the cuDNN batch-norm forward kernel into the shared storage.

    The output tensor is created on ``self.storage`` after switching that
    storage to the device ``weight`` lives on.

    Raises:
        Exception: if any argument that is not ``None`` is rejected by
            ``cudnn.is_acceptable``.
    """
    # cuDNN is mandatory for every tensor that was actually supplied.
    if any(t is not None and not cudnn.is_acceptable(t)
           for t in (weight, bias, input)):
        raise Exception(
            'You must be using CUDNN to use EfficientBatchNorm')

    # Buffers handed to the kernel alongside the running statistics.
    self.save_mean = self.running_mean.new().resize_as_(self.running_mean)
    self.save_var = self.running_var.new().resize_as_(self.running_var)

    # Build the result on the shared storage, on the same device as weight.
    cur_device_id = weight.get_device()
    device_storage = self.storage.change_device(cur_device_id)
    res = type(input)(device_storage).resize_as_(input)
    assert weight.get_device() == res.get_device(), \
        "input and output should be on the same chip!"

    torch._C._cudnn_batch_norm_forward(
        input, res, weight, bias,
        self.running_mean, self.running_var,
        self.save_mean, self.save_var,
        self.training, self.momentum, self.eps)
    return res
def forward(input, *fargs, **fkwargs):
    """Dispatch the RNN call to ``CudnnRNN`` or ``AutogradRNN``.

    ``args`` and ``kwargs`` are free variables (presumably from the enclosing
    scope) used to build the implementation.  When the JIT is tracing
    ``input``, the implementation is first wrapped with a symbolic override.
    """
    if cudnn.is_acceptable(input.data):
        func = CudnnRNN(*args, **kwargs)
    else:
        func = AutogradRNN(*args, **kwargs)

    import torch
    # Fast path: not tracing, so no wrapper has to be constructed.  This
    # function is reconstructed on every RNN() invocation, which is why the
    # first argument is checked explicitly instead of always decorating.
    if not torch._C._jit_is_tracing(input):
        return func(input, *fargs, **fkwargs)

    # Tracer hack: represent the RNN as a single node so that it can be
    # exported to ONNX in that form.
    import torch.onnx.symbolic
    sym = torch.onnx.symbolic.RNN_symbolic_builder(*args, **kwargs)
    cell_type = args[0]
    bound_symbolic = partial(
        torch.onnx.symbolic.rnn_trace_override_symbolic,
        cell_type, func, sym)
    decorator = torch.onnx.symbolic_override_first_arg_based(bound_symbolic)
    func = decorator(func)
    return func(input, *fargs, **fkwargs)
def forward(self, weight, bias, input):
    """Batch-normalise ``input`` with cuDNN, writing into ``self.storage``.

    Raises:
        Exception: if a supplied (non-``None``) tensor is not acceptable to
            cuDNN.

    Returns:
        The result tensor, which is backed by ``self.storage`` after that
        storage has been switched to the device of ``weight``.
    """
    # Every tensor that is present must be usable by cuDNN.
    supplied = (weight, bias, input)
    if any(t is not None and not cudnn.is_acceptable(t) for t in supplied):
        raise Exception('You must be using CUDNN to use EfficientBatchNorm')

    # Buffers passed to the kernel alongside the running statistics.
    self.save_mean = self.running_mean.new().resize_as_(self.running_mean)
    self.save_var = self.running_var.new().resize_as_(self.running_var)

    # Place the output on the shared storage, on the device of ``weight``.
    cur_device_id = weight.get_device()
    shared = self.storage.change_device(cur_device_id)
    res = type(input)(shared).resize_as_(input)
    assert weight.get_device() == res.get_device(), \
        "input and output should be on the same chip!"

    torch._C._cudnn_batch_norm_forward(
        input, res, weight, bias,
        self.running_mean, self.running_var,
        self.save_mean, self.save_var,
        self.training, self.momentum, self.eps)
    return res
def grid_sampler(input, grid, padding_mode):
    """Sample ``input`` at ``grid``, using the cuDNN kernel when eligible.

    cuDNN is chosen only for 4-D, cuDNN-acceptable input with ``'zeros'``
    padding whose size along dimension 1 is at most 1024; everything else
    goes through ``GridSampler.apply``.
    """
    cudnn_eligible = (
        cudnn.is_acceptable(input.data)
        and padding_mode == 'zeros'
        and input.dim() == 4
        # as of cudnn 7102, will not work for larger than 1024
        and input.size(1) <= 1024
    )
    if not cudnn_eligible:
        return GridSampler.apply(input, grid, padding_mode)
    return torch.cudnn_grid_sampler(input, grid)
def affine_grid_generator(theta, size):
    # type: (Tensor, List[int]) -> Tensor
    """Generate an affine grid, through cuDNN when the inputs allow it.

    cuDNN is used when ``theta`` is a CUDA tensor accepted by an enabled
    cuDNN, ``size`` has four entries and its first entry is below 65536.
    All other inputs use ``torch.affine_grid_generator``.
    """
    # A single return statement is kept on purpose.
    if (theta.is_cuda
            and cudnn.enabled
            and cudnn.is_acceptable(theta)
            and len(size) == 4
            and size[0] < 65536):
        grid = torch.cudnn_affine_grid_generator(
            theta, size[0], size[1], size[2], size[3])
    else:
        grid = torch.affine_grid_generator(theta, size)
    return grid
def affine_grid_generator(theta, size):
    """Generate an affine grid; CUDA input with a 4-entry size requires cuDNN.

    Raises:
        RuntimeError: for CUDA ``theta`` with ``len(size) == 4`` when cuDNN
            is disabled or does not accept ``theta``.
    """
    # Anything that is not CUDA with a 4-entry size uses the generic path.
    if not (theta.data.is_cuda and len(size) == 4):
        return AffineGridGenerator.apply(theta, size)

    if not cudnn.enabled:
        raise RuntimeError("AffineGridGenerator needs CuDNN for "
                           "processing CUDA inputs, but CuDNN is not enabled")
    if not cudnn.is_acceptable(theta.data):
        raise RuntimeError(
            "AffineGridGenerator generator theta not acceptable for CuDNN")

    N, C, H, W = size
    return torch.cudnn_affine_grid_generator(theta, N, C, H, W)
def affine_grid_generator(theta, size):
    """Generate an affine grid; CUDA input is always routed through cuDNN.

    Raises:
        RuntimeError: for CUDA ``theta`` when cuDNN is disabled or does not
            accept ``theta``.
    """
    # Non-CUDA input uses the generic autograd function.
    if not theta.data.is_cuda:
        return AffineGridGenerator.apply(theta, size)

    if not cudnn.enabled:
        raise RuntimeError("AffineGridGenerator needs CuDNN for "
                           "processing CUDA inputs, but CuDNN is not enabled")
    if not cudnn.is_acceptable(theta.data):
        raise RuntimeError(
            "AffineGridGenerator generator theta not acceptable for CuDNN")

    # ``size`` must unpack into exactly four values on this path.
    N, C, H, W = size
    return torch._C._VariableBase.cudnn_affine_grid_generator(
        theta, N, C, H, W)
def backward(self, grad_output):
    """Compute convolution gradients via cuDNN or the THNN backend.

    Only the gradients flagged in ``self.needs_input_grad`` are computed;
    the others stay ``None``.

    Returns:
        ``(grad_input, grad_weight, grad_bias)`` when a bias was saved,
        otherwise ``(grad_input, grad_weight)``.
    """
    saved = self.saved_tensors
    if len(saved) != 2:
        input, weight, bias = saved
    else:
        input, weight = saved
        bias = None

    grad_input = grad_weight = grad_bias = None
    want_bias_grad = bias is not None and self.needs_input_grad[2]

    if cudnn.is_acceptable(input):
        if self.needs_input_grad[0]:
            grad_input = input.new().resize_as_(input)
            torch._C._cudnn_convolution_backward_data(
                grad_output, grad_input, weight,
                self._cudnn_info, cudnn.benchmark)
        if self.needs_input_grad[1]:
            grad_weight = weight.new().resize_as_(weight)
            torch._C._cudnn_convolution_backward_filter(
                grad_output, input, grad_weight,
                self._cudnn_info, cudnn.benchmark)
        if want_bias_grad:
            grad_bias = bias.new().resize_as_(bias)
            torch._C._cudnn_convolution_backward_bias(
                grad_output, grad_bias, self._cudnn_info)
    else:
        backend = type2backend[type(input)]
        if self.needs_input_grad[0]:
            grad_input = input.new().resize_as_(input).zero_()
            backend.SpatialConvolutionMM_updateGradInput(
                backend.library_state, input, grad_output, grad_input,
                weight, self._finput, self._fgrad_input,
                weight.size(3), weight.size(2),
                self.stride[1], self.stride[0],
                self.pad[1], self.pad[0])
        if any(self.needs_input_grad[1:]):
            # On this path the gradient buffers are zeroed before the call.
            grad_weight = weight.new().resize_as_(weight).zero_()
            if want_bias_grad:
                grad_bias = bias.new().resize_as_(bias).zero_()
            else:
                grad_bias = None
            backend.SpatialConvolutionMM_accGradParameters(
                backend.library_state, input, grad_output,
                grad_weight, grad_bias,
                self._finput, self._fgrad_input,
                weight.size(3), weight.size(2),
                self.stride[1], self.stride[0],
                self.pad[1], self.pad[0], 1)

    if bias is None:
        return grad_input, grad_weight
    return grad_input, grad_weight, grad_bias
def forward(self, weight, bias, input):
    """Run the full cuDNN convolution forward and return its output.

    The value returned by the kernel call is kept in ``self._cudnn_info``.

    Raises:
        Exception: if a supplied (non-``None``) tensor is not acceptable to
            cuDNN.
    """
    # cuDNN is mandatory for every tensor that was actually supplied.
    if any(t is not None and not cudnn.is_acceptable(t)
           for t in (weight, bias, input)):
        raise Exception(
            'You must be using CUDNN to use _EfficientBatchNorm')

    res = input.new(*self._output_size(input, weight))

    # The scalar settings are duplicated into pairs for the kernel call.
    padding = (self.padding, self.padding)
    stride = (self.stride, self.stride)
    dilation = (self.dilation, self.dilation)
    self._cudnn_info = torch._C._cudnn_convolution_full_forward(
        input, weight, bias, res,
        padding, stride, dilation,
        self.groups, cudnn.benchmark, True)
    return res
def forward_extended(self, input, weight, hx):
    """Run the cuDNN RNN forward pass and save its tensors for backward.

    Returns:
        ``(output, hy)``, where ``hy`` is a tensor when ``hx`` is a tensor
        and a tuple of tensors otherwise.
    """
    assert cudnn.is_acceptable(input)

    # Empty result tensors, passed to the cuDNN call below.
    output = input.new()
    hy = hx.new() if torch.is_tensor(hx) else tuple(h.new() for h in hx)

    cudnn.rnn.forward(self, input, hx, weight, output, hy)
    self.save_for_backward(input, hx, weight, output)
    return output, hy
def forward(input, *fargs, **fkwargs):
    """Run the RNN with ``CudnnRNN`` or ``AutogradRNN``, with tracer support.

    ``args`` and ``kwargs`` are free variables (presumably from the enclosing
    scope).  While the JIT is tracing, keyword arguments are not allowed and
    the result is passed through ``hack_onnx_rnn``.
    """
    rnn_cls = CudnnRNN if cudnn.is_acceptable(input.data) else AutogradRNN
    func = rnn_cls(*args, **kwargs)

    if not torch._C._jit_is_tracing(input):
        return func(input, *fargs, **fkwargs)

    # Hack for the tracer that allows RNNs to be represented as single
    # nodes and exported to ONNX in this form.
    assert not fkwargs
    output = func(input, *fargs)
    return hack_onnx_rnn((input,) + fargs, output, args, kwargs)
def forward(input, *fargs, **fkwargs):
    """Dispatch the RNN call and, when tracing, post-process it for ONNX.

    The implementation is built from the free variables ``args`` and
    ``kwargs`` (presumably captured from the enclosing scope).
    """
    if cudnn.is_acceptable(input.data):
        func = CudnnRNN(*args, **kwargs)
    else:
        func = AutogradRNN(*args, **kwargs)

    tracing = torch._C._jit_is_tracing(input)
    if tracing:
        # Tracer hack: represent the RNN as a single node so that it can be
        # exported to ONNX.  Keyword arguments are rejected on this path.
        assert not fkwargs
        output = func(input, *fargs)
        traced_inputs = (input, ) + fargs
        return hack_onnx_rnn(traced_inputs, output, args, kwargs)

    return func(input, *fargs, **fkwargs)
def forward_extended(self, input, weight, hx):
    """Run the cuDNN RNN forward pass and save its tensors for backward.

    Returns:
        ``(output, hy)``; ``hy`` matches ``hx`` in being either one tensor
        or a tuple of tensors.
    """
    assert cudnn.is_acceptable(input)
    # TODO: raise a warning if weight_data_ptr is None

    output = input.new()
    if torch.is_tensor(hx):
        hy = hx.new()
    else:
        hy = tuple(state.new() for state in hx)

    cudnn.rnn.forward(self, input, hx, weight, output, hy)
    self.save_for_backward(input, hx, weight, output)
    return output, hy
def forward(input, *fargs, **fkwargs):
    """Run the RNN with ``CudnnRNN`` or ``AutogradRNN``.

    ``args`` and ``kwargs`` are free variables (presumably from the enclosing
    scope).  While the JIT is tracing, the implementation is wrapped with
    ``RNN_symbolic_builder(*args, **kwargs)`` before it is called.
    """
    rnn_cls = CudnnRNN if cudnn.is_acceptable(input.data) else AutogradRNN
    func = rnn_cls(*args, **kwargs)

    # Hack for the tracer that allows RNNs to be represented as single nodes
    # and exported to ONNX in this form.  The builder can also be used as a
    # decorator at a higher level; the first argument is checked explicitly
    # here to reduce the overhead of creating the lambda.
    if torch._C._jit_is_tracing(input):
        wrap = RNN_symbolic_builder(*args, **kwargs)
        func = wrap(func)

    return func(input, *fargs, **fkwargs)
def forward(self, weight, bias, input):
    """Run the full cuDNN convolution forward and return its output.

    The value returned by the kernel call is kept in ``self._cudnn_info``.

    Raises:
        Exception: if a supplied (non-``None``) tensor is not acceptable to
            cuDNN.
    """
    # Every tensor that is present must be usable by cuDNN.
    supplied = (weight, bias, input)
    if any(t is not None and not cudnn.is_acceptable(t) for t in supplied):
        raise Exception('You must be using CUDNN to use _EfficientBatchNorm')

    res = input.new(*self._output_size(input, weight))

    # The scalar settings are duplicated into pairs for the kernel call.
    padding = (self.padding, self.padding)
    stride = (self.stride, self.stride)
    dilation = (self.dilation, self.dilation)
    self._cudnn_info = torch._C._cudnn_convolution_full_forward(
        input, weight, bias, res,
        padding, stride, dilation,
        self.groups, cudnn.benchmark)
    return res
def forward(input, *fargs, **fkwargs):
    """Dispatch the RNN call, wrapping it for ONNX export while tracing.

    The implementation is built from the free variables ``args`` and
    ``kwargs`` (presumably captured from the enclosing scope).
    """
    if cudnn.is_acceptable(input.data):
        func = CudnnRNN(*args, **kwargs)
    else:
        func = AutogradRNN(*args, **kwargs)

    import torch
    if not torch._C._jit_is_tracing(input):
        return func(input, *fargs, **fkwargs)

    # Tracer hack: represent the RNN as a single node so that it can be
    # exported to ONNX.  The builder can also be used as a decorator at a
    # higher level; the first argument is checked explicitly to reduce the
    # overhead of creating the lambda.
    import torch.onnx.symbolic
    builder = torch.onnx.symbolic.RNN_symbolic_builder(*args, **kwargs)
    func = builder(func)
    return func(input, *fargs, **fkwargs)
def _update_output(self, input, weight, bias):
    """Compute the convolution output via cuDNN or the THNN fallback.

    cuDNN is used when it accepts ``input`` and the convolution is not
    dilated; the decision is recorded in ``self.use_cudnn``.
    """
    self.use_cudnn = cudnn.is_acceptable(input) and not self.is_dilated()

    if not self.use_cudnn:
        # One empty buffer list per group, prepared before the THNN call.
        self._bufs = [[] for _ in range(self.groups)]
        return self._thnn('update_output', input, weight, bias)

    output = input.new(*self._output_size(input, weight))
    # Both kernels share one argument list; only the function differs.
    if self.transposed:
        conv_forward = torch._C._cudnn_convolution_transpose_full_forward
    else:
        conv_forward = torch._C._cudnn_convolution_full_forward
    self._cudnn_info = conv_forward(
        input, weight, bias, output,
        self.padding, self.stride, self.groups, cudnn.benchmark)
    return output
def forward(ctx, input, grid):
    """Sample ``input`` at the locations in ``grid``.

    Uses the cuDNN kernel when ``input`` is acceptable to cuDNN, otherwise
    the bilinear sampler of the tensor-type backend.
    """
    ctx.save_for_backward(input, grid)

    use_cudnn = cudnn.is_acceptable(input)

    # Output size: (grid dim 0, input dim 1, grid dim 1, grid dim 2).
    grid_sz = grid.size()
    output = input.new(grid_sz[0], input.size(1), grid_sz[1], grid_sz[2])

    if use_cudnn:
        grid = grid.contiguous()
        # An input with a zero stride is made contiguous first.
        if 0 in input.stride():
            input = input.contiguous()
        torch._C._cudnn_grid_sampler_forward(input, grid, output)
    else:
        backend = type2backend[type(input)]
        backend.SpatialGridSamplerBilinear_updateOutput(
            backend.library_state, input, grid, output)

    return output
def backward_extended(self, grad_output, grad_hy):
    """Backward pass of the cuDNN RNN.

    Returns:
        ``(grad_input, grad_weight, grad_hx)``.  ``grad_weight`` has one
        tuple per entry of ``weight``; the tuples hold ``None`` when no
        gradient is needed for any input after the first.
    """
    input, hx, weight, output = self.saved_tensors
    input = input.contiguous()
    assert cudnn.is_acceptable(input)

    grad_input = input.new()
    grad_hx = (input.new() if torch.is_tensor(hx)
               else tuple(h.new() for h in hx))

    # Snapshot the reserve buffer so it can be restored after the backward
    # calls when variables are retained.
    if self.retain_variables:
        self._reserve_clone = self.reserve.clone()

    cudnn.rnn.backward_grad(
        self, input, hx, weight, output,
        grad_output, grad_hy, grad_input, grad_hx)

    if not any(self.needs_input_grad[1:]):
        grad_weight = [(None,) * len(layer) for layer in weight]
    else:
        grad_weight = [
            tuple(param.new().resize_as_(param) for param in layer)
            for layer in weight
        ]
        cudnn.rnn.backward_weight(
            self, input, hx, output, weight, grad_weight)

    if self.retain_variables:
        self.reserve = self._reserve_clone
        del self._reserve_clone

    return grad_input, grad_weight, grad_hx
def backward_extended(self, grad_output, grad_hy):
    """Compute the cuDNN RNN gradients for input, weights and hidden state.

    Returns:
        ``(grad_input, grad_weight, grad_hx)``; ``grad_weight`` is a list
        with one tuple per entry of ``weight``.
    """
    input, hx, weight, output = self.saved_tensors
    input = input.contiguous()
    assert cudnn.is_acceptable(input)

    grad_input = input.new()
    # The hidden state is either a single tensor or a tuple of tensors.
    if torch.is_tensor(hx):
        grad_hx = input.new()
    else:
        grad_hx = tuple(state.new() for state in hx)

    # Keep a copy of the reserve buffer; it is put back at the end when
    # variables are retained.
    if self.retain_variables:
        self._reserve_clone = self.reserve.clone()

    cudnn.rnn.backward_grad(
        self, input, hx, weight, output,
        grad_output, grad_hy, grad_input, grad_hx)

    weights_need_grad = any(self.needs_input_grad[1:])
    if weights_need_grad:
        grad_weight = []
        for layer_weight in weight:
            grad_weight.append(
                tuple(w.new().resize_as_(w) for w in layer_weight))
        cudnn.rnn.backward_weight(
            self, input, hx, output, weight, grad_weight)
    else:
        # Same nesting as ``weight``, filled with None placeholders.
        grad_weight = [(None,) * len(layer_weight)
                       for layer_weight in weight]

    if self.retain_variables:
        self.reserve = self._reserve_clone
        del self._reserve_clone

    return grad_input, grad_weight, grad_hx
def backward(ctx, grad_output):
    """Compute the grid-sampler gradients with respect to ``input`` and ``grid``.

    Returns:
        ``(grad_input, grad_grid)``, each sized like the tensor it belongs
        to.
    """
    input, grid = ctx.saved_tensors
    use_cudnn = cudnn.is_acceptable(input)

    # Gradient buffers are needed on both paths.
    grad_input = input.new(input.size())
    grad_grid = grid.new(grid.size())

    if use_cudnn:
        grid = grid.contiguous()
        # An input with a zero stride is made contiguous first.
        if 0 in input.stride():
            input = input.contiguous()
        torch._C._cudnn_grid_sampler_backward(
            input, grad_input, grid, grad_grid, grad_output)
    else:
        backend = type2backend[type(input)]
        backend.SpatialGridSamplerBilinear_updateGradInput(
            backend.library_state, input, grad_input,
            grid, grad_grid, grad_output)

    return grad_input, grad_grid
def forward(input, weight, hidden, batch_sizes):
    """Flatten the weights, prepare dropout state and call ``impl``.

    ``impl``, ``dropout``, ``train``, ``dropout_state``, ``num_layers``,
    ``bidirectional``, ``batch_first``, ``flat_weight``, ``variable_length``
    and ``hidden_is_tensor`` are free variables (presumably from the
    enclosing scope).

    Returns:
        ``(result[0], hidden_out)``, where ``hidden_out`` is ``result[1]``
        when ``hidden_is_tensor`` is set and ``result[1:]`` otherwise.
    """
    # Four entries in the first weight group are taken to mean biases exist.
    has_biases = len(weight[0]) == 4
    # Concatenate the weight groups into one flat sequence, starting from
    # an empty container of the same type as the first group.
    weight = sum(weight, type(weight[0])())

    dropout_ts = None
    if cudnn.is_acceptable(input):
        dropout_seed = int(torch.IntTensor(1).random_())
        # The dropout state is initialised on the device holding the input.
        with torch.cuda.device(input.get_device()):
            dropout_ts = cudnn.rnn.init_dropout_state(
                dropout, train, dropout_seed, dropout_state)

    if variable_length:
        result = impl(input, batch_sizes, hidden, weight, has_biases,
                      num_layers, dropout, train, bidirectional,
                      flat_weight, dropout_ts)
    else:
        result = impl(input, hidden, weight, has_biases,
                      num_layers, dropout, train, bidirectional,
                      batch_first, flat_weight, dropout_ts)

    hidden_out = result[1] if hidden_is_tensor else result[1:]
    return result[0], hidden_out
def forward(self, weight, bias, input):
    """Batch-normalise ``input`` with cuDNN, writing into ``self.storage``.

    Raises:
        Exception: if a supplied (non-``None``) tensor is not acceptable to
            cuDNN.

    Returns:
        A tensor backed by ``self.storage`` and resized like ``input``.
    """
    # Every tensor that is present must be usable by cuDNN.
    supplied = (weight, bias, input)
    if any(t is not None and not cudnn.is_acceptable(t) for t in supplied):
        raise Exception('You must be using CUDNN to use _EfficientBatchNorm')

    # Buffers passed to the kernel alongside the running statistics.
    self.save_mean = self.running_mean.new().resize_as_(self.running_mean)
    self.save_var = self.running_var.new().resize_as_(self.running_var)

    # The result reuses the pre-existing storage instead of new memory.
    res = type(input)(self.storage)
    res.resize_as_(input)
    torch._C._cudnn_batch_norm_forward(
        input, res, weight, bias,
        self.running_mean, self.running_var,
        self.save_mean, self.save_var,
        self.training, self.momentum, self.eps)
    return res
def backward(ctx, grad_output):
    """Compute the grid-sampler gradients with respect to ``input`` and ``grid``.

    Returns:
        ``(grad_input, grad_grid)``, each sized like the tensor it belongs
        to.
    """
    input, grid = ctx.saved_tensors
    use_cudnn = cudnn.is_acceptable(input)

    # Gradient buffers are needed on both paths.
    grad_input = input.new(input.size())
    grad_grid = grid.new(grid.size())

    if not use_cudnn:
        backend = type2backend[type(input)]
        backend.SpatialGridSamplerBilinear_updateGradInput(
            backend.library_state, input, grad_input,
            grid, grad_grid, grad_output)
        return grad_input, grad_grid

    grid = grid.contiguous()
    if 0 in input.stride():
        input = input.contiguous()
    # Sometimes grad_output is a scalar (like 1) expanded as a tensor.
    # cuDNN requires a tensor that has non-zero strides.
    if 0 in grad_output.stride():
        grad_output = grad_output.contiguous()
    torch._C._cudnn_grid_sampler_backward(
        input, grad_input, grid, grad_grid, grad_output)
    return grad_input, grad_grid
def forward(ctx, input, grid, padding_mode='zeros'):
    """Sample ``input`` at ``grid`` with the requested padding mode.

    ``ctx.padding_mode`` receives ``MODE_ZEROS`` or ``MODE_BORDER``.  The
    cuDNN kernel is used only when cuDNN accepts ``input`` and the padding
    mode is ``'zeros'``.

    Raises:
        ValueError: if ``padding_mode`` is neither ``'zeros'`` nor
            ``'border'``.
    """
    ctx.save_for_backward(input, grid)

    if padding_mode == 'zeros':
        ctx.padding_mode = MODE_ZEROS
    elif padding_mode == 'border':
        ctx.padding_mode = MODE_BORDER
    else:
        template = "padding_mode needs to be 'zeros' or 'border', but got {}"
        raise ValueError(template.format(padding_mode))

    use_cudnn = cudnn.is_acceptable(input) and padding_mode == 'zeros'

    # Output size: (grid dim 0, input dim 1, grid dim 1, grid dim 2).
    grid_sz = grid.size()
    output = input.new(grid_sz[0], input.size(1), grid_sz[1], grid_sz[2])

    if not use_cudnn:
        backend = type2backend[type(input)]
        backend.SpatialGridSamplerBilinear_updateOutput(
            backend.library_state, input, grid, output, ctx.padding_mode)
        return output

    grid = grid.contiguous()
    # An input with a zero stride is made contiguous first.
    if 0 in input.stride():
        input = input.contiguous()
    torch._C._cudnn_grid_sampler_forward(input, grid, output)
    return output
def forward(input, *fargs, **fkwargs):
    """Run the RNN with ``CudnnRNN`` when cuDNN accepts the input data.

    Otherwise ``AutogradRNN`` is used.  ``args`` and ``kwargs`` are free
    variables (presumably from the enclosing scope) used to build the
    implementation.
    """
    rnn_cls = CudnnRNN if cudnn.is_acceptable(input.data) else AutogradRNN
    func = rnn_cls(*args, **kwargs)
    return func(input, *fargs, **fkwargs)
def grid_sampler(input, grid, padding_mode):
    """Sample ``input`` at ``grid``, using the cuDNN kernel when eligible.

    cuDNN is chosen only for 4-D, cuDNN-acceptable input with ``'zeros'``
    padding; everything else goes through ``GridSampler.apply``.
    """
    cudnn_eligible = (cudnn.is_acceptable(input.data)
                      and padding_mode == 'zeros'
                      and input.dim() == 4)
    if not cudnn_eligible:
        return GridSampler.apply(input, grid, padding_mode)
    return torch.cudnn_grid_sampler(input, grid)
def forward(input, *fargs, **fkwargs):
    """Dispatch the RNN call to the cuDNN or the autograd implementation.

    The implementation is built from the free variables ``args`` and
    ``kwargs`` (presumably captured from the enclosing scope) and then
    called with the arguments given here.
    """
    use_cudnn = cudnn.is_acceptable(input.data)
    if use_cudnn:
        rnn = CudnnRNN(*args, **kwargs)
    else:
        rnn = AutogradRNN(*args, **kwargs)
    return rnn(input, *fargs, **fkwargs)
def _enforce_cudnn(input): if not cudnn.enabled: raise RuntimeError("AffineGridGenerator needs CuDNN for " "processing CUDA inputs, but CuDNN is not enabled") assert cudnn.is_acceptable(input)
# NOTE: as of 2017-08-14, the Windows version was only available via
#   conda install -c peterjc123 pytorch
import torch
from torch import nn
from torch.autograd import Variable
from torch.backends import cudnn

# CUDA test: copy a small tensor to the GPU and print it.
x = torch.Tensor([1.0])
xx = x.cuda()
print(xx)

# cuDNN test: report whether cuDNN accepts the CUDA tensor.
print(cudnn.is_acceptable(xx))

word_embedding = nn.Embedding(10, 300).cuda()
bio_embedding = nn.Embedding(10, 32).cuda()

# A batch of 2 samples of 4 indices each; both embeddings get the same ones.
indices = [[1, 2, 4, 5], [4, 3, 2, 9]]
word_input = Variable(torch.LongTensor(indices).cuda())
bio_input = Variable(torch.LongTensor(indices).cuda())

wb = word_embedding(word_input)
bb = bio_embedding(bio_input)

# Join the two embeddings along dimension 2.
input_emd = torch.cat((wb, bb), dim=2)
print(input_emd.size())

# Sum everything into a scalar and back-propagate through it.
loss = input_emd.sum()
print(loss)
loss.backward()
def _enforce_cudnn(input): if not cudnn.enabled: raise RuntimeError( "AffineGridGenerator needs CuDNN for " "processing CUDA inputs, but CuDNN is not enabled") assert cudnn.is_acceptable(input)
def grid_sampler(input, grid, padding_mode):
    """Dispatch grid sampling to cuDNN or to ``GridSampler``.

    The cuDNN kernel handles 4-D input that cuDNN accepts when the padding
    mode is ``'zeros'``; all other calls use ``GridSampler.apply``.
    """
    if (cudnn.is_acceptable(input.data)
            and padding_mode == 'zeros'
            and input.dim() == 4):
        sampled = torch.cudnn_grid_sampler(input, grid)
    else:
        sampled = GridSampler.apply(input, grid, padding_mode)
    return sampled
def _enforce_cudnn(input): if not cudnn.enabled: raise RuntimeError( "GridSampler needs CuDNN for processing CUDA inputs," " but CuDNN is not enabled") assert cudnn.is_acceptable(input)
def grid_sampler(input, grid, padding_mode):
    """Sample ``input`` at ``grid``, using cuDNN for ``'zeros'`` padding.

    The cuDNN kernel is used when cuDNN accepts the input data and the
    padding mode is ``'zeros'``; otherwise ``GridSampler.apply`` is called.
    """
    cudnn_eligible = (cudnn.is_acceptable(input.data)
                      and padding_mode == 'zeros')
    if not cudnn_eligible:
        return GridSampler.apply(input, grid, padding_mode)
    return torch._C._VariableBase.cudnn_grid_sampler(input, grid)
# Check that the GPU build of PyTorch is installed correctly.
import torch  # silent when everything is fine
from torch.backends import cudnn  # silent when everything is fine

a = torch.Tensor([1.])  # silent when everything is fine

# Tensor.cuda() returns a copy on the GPU; it does not move the tensor in
# place.  The result must be kept, otherwise the print below shows the CPU
# tensor instead of the expected CUDA one.
a = a.cuda()  # silent when everything is fine
print(a)  # expected output: "tensor([ 1.], device='cuda:0')"

print(cudnn.is_acceptable(a))  # expected output: "True"
def _enforce_cudnn(input): if not cudnn.enabled: raise RuntimeError("GridSampler needs CuDNN for processing CUDA inputs," " but CuDNN is not enabled") assert cudnn.is_acceptable(input)
import torch
from torch.backends import cudnn

# CUDA test: copy a one-element tensor to the GPU and print it.
x = torch.Tensor([1.0])
xx = x.cuda()
print(xx)

# cuDNN test: report whether cuDNN accepts the CUDA tensor.
print(cudnn.is_acceptable(xx))
import torch
from torch.backends import cudnn

# Quick environment check, run only when the file is executed as a script.
if __name__ == '__main__':
    print('cuda :', torch.cuda.is_available())

    # Copy a one-element tensor to the GPU and show it.
    x = torch.Tensor([10.0]).cuda()
    print(x)

    # Report whether cuDNN accepts that CUDA tensor.
    print('cudnn :', cudnn.is_acceptable(x))