def test_custom_compound_op_autograd(self):
    # Test that a custom compound op (i.e. a custom op that just calls other aten ops)
    # correctly returns gradients of those other ops
    source = """
    #include <torch/library.h>
    torch::Tensor my_add(torch::Tensor x, torch::Tensor y) {
        return x + y;
    }
    TORCH_LIBRARY(my, m) {
        m.def("add", &my_add);
    }
    """
    torch.utils.cpp_extension.load_inline(
        name="is_python_module",
        cpp_sources=source,
        verbose=True,
        is_python_module=False,
    )

    a = torch.randn(5, 5, requires_grad=True)
    b = torch.randn(5, 5, requires_grad=True)
    gradcheck(torch.ops.my.add, [a, b], eps=1e-2)
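# Illustrative sketch only, not part of the test above: the same property holds for a
# compound op written directly in Python, since it is just a chain of differentiable
# aten ops. Double precision inputs keep the default gradcheck tolerances meaningful.
import torch
from torch.autograd import gradcheck

def compound_add_mul(x, y):
    # composite of aten ops: add followed by mul; autograd differentiates through both
    return (x + y) * y

a = torch.randn(5, 5, dtype=torch.double, requires_grad=True)
b = torch.randn(5, 5, dtype=torch.double, requires_grad=True)
assert gradcheck(compound_add_mul, (a, b), eps=1e-6, atol=1e-4)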
def _test_function_backward(self, dim, device, contiguous):
    dtype = torch.float64
    deform_func = self._get_test_fn(dim)
    (x, weight, offset, mask, bias,
     n_in_channels, n_out_channels,
     kernel_size, stride, padding, dilation) = self._get_fn_args(
        dim, device=device, contiguous=contiguous, batch_sz=1, dtype=dtype)
    script_func = torch.jit.script(deform_func)
    gradcheck(
        lambda inp, wei, off, msk, bi: script_func(inp, wei, off, msk, bi, stride, padding, dilation),
        (x, weight, offset, mask, bias),
        nondet_tol=1e-5)
def test_gradient(self):
    alphabet_size = 5
    max_targets_len = 10
    max_sequence_len = 20
    batch_size = 2
    np.random.seed(678)  # fix random seed

    targets_lengths = np.random.randint(low=1, high=max_targets_len + 1, size=batch_size)
    logits_lengths = targets_lengths + np.random.randint(
        low=0, high=(max_sequence_len - max_targets_len + 1), size=batch_size)
    # +1 for the blank symbol, so the full alphabet has alphabet_size + 1 entries
    logits = np.random.randn(batch_size, max_sequence_len, alphabet_size + 1)
    targets = (1 + np.random.rand(batch_size, np.max(targets_lengths)) * alphabet_size).astype(np.int64)

    targets_lengths = torch.LongTensor(targets_lengths)
    logits_lengths = torch.LongTensor(logits_lengths)
    targets = torch.LongTensor(targets)
    logits = torch.DoubleTensor(logits).requires_grad_()

    input_ = (logits, targets, logits_lengths, targets_lengths)
    test_result = gradcheck(
        CTCLoss(blank_idx=0, time_major=False, after_logsoftmax=False),
        input_, eps=1e-6, atol=1e-4)
    self.assertTrue(test_result, "Gradient Invalid")
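# Illustrative sketch, separate from the CTC test above: gradcheck only differentiates
# floating-point inputs that require grad; integer tensors such as targets or length
# tensors are passed through untouched. The gather_rows helper below is hypothetical.
import torch
from torch.autograd import gradcheck

x = torch.randn(4, 3, dtype=torch.double, requires_grad=True)
idx = torch.tensor([0, 2, 1, 1])  # integer input, not differentiated

def gather_rows(inp, index):
    return inp[index]

assert gradcheck(gather_rows, (x, idx), eps=1e-6, atol=1e-4)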
def test_forward(self):
    input = (Variable(torch.randn(1, 2, 4).double(), requires_grad=True), )
    test = gradcheck(SimpleLSTM(2, 4, 1).double(), input, eps=1e-6, atol=1e-4)
    print(test)
def _check_helper(self, device, dtype, op, variant, check): if variant is None: self.skipTest("Skipped! Variant not implemented.") if not op.supports_dtype(dtype, torch.device(device).type): self.skipTest( f"Skipped! {op.name} does not support dtype {str(dtype)}") samples = op.sample_inputs(device, dtype, requires_grad=True) for sample in samples: partial_fn = partial(variant, **sample.kwargs) if check == 'gradcheck': self.assertTrue( gradcheck(partial_fn, (sample.input, ) + sample.args, check_grad_dtypes=True)) elif check == 'gradgradcheck': self.assertTrue( gradgradcheck(partial_fn, (sample.input, ) + sample.args, gen_non_contig_grad_outputs=False, check_grad_dtypes=True)) self.assertTrue( gradgradcheck(partial_fn, (sample.input, ) + sample.args, gen_non_contig_grad_outputs=True, check_grad_dtypes=True)) else: self.assertTrue(False, msg="Unknown check requested!")
def _check_helper(self, device, dtype, op, variant, check): if variant is None: self.skipTest("Skipped! Variant not implemented.") if not op.supports_dtype(dtype, torch.device(device).type): self.skipTest(f"Skipped! {op.name} does not support dtype {str(dtype)}") samples = op.sample_inputs(device, dtype, requires_grad=True) for sample in samples: if sample.output_process_fn_grad is not None: out_fn = sample.output_process_fn_grad def variant_out_fn(*args, **kwargs): return out_fn(variant(*args, **kwargs)) else: variant_out_fn = variant def fn(*inputs): output = variant_out_fn(*inputs, **sample.kwargs) return op.output_func(output) if check == 'gradcheck': self.assertTrue(gradcheck(fn, (*sample.input,) + sample.args, check_batched_grad=op.check_batched_grad, check_grad_dtypes=True)) elif check == 'gradgradcheck': self.assertTrue(gradgradcheck(fn, (*sample.input,) + sample.args, gen_non_contig_grad_outputs=False, check_batched_grad=op.check_batched_gradgrad, check_grad_dtypes=True)) self.assertTrue(gradgradcheck(fn, (*sample.input,) + sample.args, gen_non_contig_grad_outputs=True, check_batched_grad=op.check_batched_gradgrad, check_grad_dtypes=True)) else: self.assertTrue(False, msg="Unknown check requested!")
def _check_helper(self, device, dtype, op, variant, check): if variant is None: self.skipTest("Skipped! Variant not implemented.") if not op.supports_dtype(dtype, torch.device(device).type): self.skipTest( f"Skipped! {op.name} does not support dtype {str(dtype)}") def is_inplace(variant): if hasattr(variant, "__wrapped__"): return variant.__wrapped__ is op.get_inplace() return variant is op.get_inplace() samples = op.sample_inputs(device, dtype, requires_grad=True) for sample in samples: if sample.broadcasts_input and is_inplace(variant): continue # Note on TensorList inputs # # gradcheck does not support TensorList inputs so here we pass TensorList # inputs of size n as n single Tensor inputs to gradcheck and wrap the op # in a function that puts the n Tensor inputs back into a TensorList def fn(*inputs): # Put tensors back into TensorList since we splat them when passing to gradcheck if is_iterable_of_tensors(sample.input): n = len(sample.input) inputs = (inputs[:n], *inputs[n:]) output = op.gradcheck_wrapper(variant, *inputs, **sample.kwargs) if sample.output_process_fn_grad is not None: return sample.output_process_fn_grad(output) return output # Splat TensorList inputs into single Tensor inputs gradcheck_args = (sample.input, ) if isinstance( sample.input, torch.Tensor) else tuple(sample.input) gradcheck_args += sample.args if check == 'gradcheck': self.assertTrue( gradcheck(fn, gradcheck_args, check_batched_grad=op.check_batched_grad, check_grad_dtypes=True)) elif check == 'gradgradcheck': self.assertTrue( gradgradcheck(fn, gradcheck_args, gen_non_contig_grad_outputs=False, check_batched_grad=op.check_batched_gradgrad, check_grad_dtypes=True)) self.assertTrue( gradgradcheck(fn, gradcheck_args, gen_non_contig_grad_outputs=True, check_batched_grad=op.check_batched_gradgrad, check_grad_dtypes=True)) else: self.assertTrue(False, msg="Unknown check requested!")
def test_layer_gpu(self):
    if not torch.cuda.is_available():
        self.skipTest("CUDA not available")
    layer = DenseLayer(30, 40,
                       lambda t: torch.nn.init.normal(t, -1, 1),
                       dtype=torch.cuda.DoubleTensor)
    self.assertTrue(
        gradcheck(layer, self.inputs_layer_cuda, raise_exception=False)
    )
def test_2d(n, m, w):
    x = Variable(torch.randn(n, m), requires_grad=True)
    w = Variable(0.1 + torch.Tensor([w]), requires_grad=True)
    tv_args = {'method': 'dr', 'max_iters': 1000, 'n_threads': 6}
    assert gradcheck(TotalVariation2d(tv_args=tv_args), (x, w),
                     eps=1e-5, atol=1e-2, rtol=1e-3)
def test_1dw(n, w):
    x = Variable(10 * torch.randn(n), requires_grad=True)
    w = Variable(0.1 + w * torch.rand(n - 1), requires_grad=True)
    tv_args = {'method': 'tautstring'}
    assert gradcheck(TotalVariation1d(tv_args=tv_args), (x, w),
                     eps=5e-5, atol=5e-2, rtol=1e-2)
def test_1d(n, w):
    x = Variable(torch.randn(n), requires_grad=True)
    w = Variable(torch.Tensor([w]), requires_grad=True)
    tv_args = {'method': 'condattautstring'}
    assert gradcheck(TotalVariation1d(tv_args=tv_args), (x, w),
                     eps=1e-5, atol=1e-2, rtol=1e-3)
def test_2dw(n, m, w):
    x = Variable(torch.randn(n, m), requires_grad=True)
    w_r = Variable(0.1 + w * torch.rand(n, m - 1), requires_grad=True)
    w_c = Variable(0.1 + w * torch.rand(n - 1, m), requires_grad=True)
    tv_args = {'max_iters': 100, 'n_threads': 6}
    assert gradcheck(TotalVariation2dWeighted(tv_args=tv_args), (x, w_r, w_c),
                     eps=1e-5, atol=5e-2, rtol=1e-3)
def testGradSmoothSVMth_loss(self):
    svm_topk_smooth_th = SmoothTopkSVM(self.n_classes, tau=self.tau, k=self.k)
    for scale in (1e-4, 1e-3, 1e-2, 1e-1, 1e0, 1e1, 1e2, 1e3, 1e4):
        x = self.x * scale
        x = Variable(x, requires_grad=True)
        assert gradcheck(lambda x: svm_topk_smooth_th(x, V(self.y)), (x,),
                         atol=1e-2, rtol=1e-3, eps=max(1e-4 * scale, 1e-2)), \
            "failed with scale {}".format(scale)
def test_autograd_from_mkldnn(self):
    # MKLDNN only supports float32
    root = torch.randn(4, 5, dtype=torch.float32).to_mkldnn().requires_grad_()

    def func(root):
        return root.to_dense()

    # because MKLDNN only supports float32, we need to lessen the precision.
    # these numbers are just empirical results that seem to work.
    self.assertWarnsRegex(lambda: gradcheck(func, [root], atol=4e-2, rtol=1e-2),
                          'double precision floating point')
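# Illustrative sketch, not from the MKLDNN test above: with float32 inputs the
# finite-difference estimate is noisy, so eps/atol/rtol have to be loosened (the values
# below are empirical) and gradcheck warns that double precision is recommended.
import torch
from torch.autograd import gradcheck

x32 = torch.randn(4, 5, dtype=torch.float32, requires_grad=True)
assert gradcheck(lambda t: (t * t).sum(), (x32,), eps=1e-2, atol=4e-2, rtol=1e-2)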
def test_2d(b, n, m, w):
    x = Variable(torch.randn(b, n, m), requires_grad=True)
    w = Variable(0.1 + torch.Tensor([[w]] * b), requires_grad=True)
    tv_args = {'method': 'dr', 'max_iters': 1000, 'n_threads': 6}
    tv = TotalVariation2d(num_workers=b, tv_args=tv_args)
    with torch.no_grad():
        batch_tv = tv(x, w)
        sample_tv = torch.stack([tv(x[i], w[i]) for i in range(b)])
    assert torch.allclose(batch_tv, sample_tv)
    assert gradcheck(tv, (x, w), eps=1e-5, atol=1e-2, rtol=1e-3)
def test_1dw(b, n, w):
    x = Variable(10 * torch.randn(b, n), requires_grad=True)
    w = Variable(0.1 + w * torch.rand(b, n - 1), requires_grad=True)
    tv_args = {'method': 'tautstring'}
    tv = TotalVariation1d(num_workers=b, tv_args=tv_args)
    with torch.no_grad():
        batch_tv = tv(x, w)
        sample_tv = torch.stack([tv(x[i], w[i]) for i in range(b)])
    assert bool(torch.allclose(batch_tv, sample_tv))
    assert gradcheck(tv, (x, w), eps=5e-5, atol=5e-2, rtol=1e-2)
def test_2dw(b, n, m, w):
    x = Variable(torch.randn(b, n, m), requires_grad=True)
    w_r = Variable(0.1 + w * torch.rand(b, n, m - 1), requires_grad=True)
    w_c = Variable(0.1 + w * torch.rand(b, n - 1, m), requires_grad=True)
    tv_args = {'max_iters': 1000, 'n_threads': 6}
    tv = TotalVariation2dWeighted(num_workers=b, tv_args=tv_args)
    with torch.no_grad():
        batch_tv = tv(x, w_r, w_c)
        sample_tv = torch.stack([tv(x[i], w_r[i], w_c[i]) for i in range(b)])
    assert torch.allclose(batch_tv, sample_tv)
    assert gradcheck(tv, (x, w_r, w_c), eps=1e-5, atol=5e-2, rtol=1e-3)
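# Illustrative sketch of the same pattern as the batched TotalVariation tests above,
# using a trivial stand-in op: compare the batched forward against per-sample results,
# then gradcheck the batched call. batched_norm here is hypothetical.
import torch
from torch.autograd import gradcheck

def batched_norm(x):
    # stand-in for the batched operator under test
    return x.flatten(1).norm(dim=1)

b = 4
x = torch.randn(b, 5, 6, dtype=torch.double, requires_grad=True)
with torch.no_grad():
    batch_out = batched_norm(x)
    sample_out = torch.stack([batched_norm(x[i:i + 1])[0] for i in range(b)])
assert torch.allclose(batch_out, sample_out)
assert gradcheck(batched_norm, (x,), eps=1e-6, atol=1e-4)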
def test_k_winners2d_grad(self):
    """Test gradient"""
    x = torch.randn(self.x.size(), dtype=torch.double, requires_grad=True)
    n, c, h, w = x.shape
    kw = KWinners2dLocal(percent_on=0.5,
                         channels=c,
                         k_inference_factor=1.0,
                         boost_strength=0.0,
                         boost_strength_factor=1.0,
                         duty_cycle_period=1000)
    self.assertTrue(gradcheck(kw, x, raise_exception=True))
def _check_helper(self, device, dtype, op, variant, check): if variant is None: self.skipTest("Skipped! Variant not implemented.") if not op.supports_dtype(dtype, torch.device(device).type): self.skipTest( f"Skipped! {op.name} does not support dtype {str(dtype)}") samples = op.sample_inputs(device, dtype, requires_grad=True) for sample in samples: if sample.output_process_fn_grad is not None: out_fn = sample.output_process_fn_grad def variant_out_fn(*args, **kwargs): return out_fn(variant(*args, **kwargs)) else: variant_out_fn = variant def fn(*inputs): # Pack input back into TensorList since we splat it when passing to gradcheck if is_iterable_of_tensors(sample.input): n = len(sample.input) inputs = (inputs[:n], *inputs[n:]) output = variant_out_fn(*inputs, **sample.kwargs) return op.output_func(output) # Gradcheck does not support TensorList so we splat it with the remaining args gradcheck_args = (sample.input, ) if isinstance( sample.input, torch.Tensor) else tuple(sample.input) gradcheck_args += sample.args if check == 'gradcheck': self.assertTrue( gradcheck(fn, gradcheck_args, check_batched_grad=op.check_batched_grad, check_grad_dtypes=True)) elif check == 'gradgradcheck': self.assertTrue( gradgradcheck(fn, gradcheck_args, gen_non_contig_grad_outputs=False, check_batched_grad=op.check_batched_gradgrad, check_grad_dtypes=True)) self.assertTrue( gradgradcheck(fn, gradcheck_args, gen_non_contig_grad_outputs=True, check_batched_grad=op.check_batched_gradgrad, check_grad_dtypes=True)) else: self.assertTrue(False, msg="Unknown check requested!")
def test_custom_compound_op_autograd(self):
    # Test that a custom compound op (i.e. a custom op that just calls other aten ops)
    # correctly returns gradients of those other ops
    source = """
    #include <torch/script.h>
    torch::Tensor my_add(torch::Tensor x, torch::Tensor y) {
        return x + y;
    }
    static auto registry = torch::import()
        .def("my::add(Tensor x, Tensor y) -> Tensor", &my_add);
    """
    torch.utils.cpp_extension.load_inline(
        name="is_python_module",
        cpp_sources=source,
        verbose=True,
        is_python_module=False,
    )

    a = torch.randn(5, 5, requires_grad=True)
    b = torch.randn(5, 5, requires_grad=True)
    gradcheck(torch.ops.my.add, [a, b], eps=1e-2)
def test_forward(self):
    batch_size = 13
    input_dims = 5
    seq_len = 7
    cell_size = 3
    encoded_cell = 2
    input = (Variable(torch.randn(batch_size, input_dims, seq_len).double(),
                      requires_grad=True), )
    test = gradcheck(NaiveSeq2Seq(input_dims, seq_len, cell_size,
                                  encoded_cell_size=encoded_cell).double(),
                     input, eps=1e-6, atol=1e-4)
    print(test)
def test_1dw(b, n, w):
    x = Variable(10 * torch.randn(b, n), requires_grad=True)
    w = Variable(0.1 + w * torch.rand(b, n - 1), requires_grad=True)
    tv_args = {'method': 'tautstring'}
    for batch in [True, False]:
        tv = TotalVariation1d(average_connected=False,
                              num_workers=min(8, b),
                              multithread=True,
                              batch_backward=batch,
                              tv_args=tv_args)
        with torch.no_grad():
            batch_tv = tv(x, w)
            sample_tv = torch.stack([tv(x[i], w[i]) for i in range(b)])
        assert bool(torch.allclose(batch_tv, sample_tv))
        assert gradcheck(tv, (x, w), eps=5e-5, atol=5e-2, rtol=1e-2)
def test_2d(b, n, m, w):
    x = Variable(torch.randn(b, n, m), requires_grad=True)
    w = Variable(0.1 + torch.Tensor([[w]] * b), requires_grad=True)
    tv_args = {'method': 'dr', 'max_iters': 100, 'n_threads': 2}
    for batch in [True, False]:
        tv = TotalVariation2d(refine=True,
                              average_connected=False,
                              num_workers=min(8, b),
                              multithread=True,
                              batch_backward=batch,
                              tv_args=tv_args)
        with torch.no_grad():
            batch_tv = tv(x, w)
            sample_tv = torch.stack([tv(x[i], w[i]) for i in range(b)])
        assert torch.allclose(batch_tv, sample_tv)
        assert gradcheck(tv, (x, w), eps=1e-5, atol=1e-2, rtol=1e-3)
def test_layer_cpu(self):
    layer = DenseLayer(30, 40,
                       lambda t: torch.nn.init.normal(t, -1, 1),
                       dtype=torch.DoubleTensor)
    self.assertTrue(
        gradcheck(layer, self.inputs_layer, raise_exception=False)
    )
q = torch.randn(2, 3, 7, 5, requires_grad=True, device="cuda", dtype=torch.double)
k = torch.randn(2, 3, 7, 5, requires_grad=True, device="cuda", dtype=torch.double)
func = unfold_dot.UnfoldDot(w, True)
assert gradcheck(func, [q, k], eps=1e-3)  # , atol=1e-2, rtol=1e-2)

# test matmul
for restrict in [1, 3, 5]:
    a = torch.randn(2, 3, 7, restrict, requires_grad=True, device="cuda", dtype=torch.double)
    v = torch.randn(2, 3, 7, 5, requires_grad=True,
def __init__(self, filter_width, filter_height):
    super(ScipyConv2d, self).__init__()
    self.filter = Parameter(torch.randn(filter_width, filter_height))
    self.bias = Parameter(torch.randn(1, 1))

def forward(self, input):
    return ScipyConv2dFunction.apply(input, self.filter, self.bias)

###############################################################
# **Example usage:**

module = ScipyConv2d(3, 3)
print("Filter and bias: ", list(module.parameters()))
input = torch.randn(10, 10, requires_grad=True)
output = module(input)
print("Output from the convolution: ", output)
output.backward(torch.randn(8, 8))
print("Gradient for the input map: ", input.grad)

###############################################################
# **Check the gradients:**

from torch.autograd.gradcheck import gradcheck

moduleConv = ScipyConv2d(3, 3)

input = [torch.randn(20, 20, dtype=torch.double, requires_grad=True)]
test = gradcheck(moduleConv, input, eps=1e-6, atol=1e-4)
print("Are the gradients correct: ", test)
grad_feature = np.zeros(mat_out.shape)
grad_out = grad_output.cpu().detach().numpy()
bins_num = 256 // cell_range  # integer number of histogram bins
for idx in range(bins_num):
    num = np.sum(mat_index == idx)
    if num > 0:
        grad_feature[mat_index == idx] = grad_out[idx] * 1.0 / num
return torch.tensor(grad_feature), None


class EnergyLoss(nn.Module):
    def __init__(self):
        super(EnergyLoss, self).__init__()

    def forward(self, fea_out, fea_in):
        return EnergyFunction.apply(fea_out, fea_in)


mod = EnergyLossTorch()
m_in = torch.rand(1, 1, 3, 3, dtype=torch.double, device=torch.device('cuda:0'))
m_out = torch.rand(1, 1, 3, 3, dtype=torch.double, requires_grad=True, device=torch.device('cuda:0'))
m_in.data = (m_in.data + 1) / 2.0
m_out.data = (m_out.data + 1) / 2.0
out = mod(m_out, m_in)
print('out', out)
grad = torch.tensor(np.array(range(0, 15)), dtype=torch.float, device=torch.device('cuda:0'))
out.backward(grad)
print('grad', m_out.grad, m_in.grad)

inputfea = (m_out, m_in)
from torch.autograd.gradcheck import gradcheck
test = gradcheck(mod, inputfea, eps=1e-2, raise_exception=True)
print(test)
def test_forward(self):
    input = (Variable(torch.randn(3, 2, 4).double(), requires_grad=True),)
    test = gradcheck(AttentionDecoder(2, 4, 1).double(), input, eps=1e-6, atol=1e-4)
    print(test)
def forward(self, X):
    return X**3


torch.manual_seed(0)

X = torch.Tensor([3.])
X.requires_grad_()
print('x:', X)

cube = Cube()
Y = cube(X)
print('f(x):', Y)

S = torch.sum(Y)
S.backward()
print('<Grad (f)(x), 1>:', X.grad)

X.grad.zero_()
X.requires_grad_()
Y = cube(X)
S = torch.sum(Y)
G, = torch.autograd.grad(S, (X, ), create_graph=True)
S = G.sum()
S.backward()
print('Grad^2 (f) 1:', X.grad)

X.grad.zero_()
gradcheck(cube, (X, ), eps=1e-4, atol=1e-2)
X.grad.zero_()
gradgradcheck(cube, (X, ), eps=1e-4, atol=1e-2)
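# Illustrative companion to the Cube example above, not part of it: the same checks in
# double precision, where the analytic values are easy to verify by hand
# (f(x) = x**3, so f'(3) = 27 and f''(3) = 18). cube_fn is a hypothetical stand-in.
import torch
from torch.autograd import gradcheck, gradgradcheck

def cube_fn(x):
    return x ** 3

x = torch.tensor([3.0], dtype=torch.double, requires_grad=True)
y = cube_fn(x)
(g,) = torch.autograd.grad(y.sum(), (x,), create_graph=True)
assert torch.allclose(g, torch.tensor([27.0], dtype=torch.double))  # 3 * x**2
(h,) = torch.autograd.grad(g.sum(), (x,))
assert torch.allclose(h, torch.tensor([18.0], dtype=torch.double))  # 6 * x
assert gradcheck(cube_fn, (x,), eps=1e-6, atol=1e-4)
assert gradgradcheck(cube_fn, (x,), eps=1e-6, atol=1e-4)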
def test_func_gpu(self):
    if not torch.cuda.is_available():
        self.skipTest("CUDA not available")
    self.assertTrue(
        gradcheck(self.func, self.inputs_func_cuda, raise_exception=False)
    )