def test_backward(self):
    msg = ''
    try:
        x = torch.autograd.Variable(self.x, requires_grad=True)
        gradcheck(sqrtm, (x,), rtol=1e-2, atol=1 / type(self).sigma)
    except RuntimeError as exc:
        msg = str(exc)
    if msg != '':
        self.fail(msg)
def _gradcheck_log_prob(self, dist_ctor, ctor_params):
    # performs gradient checks on log_prob
    distribution = dist_ctor(*ctor_params)
    s = distribution.sample()
    self.assertEqual(s.size(), distribution.log_prob(s).size())

    def apply_fn(*params):
        return dist_ctor(*params).log_prob(s)

    gradcheck(apply_fn, ctor_params, raise_exception=True)
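# A minimal sketch of how a helper like the one above might be driven,
# assuming a unittest.TestCase subclass and torch.distributions.Normal;
# double-precision parameters are used because gradcheck's finite
# differences are unreliable in float32:
#
#     loc = torch.randn(5, dtype=torch.double, requires_grad=True)
#     scale = torch.rand(5, dtype=torch.double).requires_grad_()
#     self._gradcheck_log_prob(torch.distributions.Normal, (loc, scale))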
def test_label_smoothing(self):
    input = Variable(torch.randn(3, 5), requires_grad=True)
    idx = torch.rand(3) * 4
    target = Variable(idx.long())
    criterion = LabelSmoothedNLLLoss()
    self.assertTrue(gradcheck(
        lambda x, y: criterion.apply(x, y, 0.1, 2, None),
        (input, target)
    ))
    weights = torch.ones(5)
    weights[2] = 0
    self.assertTrue(gradcheck(
        lambda x, y: criterion.apply(x, y, 0.1, None, weights),
        (input, target)
    ))
    self.assertTrue(gradcheck(
        lambda x, y: criterion.apply(x, y, 0.1, None, None),
        (input, target)
    ))
def test_gradcheck(self, device):
    quaternion = torch.tensor([1., 0., 0., 0.]).to(device)
    quaternion = tensor_to_gradcheck_var(quaternion)
    # evaluate function gradient
    assert gradcheck(kornia.quaternion_exp_to_log, (quaternion,),
                     raise_exception=True)
def test_interp1_cspline(dtype, device):
    dtype_device_kwargs = {"dtype": dtype, "device": device}
    bc_type = "clamped"
    x = torch.tensor([0.0, 0.2, 0.3, 0.5, 0.8, 1.0],
                     **dtype_device_kwargs).requires_grad_()
    y1 = torch.tensor([1.0, 1.5, 2.1, 1.1, 2.3, 2.5],
                      **dtype_device_kwargs).requires_grad_()
    y2 = torch.tensor([[1.0, 1.5, 2.1, 1.1, 2.3, 2.5],
                       [0.8, 1.2, 2.2, 0.4, 3.2, 1.2]],
                      **dtype_device_kwargs).requires_grad_()
    xq1 = torch.linspace(0, 1, 10, **dtype_device_kwargs).requires_grad_()
    xq2 = torch.linspace(0, 1, 4, **dtype_device_kwargs).requires_grad_()

    # true results (obtained from scipy.interpolate.CubicSpline):
    # from scipy.interpolate import CubicSpline
    # print("yq11:", CubicSpline(x.detach(), y1.detach(), bc_type=bc_type)(xq1.detach()))
    # print("yq12:", CubicSpline(x.detach(), y1.detach(), bc_type=bc_type)(xq2.detach()))
    # print("yq21:", CubicSpline(x.detach(), y2[1].detach(), bc_type=bc_type)(xq1.detach()))
    # print("yq22:", CubicSpline(x.detach(), y2[1].detach(), bc_type=bc_type)(xq2.detach()))
    yq11_true = torch.tensor([
        1., 1.10966822, 1.65764362, 2.08516021, 1.40964624,
        1.04718761, 1.52146065, 2.19990128, 2.49291361, 2.5
    ], **dtype_device_kwargs)
    yq12_true = torch.tensor([1., 2.08516021, 1.52146065, 2.5],
                             **dtype_device_kwargs)
    yq21_true = torch.tensor([
        [1., 1.10966822, 1.65764362, 2.08516021, 1.40964624,
         1.04718761, 1.52146065, 2.19990128, 2.49291361, 2.5],
        [0.8, 0.75490137, 1.45269956, 2.13861483, 0.8463294,
         0.57694735, 2.06124231, 3.20656708, 2.2875088, 1.2]
    ], **dtype_device_kwargs)
    yq22_true = torch.tensor([[1., 2.08516021, 1.52146065, 2.5],
                              [0.8, 2.13861483, 2.06124231, 1.2]],
                             **dtype_device_kwargs)

    def interp(x, y, xq):
        return Interp1D(x, y, method="cspline", bc_type=bc_type,
                        extrap="mirror")(xq)

    yq11 = interp(x, y1, xq1)
    yq12 = interp(x, y1, xq2)
    yq21 = interp(x, y2, xq1)
    yq22 = interp(x, y2, xq2)

    # visual check against scipy:
    # import matplotlib.pyplot as plt
    # from scipy.interpolate import CubicSpline
    # xx = torch.linspace(0, 1, 1000, **dtype_device_kwargs)
    # xx2 = torch.linspace(-1, 2, 1000, **dtype_device_kwargs)
    # plt.plot(xx2, interp(x, y1, xx2).detach().numpy())
    # plt.plot(xx, CubicSpline(x.detach(), y1.detach(), bc_type="clamped")(xx.detach()))
    # plt.plot(x.detach(), y1.detach(), 'x')
    # plt.show()

    assert torch.allclose(yq11, yq11_true)
    assert torch.allclose(yq12, yq12_true)
    assert torch.allclose(yq21, yq21_true)
    assert torch.allclose(yq22, yq22_true)

    gradcheck(interp, (x, y1, xq1))
    gradcheck(interp, (x, y1, xq2))
    gradcheck(interp, (x, y2, xq1))
    gradcheck(interp, (x, y2, xq2))
    gradgradcheck(interp, (x, y1, xq1))
    gradgradcheck(interp, (x, y1, xq2))
    gradgradcheck(interp, (x, y2, xq1))
    gradgradcheck(interp, (x, y2, xq2))
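# Note: gradcheck and gradgradcheck raise a RuntimeError on failure by default
# (raise_exception=True), so the bare calls above already act as assertions
# under pytest; no explicit `assert` wrapper is needed.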
def test_gradcheck(self, device):
    input = torch.rand(1, 2, 5, 7).to(device)
    input = utils.tensor_to_gradcheck_var(input)  # to var
    assert gradcheck(kornia.augmentation.RandomAffine(0.), (input,),
                     raise_exception=True)
""" batch_size = t_dict['row'].shape[0] np_dict = {key: t_dict[key].numpy() for key in t_dict} ss = [] max_shape = np.zeros((2, ), dtype=np.int) for b in range(batch_size): shape = np_dict['shape'][b].astype(np.int) max_shape[0] = max(shape[0], max_shape[0]) max_shape[1] = max(shape[1], max_shape[1]) for b in range(batch_size): data = np_dict['data'][b] row = np_dict['row'][b] col = np_dict['col'][b] _ss = ssp.coo_matrix((data, (row, col)), shape=max_shape) ss.append(_ss) return ss if __name__ == '__main__': t = torch.tensor([[[1, 2, 3, 4], [11, 22, 33, 44]]]) t = dense_to_sparse(t) s = slicing_torch(t, torch.tensor((0, 0, 1)), preserve_dim=True) print(s.to_dense()) from torch.autograd import gradcheck input = (dense_to_sparse( torch.randn(1, 20, 30, dtype=torch.double, requires_grad=True)), torch.randn(1, 30, 40, dtype=torch.double, requires_grad=True)) test = gradcheck(sbmm, input, eps=1e-6, atol=1e-4) print(test)
def test_gradcheck(self, device):
    matrix = torch.eye(2, 3).to(device)[None]
    matrix = utils.tensor_to_gradcheck_var(matrix)  # to var
    assert gradcheck(kornia.invert_affine_transform, (matrix,),
                     raise_exception=True)
def grad_check(self, *args):
    from torch.autograd import gradcheck
    gradcheck(self.double(), *args, eps=1e-6, atol=1e-4)
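# Why cast to double first: gradcheck builds its numerical Jacobian from
# central differences with a tiny eps, which drowns in float32 rounding
# error. A self-contained sketch (the Linear module and shapes here are
# assumptions, not part of the class above):
import torch
from torch.autograd import gradcheck

lin = torch.nn.Linear(3, 2)
x = torch.randn(4, 3, dtype=torch.double, requires_grad=True)
# gradcheck(lin, (x.float(),), eps=1e-6, atol=1e-4)  # float32 typically fails
print(gradcheck(lin.double(), (x,), eps=1e-6, atol=1e-4))  # prints True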
def test_gradcheck(self, device, dtype):
    input = torch.rand(1, 2, 3, 4, device=device, dtype=dtype)
    input = utils.tensor_to_gradcheck_var(input)
    assert gradcheck(kornia.geometry.transform.Rescale(2.0, align_corners=False),
                     (input,), raise_exception=True)
w = w.requires_grad_()
K = None
# Move everything to GPU:
# DL, p2d, p3d, w = DL.cuda(0), p2d.cuda(0), p3d.cuda(0), w.cuda(0)
# K = K.cuda(0) if K is not None else None

# Run forward pass:
y = DL(p2d, p3d, w, K)

# Compute objective function value:
f = node.objective(p2d, p3d, w, K, y=y)

# Compute gradient:
Dy = grad(y, (p2d, p3d, w), grad_outputs=torch.ones_like(y))

# print("Input p2d:\n{}".format(p2d.detach().cpu().numpy()))
# print("Input p3d:\n{}".format(p3d.detach().cpu().numpy()))
# print("Input w:\n{}".format(w.detach().cpu().numpy()))
# print("Input K:\n{}".format(K))
print("Theta Ground-Truth:\n{}".format(theta.detach().cpu().numpy()))
print("Theta Estimated:\n{}".format(y.detach().cpu().numpy()))
print("Objective Function Value:\n{}".format(f.detach().cpu().numpy()))
# print("Dy:\n{}\n{}\n{}".format(Dy[0].detach().cpu().numpy(),
#                                Dy[1].detach().cpu().numpy(),
#                                Dy[2].detach().cpu().numpy()))

# Run gradcheck:
# Move everything to CPU first:
# DL, p2d, p3d, w = DL.cpu(), p2d.cpu(), p3d.cpu(), w.cpu()
# K = K.cpu() if K is not None else None
test = gradcheck(DL, (p2d, p3d, w, K), eps=1e-4, atol=1e-4, rtol=1e-4,
                 raise_exception=True)
print("gradcheck passed:", test)
def test_gradcheck(self):
    quaternion = torch.tensor([0., 0., 1.])
    quaternion = tensor_to_gradcheck_var(quaternion)
    # evaluate function gradient
    assert gradcheck(kornia.quaternion_log_to_exp, (quaternion,),
                     raise_exception=True)
# f = d.fill_rips(circle, 2, 2.1)
# f.sort()

# gradcheck takes a tuple of tensors as input, checks whether the gradients
# evaluated with these tensors are close enough to numerical approximations,
# and returns True if they all verify this condition.
layer = Diagramlayer.apply

''' #### Test #### '''
weights = Variable(torch.tensor(circle).type(dtype), requires_grad=True)
# diagramlayer = Diagramlayer.apply
# dgms = diagramlayer(weights)
# dgms = dgms.detach().numpy()
# print(dgms)
# for d_i in range(dgms.shape[0]):
#     dgmpts = dgms[d_i]
#     print(dgmpts.shape)
#     dgmpts = np.delete(dgmpts, np.where((dgmpts == (-np.inf, -np.inf)).all(axis=1)), axis=0)
#     dgmpts0 = dgmpts
#     if len(dgmpts) > 0:
#         fig = plot_diagram2(dgmpts, 'Dimension {}'.format(0))
#     else:
#         fig = plt.figure()
#     fig.savefig('dgm{}_{}.png'.format(d_i, "test"))

saturation = 1.1
input = (weights, saturation)
test = gradcheck(layer, input, eps=1e-4, atol=1e-3)
print(test)
class GaussianFunction(Function):

    @staticmethod
    def forward(ctx, mean, std, vec):
        ctx.save_for_backward(mean, std, vec)
        # output = vec.mul_(std).add_(mean)  # in-place version
        output = vec.mul(std).add(mean)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        mean, std, vec = ctx.saved_tensors
        grad_mean = grad_std = grad_vec = None
        if ctx.needs_input_grad[0]:
            grad_mean = torch.ones_like(mean).mul(grad_output)
        if ctx.needs_input_grad[1]:
            grad_std = vec.mul(grad_output)
            # grad_std = vec.mul(-1 * grad_output)
            # grad_std = vec.mul(torch.exp(-1 * grad_output))
        return grad_mean, grad_std, grad_vec


if __name__ == '__main__':
    gaussian = GaussianFunction.apply
    for i in range(1000):
        vec = torch.randn(10, 15)
        input = (torch.randn(10, 15, dtype=torch.double, requires_grad=True),
                 torch.randn(10, 15, dtype=torch.double, requires_grad=True),
                 vec)
        test = gradcheck(gaussian, input, eps=1e-6, atol=1e-4)
        print(test)
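# gradcheck only perturbs and checks inputs with requires_grad=True, so it is
# fine that `vec` above stays float32 without gradients: only `mean` and `std`
# are compared against the numerical Jacobian, and grad_vec stays None.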
def test_gradcheck(self, batch_size, device, dtype):
    trans_01 = identity_matrix(batch_size, device=device, dtype=dtype)
    trans_01 = utils.tensor_to_gradcheck_var(trans_01)  # to var
    assert gradcheck(kornia.inverse_transformation, (trans_01,),
                     raise_exception=True)
        ctx.grid_size = grid_size
        return grid_value

    @staticmethod
    def backward(ctx, grad_grid_value):
        pc, pc_value, grid_value, weight_sum, pc_grid_index = ctx.saved_tensors
        grad_pc = grad_pc_value = None
        if ctx.needs_input_grad[1]:
            grad_pc_value = rev_trilinear.cal_pc_value_grad(
                grad_grid_value, pc, weight_sum, pc_grid_index, ctx.grid_size)
            torch.cuda.synchronize()
        if ctx.needs_input_grad[0]:
            grad_pc = rev_trilinear.cal_pc_grad(
                grad_grid_value, grid_value, pc, weight_sum, pc_value,
                pc_grid_index, ctx.grid_size)
            torch.cuda.synchronize()
        return grad_pc, grad_pc_value, None


if __name__ == '__main__':
    # create leaf tensors directly on the GPU; calling .cuda() on a leaf that
    # requires grad would produce a non-leaf whose flag cannot be changed
    pc = torch.rand(1, 3, 6, dtype=torch.float32, device='cuda',
                    requires_grad=True)
    pc_value = torch.rand(1, 1, 6, dtype=torch.float32, device='cuda',
                          requires_grad=True)
    # pc = torch.tensor([[-0.5, -0.5], [0.5, 0.5]]).unsqueeze(0).transpose(1, 2).cuda()
    # pc_value = torch.tensor([1.0, -1.0]).reshape(1, 1, 2).cuda()
    grid_value = RevTrilinear.apply(pc, pc_value, 3)
    input = (pc, pc_value, 3)
    test = gradcheck(RevTrilinear.apply, input, eps=1e-3, atol=1e-3)
    print(test)
feat_size = 15
spatial_scale = 1.0 / 8
img_size = feat_size / spatial_scale
num_imgs = 2
num_rois = 20

batch_ind = np.random.randint(num_imgs, size=(num_rois, 1))
rois = np.random.rand(num_rois, 4) * img_size * 0.5
rois[:, 2:] += img_size * 0.5
rois = np.hstack((batch_ind, rois))

feat = torch.randn(num_imgs, 16, feat_size, feat_size,
                   requires_grad=True, device='cuda:0')
rois = torch.from_numpy(rois).float().cuda()
inputs = (feat, rois)

print('Gradcheck for roi align...')
test = gradcheck(RoIAlign(3, spatial_scale), inputs, atol=1e-3, eps=1e-3)
print(test)
test = gradcheck(RoIAlign(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3)
print(test)
test2 = gradcheck(RoIAlignAda(3, spatial_scale), inputs, atol=1e-3, eps=1e-3)
print(test2)
test2 = gradcheck(RoIAlignAda(3, spatial_scale, 2), inputs, atol=1e-3, eps=1e-3)
print(test2)
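# The loose eps/atol above are typical when gradchecking float32 CUDA kernels:
# the op may not support double precision, so instead of casting, the
# finite-difference step and the tolerance are both relaxed until the
# comparison is meaningful.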
def test_gradcheck(self):
    batch_size, channels, height, width = 1, 2, 5, 4
    img = torch.rand(batch_size, channels, height, width)
    img = utils.tensor_to_gradcheck_var(img)  # to var
    assert gradcheck(kornia.geometry.pyrdown, (img,), raise_exception=True)
import os.path as osp
import sys

import torch
from torch.autograd import gradcheck

sys.path.append(osp.abspath(osp.join(__file__, '../../')))
from roi_temporal_pooling import RoITemporalPool

feat = torch.randn(4, 16, 15, 15, 15, requires_grad=True).cuda()
rois = torch.Tensor([[0, 0, 50], [0, 10, 43], [1, 67, 110]]).cuda()
inputs = (feat, rois)

print('Gradcheck for roi temporal pooling...')
test = gradcheck(RoITemporalPool(4, 1.0 / 8), inputs, eps=1e-5, atol=1e-3)
print(test)
def test_gradcheck(self, device):
    input = torch.rand(1, 2, 3, 4).to(device)
    input = utils.tensor_to_gradcheck_var(input)
    assert gradcheck(kornia.Rescale(2.0), (input,), raise_exception=True)
def test_gradcheck(self, device):
    input = torch.rand((1, 3, 3)).to(device)  # 1 x 3 x 3
    input = utils.tensor_to_gradcheck_var(input)  # to var
    assert gradcheck(RandomDepthicalFlip3D(p=1.), (input,),
                     raise_exception=True)
def test_warp_perspective_gradcheck(device, dtype):
    H, W = 5, 5
    # gradcheck needs double precision, so the patch is created in float64
    # regardless of the test dtype
    patch = torch.rand(1, 1, H, W, device=device, dtype=torch.float64,
                       requires_grad=True)
    M = kornia.eye_like(3, patch)
    assert gradcheck(kornia.warp_perspective, (patch, M, (H, W)),
                     raise_exception=True)
def test_gradcheck(self, device, dtype):
    torch.manual_seed(0)  # for random reproducibility
    inputs3d = torch.rand((3, 3, 3), device=device, dtype=dtype)  # 3 x 3 x 3
    inputs3d = utils.tensor_to_gradcheck_var(inputs3d)  # to var
    assert gradcheck(RandomEqualize3D(p=0.5), (inputs3d,),
                     raise_exception=True)
        return gradinput, gradweight, gradbias


# Wrap the custom Function in a plain Python function
def mylinear(x, weight, bias):
    return LinearFunction.apply(x, weight, bias)


x = torch.tensor([[2]], dtype=torch.float32, requires_grad=True)
weight = torch.ones(2, 1, dtype=torch.float32, requires_grad=True)
bias = torch.ones(2, dtype=torch.float32, requires_grad=True)
z = mylinear(x, weight, bias)
print(z.grad_fn)
print(z.grad_fn.apply(torch.ones(1, 2)))
print('Gradient check for LinearFunction: ',
      gradcheck(mylinear, (x, weight, bias), eps=1e-3))

print('Defining a custom Linear layer'.center(30, '='))


class MyLinear(nn.Module):
    def __init__(self, input_features, output_features, bias=True):
        super(MyLinear, self).__init__()
        self.input_features = input_features
        self.output_features = output_features
        # define parameters; nn.Parameter is a Tensor subclass whose
        # requires_grad defaults to True
        self.weight = nn.Parameter(
            torch.Tensor(output_features, input_features))
        if bias:
            self.bias = nn.Parameter(torch.Tensor(output_features))
        # initialize parameters
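        # A plausible completion of the truncated initializer, in the spirit
        # of the official "extending PyTorch" tutorial (an assumption, not the
        # original author's code):
        #
        #     else:
        #         self.register_parameter('bias', None)
        #     self.weight.data.uniform_(-0.1, 0.1)
        #     if self.bias is not None:
        #         self.bias.data.uniform_(-0.1, 0.1)
        #
        # def forward(self, input):
        #     return LinearFunction.apply(input, self.weight, self.bias)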
def test_gradcheck(self, device):
    batch_size, channels, height, width = 1, 2, 5, 4
    img = torch.rand(batch_size, channels, height, width).to(device)
    img = utils.tensor_to_gradcheck_var(img)  # to var
    assert gradcheck(kornia.filters.sobel, (img, True), raise_exception=True)
def test_gradcheck(self, device):
    input = torch.rand(1, 2, 5, 7).to(device)
    input = utils.tensor_to_gradcheck_var(input)  # to var
    assert gradcheck(F.random_perspective, (input, 0., 1.),
                     raise_exception=True)
a.grad.data *= 0
forget.grad.data *= 0
last_h.grad.data *= 0

resultb = ForgetMult()(forget, a, last_h, use_cuda=False)
print(resultb.size())
loss = resultb.pow(2).sum()
loss.backward()

print('Result =', loss.item())
print('X grad =', a.grad.mean().item())
print('Forget grad =', forget.grad.mean().item())
print('Last H grad =', last_h.grad.mean().item())

###
print()
print('=-=-' * 5)
print('(Xgrad - Xgrad).sum() =', (x_grad_copy - a.grad).sum().item())
print('Residual error for result')
print('=-=-' * 5)
residual = (resulta - resultb)
print(residual.abs().sum().item())

# Had to loosen gradient checking, potentially due to general floating point badness?
from torch.autograd import gradcheck
inputs = [forget, a, last_h]
test = gradcheck(ForgetMult(), inputs, eps=1e-4, atol=1e-2)
print(test)
def test_gradcheck(self):
    batch_size, channels, height, width = 2, 3, 4, 5
    img = torch.ones(batch_size, channels, height, width)
    img = utils.tensor_to_gradcheck_var(img)  # to var
    assert gradcheck(kornia.adjust_brightness, (img, 2.),
                     raise_exception=True)
def test_many_times(self):
    input = (Variable(tensorType(50, 100).uniform_(), requires_grad=True),)
    self.assertTrue(gradcheck(Digamma(), input, eps=1e-6, atol=1e-3))
def test_gradcheck(self, device):
    matrix = torch.eye(3).to(device)
    matrix = tensor_to_gradcheck_var(matrix)
    # evaluate function gradient
    assert gradcheck(kornia.rotation_matrix_to_quaternion, (matrix,),
                     raise_exception=True)
X = torch.autograd.Variable(torch.rand(size), requires_grad=True).cuda()
qrnn = QRNN(input_size, hidden_size, num_layers=2, dropout=0.4)
qrnn.cuda()
output, hidden = qrnn(X)
assert list(output.size()) == [7, 20, 256]
assert list(hidden.size()) == [2, 20, 256]

###

seq_len, batch_size, hidden_size = 2, 2, 16
seq_len, batch_size, hidden_size = 35, 8, 32
size = (seq_len, batch_size, hidden_size)
X = Variable(torch.rand(size), requires_grad=True).cuda()
print(X.size())

qrnn = QRNNLayer(hidden_size, hidden_size)
qrnn.cuda()
Y, _ = qrnn(X)

qrnn.use_cuda = False
Z, _ = qrnn(X)

diff = (Y - Z).sum().item()
print('Total difference between QRNN(use_cuda=True) and QRNN(use_cuda=False) results:', diff)
assert diff < 1e-5, 'CUDA and non-CUDA QRNN layers return different results'

from torch.autograd import gradcheck
inputs = [X, ]
test = gradcheck(QRNNLayer(hidden_size, hidden_size).cuda(), inputs)
print(test)
from torch.autograd import Function


class LinearFunction(Function):

    @staticmethod
    def forward(ctx, input, weight, bias=None):
        ctx.save_for_backward(input, weight, bias)
        output = input.mm(weight.t())
        if bias is not None:
            output += bias.unsqueeze(0).expand_as(output)
        return output

    @staticmethod
    def backward(ctx, grad_output):
        input, weight, bias = ctx.saved_tensors
        grad_input = grad_weight = grad_bias = None
        if ctx.needs_input_grad[0]:
            grad_input = grad_output.mm(weight)
        if ctx.needs_input_grad[1]:
            grad_weight = grad_output.t().mm(input)
        if bias is not None and ctx.needs_input_grad[2]:
            grad_bias = grad_output.sum(0).squeeze(0)
        return grad_input, grad_weight, grad_bias


linear = LinearFunction.apply

from torch.autograd import gradcheck
input = (torch.randn(20, 20, dtype=torch.double, requires_grad=True),
         torch.randn(30, 20, dtype=torch.double, requires_grad=True))
test = gradcheck(linear, input, eps=1e-6, atol=1e-4)
print(test)
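# If the Function must also support double backward, gradgradcheck can be run
# on the same inputs; because the backward above is written with differentiable
# tensor ops, autograd can differentiate through it. A sketch, assuming the
# definitions above:
from torch.autograd import gradgradcheck

test2 = gradgradcheck(linear, input, eps=1e-6, atol=1e-4)
print(test2)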
def test_many_times(self):
    a = Variable(tensorType(71, 23).uniform_() * 10, requires_grad=True)
    b = Variable(tensorType(71, 23).uniform_() * 10, requires_grad=True)
    result = gradcheck(Beta(), (a, b), eps=1e-6, atol=1e-3)
    self.assertTrue(result)
parser.add_argument('example', choices=['py', 'cpp', 'cuda'])
parser.add_argument('-b', '--batch-size', type=int, default=3)
parser.add_argument('-f', '--features', type=int, default=17)
parser.add_argument('-s', '--state-size', type=int, default=5)
parser.add_argument('-c', '--cuda', action='store_true')
options = parser.parse_args()

if options.example == 'py':
    from python.lltm_baseline import LLTMFunction
elif options.example == 'cpp':
    from cpp.lltm import LLTMFunction
else:
    from cuda.lltm import LLTMFunction
    options.cuda = True

X = torch.randn(options.batch_size, options.features)
h = torch.randn(options.batch_size, options.state_size)
C = torch.randn(options.batch_size, options.state_size)
W = torch.randn(3 * options.state_size, options.features + options.state_size)
b = torch.randn(1, 3 * options.state_size)

variables = [X, W, b, h, C]

for i, var in enumerate(variables):
    if options.cuda:
        var = var.cuda()
    variables[i] = Variable(var.double(), requires_grad=True)

if gradcheck(LLTMFunction.apply, variables):
    print('Ok')
def test_sparsemax_grad():
    for _ in range(10):
        x = torch.randn(4, 6, dtype=torch.float64, requires_grad=True)
        gradcheck(sparsemax_bisect, (x,), eps=1e-5)
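# sparsemax is piecewise linear, so a finite-difference step can straddle a
# kink and disagree with the analytical gradient at unlucky points; repeating
# the check over several random draws, as above, makes such failures easier
# to surface.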
bn2 = torch.nn.BatchNorm2d(3, eps=0, affine=False)
# bn.train()
bn1 = batchnormsync.BatchNormSync(3, eps=0, affine=True, device_ids=[0])
bn1.train()

if cuda:
    bn = torch.nn.DataParallel(bn)
    bn2 = torch.nn.DataParallel(bn2)
    bn = bn.cuda()
    bn1 = bn1.cuda()
    bn2 = bn2.cuda()
    input = input.cuda()

inputs = (Variable(input, requires_grad=True),)
# output = bn(inputs[0])
# output1 = bn1(inputs[0])
# output2 = bn2(inputs[0])
# print((output1 - output2).abs().max())
# print((output - output2).abs().max())
# test = gradcheck(bn, inputs, eps=1e-4, atol=1e-4, rtol=1e-8)

for i in range(1000):
    logger.info(i)
    start_time = time.time()
    test = gradcheck(bn, inputs, eps=1e-4, atol=1e-2, rtol=1e-3)
    logger.info('%s %f', test, time.time() - start_time)