"gnumpy = chain_rules(a, x, y, grad_np_kernel(x, y, sigma, kernel=k), b)", globals=globals(), repeat=3, number=1, ) print("Time for NumPy: {:.4f}s".format( np.median(speed_numpy[k]))) else: gnumpy = torch.zeros_like(xc).data.cpu().numpy() # Vanilla pytorch (with cuda if available, and cpu otherwise) if use_vanilla: try: aKxy_b = torch.dot( ac.view(-1), (torch_kernel(xc, yc, sigmac, kernel=k) @ bc).view(-1)) g3 = torch.autograd.grad(aKxy_b, xc, create_graph=False)[0].cpu() torch.cuda.synchronize() speed_pytorch[k] = np.array( timeit.repeat( setup= "cost = torch.dot(ac.view(-1), (torch_kernel(xc, yc, sigmac, kernel=k) @ bc).view(-1))", stmt= "g3 = torch.autograd.grad(cost, xc, create_graph=False)[0] ; torch.cuda.synchronize()", globals=globals(), repeat=REPEAT, number=1, )) print( "Time for PyTorch: {:.4f}s".format( np.median(speed_pytorch[k])),
for k in kernel_to_test:
    print('kernel: ' + k)

    # Pure NumPy
    g_numpy = np.matmul(np_kernel(x, y, sigma, kernel=k), b)
    speed_numpy[k] = timeit.repeat(
        'g_numpy = np.matmul(np_kernel(x, y, sigma, kernel=k), b)',
        globals=globals(), repeat=5, number=1)
    print('Time for NumPy: {:.4f}s'.format(np.median(speed_numpy[k])))

    # Vanilla PyTorch (with CUDA if available, and CPU otherwise)
    try:
        g_pytorch = torch_kernel(xc, yc, sigmac, kernel=k) @ bc
        torch.cuda.synchronize()
        # Each repetition runs the product 4 times, hence the division by 4.
        speed_pytorch[k] = np.array(timeit.repeat(
            "torch_kernel(xc, yc, sigmac, kernel=k) @ bc; torch.cuda.synchronize()",
            globals=globals(), repeat=REPEAT, number=4)) / 4
        print('Time for PyTorch: {:.4f}s'.format(np.median(speed_pytorch[k])), end='')
        print(' (absolute error: ',
              np.max(np.abs(g_pytorch.cpu().numpy() - g_numpy)), ')')
    except Exception:
        print('Time for PyTorch: Not Done')
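# ---------------------------------------------------------------------
# Illustrative sketch (not part of the benchmark itself): a minimal
# version of what the np_kernel / torch_kernel helpers compute, for the
# Gaussian case only. The real helpers ship with the pyKeOps benchmark
# utilities and support several kernel names; the bodies below are an
# assumption based on the convention K_ij = exp(-|x_i - y_j|^2 / sigma^2).
import numpy as np
import torch

def _np_kernel_sketch(x, y, sigma, kernel="gaussian"):
    sq = ((x[:, None, :] - y[None, :, :]) ** 2).sum(-1)  # |x_i - y_j|^2, (N, M)
    if kernel == "gaussian":
        return np.exp(-sq / sigma ** 2)
    raise NotImplementedError(kernel)

def _torch_kernel_sketch(x, y, sigma, kernel="gaussian"):
    sq = ((x[:, None, :] - y[None, :, :]) ** 2).sum(-1)  # same, on GPU if x is
    if kernel == "gaussian":
        return (-sq / sigma ** 2).exp()
    raise NotImplementedError(kernel)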
        speed_pykeops_gen = timeit.Timer(
            "g1 = kernel_product(params, xc, yc, bc, mode=mode).cpu()",
            GC, globals=globals(), timer=time.time).timeit(LOOPS)
        print("Time for KeOps generic: {:.4f}s".format(speed_pykeops_gen), end="")
        print(" (absolute error: ",
              np.max(np.abs(g1.data.numpy() - gnumpy)), ")")
    except Exception:
        pass

    # Vanilla PyTorch (with CUDA if available, and CPU otherwise)
    try:
        g0 = torch.mm(torch_kernel(xc, yc, sigmac, kernel=k), bc).cpu().numpy()
        # The host transfer is commented out of the timed statement, so that
        # only the kernel product itself is measured.
        speed_pytorch = timeit.Timer(
            "g0 = torch.mm(torch_kernel(xc, yc, sigmac, kernel=k), bc)  # .cpu().numpy()",
            GC, globals=globals(), timer=time.time).timeit(LOOPS)
        print("Time for PyTorch: {:.4f}s".format(speed_pytorch), end="")
        print(" (absolute error: ", np.max(np.abs(g0 - gnumpy)), ")")
    except Exception:
        pass

    # Specific CUDA tiled implementation (if CUDA is available)
    if gpu_available:
        try:
            from pykeops.numpy.convolutions.radial_kernels import radial_kernels_conv
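# ---------------------------------------------------------------------
# Illustrative sketch (assumption: the legacy pykeops.torch
# `kernel_product` API, removed in later pyKeOps releases): the `params`
# dict and `mode` string used in the timed statement above are typically
# built along these lines. The exact `gamma` convention depends on how
# the kernel formula is defined.
import torch
from pykeops.torch import Kernel, kernel_product

sigmac = torch.tensor([0.5])           # hypothetical kernel bandwidth
params = {
    "id": Kernel("gaussian(x,y)"),     # kernel formula, selected by name
    "gamma": 1.0 / sigmac ** 2,        # kernel precision parameter
}
mode = "sum"                           # plain kernel-matrix product
# g1 = kernel_product(params, xc, yc, bc, mode=mode)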