"gnumpy = chain_rules(a, x, y, grad_np_kernel(x, y, sigma, kernel=k), b)",
            globals=globals(),
            repeat=3,
            number=1,
        )
        print("Time for NumPy:               {:.4f}s".format(
            np.median(speed_numpy[k])))
    else:
        # NumPy run skipped: keep a zero placeholder so the error checks still have a reference
        gnumpy = torch.zeros_like(xc).data.cpu().numpy()

    # Vanilla PyTorch (with CUDA if available, and CPU otherwise)
    if use_vanilla:
        try:
            aKxy_b = torch.dot(
                ac.view(-1),
                (torch_kernel(xc, yc, sigmac, kernel=k) @ bc).view(-1))
            g3 = torch.autograd.grad(aKxy_b, xc, create_graph=False)[0].cpu()
            torch.cuda.synchronize()  # make sure the warm-up run has finished on the GPU
            speed_pytorch[k] = np.array(
                timeit.repeat(
                    setup=
                    "cost = torch.dot(ac.view(-1), (torch_kernel(xc, yc, sigmac, kernel=k) @ bc).view(-1))",
                    stmt=
                    "g3 = torch.autograd.grad(cost, xc, create_graph=False)[0] ; torch.cuda.synchronize()",
                    globals=globals(),
                    repeat=REPEAT,
                    number=1,
                ))
            print(
                "Time for PyTorch:             {:.4f}s".format(
                    np.median(speed_pytorch[k])),
                end="")
            print("   (absolute error:       ",
                  np.max(np.abs(g3.data.cpu().numpy() - gnumpy)), ")")
        except:
            print("Time for PyTorch:             Not Done")
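
# A minimal NumPy sketch of the quantity that the timed statement above computes,
# assuming the Gaussian kernel K_ij = exp(-|x_i - y_j|^2 / sigma^2) and weight
# vectors a of shape (N,) and b of shape (M,).  The real helpers (np_kernel,
# grad_np_kernel, chain_rules) ship with pykeops and may be organised
# differently; this only spells out the underlying computation.
import numpy as np

def gaussian_kernel_np(x, y, sigma):
    # x: (N, D), y: (M, D)  ->  (N, M) Gaussian kernel matrix
    sq = ((x[:, None, :] - y[None, :, :]) ** 2).sum(-1)
    return np.exp(-sq / sigma ** 2)

def gaussian_grad_x_np(a, x, y, b, sigma):
    # Gradient of a @ (K(x, y) @ b) with respect to x, shape (N, D)
    K = gaussian_kernel_np(x, y, sigma)        # (N, M)
    diff = x[:, None, :] - y[None, :, :]       # (N, M, D)
    w = (a[:, None] * b[None, :]) * K          # (N, M), weights a_i * b_j * K_ij
    return (-2.0 / sigma ** 2) * (w[:, :, None] * diff).sum(axis=1)

# With these definitions, gaussian_grad_x_np(a, x, y, b, sigma) plays the same
# role as the chain_rules(...) expression that timeit executes above.
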
Example 2
for k in kernel_to_test:
    print('kernel: ' + k)

    # Pure numpy
    g_numpy = np.matmul(np_kernel(x, y, sigma, kernel=k), b)
    speed_numpy[k] = timeit.repeat(
        'gnumpy = np.matmul( np_kernel(x, y, sigma, kernel=k), b)',
        globals=globals(),
        repeat=5,
        number=1)
    print('Time for NumPy:               {:.4f}s'.format(
        np.median(speed_numpy[k])))

    # Vanilla PyTorch (with CUDA if available, and CPU otherwise)
    try:
        g_pytorch = torch_kernel(xc, yc, sigmac, kernel=k) @ bc
        torch.cuda.synchronize()
        speed_pytorch[k] = np.array(
            timeit.repeat(
                "torch_kernel(xc, yc, sigmac, kernel=k) @ bc; torch.cuda.synchronize()",
                globals=globals(),
                repeat=REPEAT,
                number=4)) / 4  # 4 executions per repeat; divide to get the per-run time

        print('Time for PyTorch:             {:.4f}s'.format(
            np.median(speed_pytorch[k])),
              end='')
        print('   (absolute error:       ',
              np.max(np.abs(g_pytorch.cpu().numpy() - g_numpy)), ')')
    except:
        print('Time for PyTorch:             Not Done')
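
# A minimal PyTorch counterpart of the torch_kernel helper used throughout these
# snippets, written only for the Gaussian case; the actual helper belongs to the
# pykeops benchmark utilities and selects among several kernels through its
# `kernel` argument.
import torch

def gaussian_torch_kernel(x, y, sigma):
    # x: (N, D), y: (M, D), sigma: scalar tensor  ->  (N, M) kernel matrix
    sq = ((x[:, None, :] - y[None, :, :]) ** 2).sum(-1)
    return (-sq / sigma ** 2).exp()

# With such a helper, gaussian_torch_kernel(xc, yc, sigmac) @ bc computes the
# same matrix-vector product as torch_kernel(xc, yc, sigmac, kernel='gaussian') @ bc.
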
Example 3
    # KeOps generic kernel product
    try:
        speed_pykeops_gen = timeit.Timer(
            'g1 = kernel_product( params,xc,yc,bc,  mode=mode).cpu()',
            GC,
            globals=globals(),
            timer=time.time).timeit(LOOPS)
        print(
            "Time for keops generic:       {:.4f}s".format(speed_pykeops_gen),
            end="")
        print("   (absolute error:       ",
              np.max(np.abs(g1.data.numpy() - gnumpy)), ")")
    except:
        pass

    # Vanilla PyTorch (with CUDA if available, and CPU otherwise)
    try:
        g0 = torch.mm(torch_kernel(xc, yc, sigmac, kernel=k), bc).cpu().numpy()
        # The ".cpu().numpy()" transfer is deliberately commented out inside the
        # timed statement, so that only the GPU computation itself is measured.
        speed_pytorch = timeit.Timer(
            'g0 = torch.mm(torch_kernel(xc,yc,sigmac,kernel=k),bc)#.cpu().numpy()',
            GC,
            globals=globals(),
            timer=time.time).timeit(LOOPS)
        print("Time for Pytorch:             {:.4f}s".format(speed_pytorch),
              end="")
        print("   (absolute error:       ", np.max(np.abs(g0 - gnumpy)), ")")
    except:
        pass
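
# CUDA kernel launches are asynchronous, so a statement such as
# torch.mm(torch_kernel(xc, yc, sigmac, kernel=k), bc) can return before the GPU
# has actually finished; this is why the earlier snippets end their timed
# statements with torch.cuda.synchronize().  Below is a small, self-contained
# sketch of that timing pattern (the helper name median_gpu_time is ours, not
# part of pykeops).
import timeit

import numpy as np
import torch

def median_gpu_time(stmt_fn, repeat=10):
    # Median wall-clock time of stmt_fn(), forcing queued GPU work to complete.
    def run():
        stmt_fn()
        if torch.cuda.is_available():
            torch.cuda.synchronize()  # wait for all queued CUDA kernels
    if torch.cuda.is_available():
        torch.cuda.synchronize()      # drain pending work before timing starts
    return float(np.median(timeit.repeat(run, repeat=repeat, number=1)))

# e.g. median_gpu_time(lambda: torch_kernel(xc, yc, sigmac, kernel=k) @ bc)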

    # Specific CUDA tiled implementation (if CUDA is available)
    if gpu_available:
        try:
            from pykeops.numpy.convolutions.radial_kernels import radial_kernels_conv