def test_idcct2Random(self):
    """Check each IDCCT2 implementation against the reference idcct2.

    A seeded random 4x8 matrix of integers drawn from [0, 10) and converted
    to double is passed through ``discrete_spectral_transform.idcct2`` to
    obtain the golden result. ``dct.IDCCT2`` and ``dct_lee.IDCCT2`` must
    then match it within ``atol=1e-14`` on the CPU and, when
    ``torch.cuda.device_count()`` is non-zero, on the GPU as well.
    """
    torch.manual_seed(10)
    rows, cols = 4, 8
    x = torch.empty(rows, cols, dtype=torch.int32).random_(0, 10).double()
    print("2D x")
    print(x)

    golden_value = discrete_spectral_transform.idcct2(x).data.numpy()
    print("2D golden_value")
    print(golden_value)

    def check(module, on_gpu):
        # Run one implementation's IDCCT2 and compare it with the golden
        # result; GPU output is copied back to the host before comparing.
        transform = module.IDCCT2()
        if on_gpu:
            result = transform.forward(x.cuda()).cpu()
            print("dxt_value cuda")
        else:
            result = transform.forward(x)
            print("dxt_value")
        actual = result.data.numpy()
        print(actual)
        np.testing.assert_allclose(actual, golden_value, atol=1e-14)

    # test cpu
    check(dct, on_gpu=False)
    check(dct_lee, on_gpu=False)

    # test gpu
    if torch.cuda.device_count():
        check(dct, on_gpu=True)
        check(dct_lee, on_gpu=True)
def eval_runtime():
    """Time the ``dct.IDCT2`` variants on the GPU, then exit the process.

    A batch of ten random ``N x N`` float64 matrices (uniform in [0, 10))
    is created on the CUDA device. Each benchmark runs ``runs`` forward
    calls, cycling through the batch, with ``torch.cuda.synchronize()``
    before the timer starts and before it stops, and prints the mean time
    per call in milliseconds.

    Only the two ``dct.IDCT2`` benchmarks execute: the ``exit()`` call
    after them raises ``SystemExit``, so the benchmarks that follow it are
    never reached.
    """
    N = 512
    runs = 10
    x = torch.empty(10, N, N, dtype=torch.float64).uniform_(0, 10.0).cuda()
    # perm is only consumed by the disabled dct2_N benchmark listed below.
    perm = discrete_spectral_transform.get_perm(N, dtype=torch.int64, device=x.device)
    expk = discrete_spectral_transform.get_expk(N, dtype=x.dtype, device=x.device)

    def report(message, transform):
        # Time `runs` calls of transform(x[i % 10]) between two device
        # synchronisations and print the mean per-call time through
        # `message`. To profile instead, wrap the loop in
        # torch.autograd.profiler.profile(use_cuda=True) and print it.
        torch.cuda.synchronize()
        started = time.time()
        for i in range(runs):
            transform(x[i % 10])
        torch.cuda.synchronize()
        print(message % ((time.time() - started) / runs * 1000))

    # Disabled experiments, kept for reference:
    #
    # x = torch.tensor([1, 2, 7, 9, 20, 31], dtype=torch.float64)
    # print(dct_N(x))
    #
    # x_numpy = x.data.cpu().numpy()
    # tt = time.time()
    # for i in range(runs):
    #     y = fftpack.dct(fftpack.dct(x_numpy[i % 10].T, norm=None).T, norm=None)
    # print("scipy takes %.3f sec" % (time.time() - tt))
    #
    # 9s for 200 iterations 1024x1024 on GTX 1080
    # report("dct_2N takes %.3f ms",
    #        lambda batch: dct2_2N(x[0], expk0=expk, expk1=expk))
    #
    # 11s for 200 iterations 1024x1024 on GTX 1080
    # report("dct_N takes %.3f ms",
    #        lambda batch: discrete_spectral_transform.dct2_N(
    #            batch, perm0=perm, expk0=expk, perm1=perm, expk1=expk))
    #
    # dct2func = dct.DCT2(expk, expk, algorithm='2N')
    # report("DCT2Function 2N takes %.3f ms", lambda batch: dct2func.forward(x[0]))
    #
    # dct2func = dct.DCT2(expk, expk, algorithm='N')
    # report("DCT2Function takes %.3f ms", lambda batch: dct2func.forward(x[0]))
    #
    # report("DCT2Function lee takes %.3f ms", dct_lee.DCT2(expk, expk).forward)
    #
    # report("idct2_2N takes %.3f ms",
    #        lambda batch: discrete_spectral_transform.idct2_2N(
    #            batch, expk0=expk, expk1=expk))

    idct2_2n = dct.IDCT2(expk, expk, algorithm='2N')
    report("IDCT2Function 2N takes %.3f ms", idct2_2n.forward)

    idct2_n = dct.IDCT2(expk, expk, algorithm='N')
    report("IDCT2Function takes %.3f ms", idct2_n.forward)

    exit()

    # Everything below is unreachable because of the exit() call above.

    # Disabled:
    # report("idxt takes %.3f ms",
    #        lambda batch: discrete_spectral_transform.idxt(batch, 0, expk=expk))
    # report("IDXCTFunction takes %.3f ms", dct.IDXCT(expk).forward)

    report(
        "fft2 takes %.3f ms",
        lambda batch: torch.rfft(batch.view([1, N, N]), signal_ndim=2, onesided=False),
    )

    report(
        "idcct2 takes %.3f ms",
        lambda batch: discrete_spectral_transform.idcct2(batch, expk_0=expk, expk_1=expk),
    )
    report("IDCCT2Function takes %.3f ms", dct.IDCCT2(expk, expk).forward)

    report(
        "idcst2 takes %.3f ms",
        lambda batch: discrete_spectral_transform.idcst2(batch, expk_0=expk, expk_1=expk),
    )
    report("IDCST2Function takes %.3f ms", dct.IDCST2(expk, expk).forward)

    report(
        "idsct2 takes %.3f ms",
        lambda batch: discrete_spectral_transform.idsct2(batch, expk_0=expk, expk_1=expk),
    )
    report("IDSCT2Function takes %.3f ms", dct.IDSCT2(expk, expk).forward)
# compute auv auv = discrete_spectral_transform.dct2(density_map, expk_M, expk_N) auv[0, :].mul_(0.5) auv[:, 0].mul_(0.5) ratio = auv.numpy() / auv_map print("auv/auv_map") print(ratio) # compute potential phi auv_by_wu2_plus_wv2 = auv.mul(inv_wu2_plus_wv2_2X).mul(2) ratio = auv_by_wu2_plus_wv2.numpy() / phi_in_map print("auv_by_wu2_plus_wv2/phi_in_map") print(ratio) # auv / (wu**2 + wv**2) potential_map = discrete_spectral_transform.idcct2(auv_by_wu2_plus_wv2, expk_M, expk_N) ratio = potential_map.numpy() / phi_out_map #plot(0, potential_map.numpy(), 0, "%d.potential_map" % (0)) print("potential_map/phi_out_map") print(ratio) # compute field xi auv_by_wu2_plus_wv2_wu = auv.mul(wu_by_wu2_plus_wv2) ratio = auv_by_wu2_plus_wv2_wu.numpy() / ex_in_map print("auv_by_wu2_plus_wv2_wu / ex_in_map") print(ratio) field_map_x = discrete_spectral_transform.idsct2(auv_by_wu2_plus_wv2_wu, expk_M, expk_N) ratio = field_map_x.numpy() / ex_out_map print("field_map_x/ex_out_map") print(ratio) pdb.set_trace() auv_by_wu2_plus_wv2_wv = auv.mul(wv_by_wu2_plus_wv2)
def eval_others(x, expk0, expk1, expkM, expkN, runs):
    """Time idcct2, idcst2 and idsct2 in their function and module forms.

    For each transform, the ``discrete_spectral_transform`` function and
    the matching ``dct`` class are benchmarked on ``x``: one untimed
    warm-up call, then ``runs`` timed calls bracketed by
    ``torch.cuda.synchronize()``. The mean time per call is printed in
    milliseconds, and a blank line is printed at the end.

    Args:
        x: Input passed unchanged to every transform.
        expk0: Forwarded as ``expk_0`` to the functions and as the first
            constructor argument of the ``dct`` classes.
        expk1: Forwarded as ``expk_1`` to the functions and as the second
            constructor argument of the ``dct`` classes.
        expkM: Accepted but not used by this function.
        expkN: Accepted but not used by this function.
        runs: Number of timed calls per benchmark.
    """

    def report(message, transform):
        # One warm-up call, then `runs` timed calls between two device
        # synchronisations; prints the mean per-call time via `message`.
        transform()
        torch.cuda.synchronize()
        started = time.time()
        for _ in range(runs):
            transform()
        torch.cuda.synchronize()
        print(message % ((time.time() - started) / runs * 1000))

    report(
        "idcct2 takes %.7f ms",
        lambda: discrete_spectral_transform.idcct2(x, expk_0=expk0, expk_1=expk1),
    )
    idcct2_op = dct.IDCCT2(expk0, expk1)
    report("IDCCT2 Function takes %.7f ms", lambda: idcct2_op.forward(x))

    report(
        "idcst2 takes %.7f ms",
        lambda: discrete_spectral_transform.idcst2(x, expk_0=expk0, expk_1=expk1),
    )
    idcst2_op = dct.IDCST2(expk0, expk1)
    report("IDCST2 Function takes %.7f ms", lambda: idcst2_op.forward(x))

    report(
        "idsct2 takes %.7f ms",
        lambda: discrete_spectral_transform.idsct2(x, expk_0=expk0, expk_1=expk1),
    )
    idsct2_op = dct.IDSCT2(expk0, expk1)
    report("IDSCT2 Function takes %.7f ms", lambda: idsct2_op.forward(x))

    print("")