def transform_points(self):
    """
    warps self.pts with the current tps params, leaving the result in self.pts_w

    The CPU reference in check_transform_pts expects the outcome to be
    pts . lin_dd + trans_d + kernels . w_nd, i.e. fill_mat is presumably
    seeding each output with the translation and dot_batch_nocheck
    presumably accumulates into its output -- confirm against those helpers.
    """
    warped, warped_ptrs = self.pts_w, self.pt_w_ptrs

    fill_mat(warped_ptrs, self.trans_d_ptrs, self.dims_gpu, self.N)

    # (left operand, right operand, left pointers, right pointers);
    # the affine product is issued first, then the kernel product
    products = (
        (self.pts, self.lin_dd, self.pt_ptrs, self.lin_dd_ptrs),
        (self.kernels, self.w_nd, self.kernel_ptrs, self.w_nd_ptrs),
    )
    for lhs, rhs, lhs_ptrs, rhs_ptrs in products:
        dot_batch_nocheck(lhs, rhs, warped, lhs_ptrs, rhs_ptrs, warped_ptrs)

    sync()
def check_transform_pts(ctx, i = 0):
    """
    debugging aid: re-runs the gpu warp on ctx and compares batch item i
    against a numpy computation of the same quantity

    The gpu side repeats the fill_mat / dot_batch_nocheck sequence used by
    transform_points, with ctx.pts_w passed as the output. The cpu reference
    is x . lin_dd + trans_d + K . w_nd, restricted to the first
    n = ctx.dims[i] rows.

    ctx -- object exposing dims, w_nd, lin_dd, trans_d, kernels, pts, pts_w
           (indexable collections of gpu arrays) together with the matching
           *_ptrs arrays, dims_gpu and N
    i   -- index of the batch item to verify

    If the two results disagree, the kernel product alone is recomputed on
    freshly uploaded copies, once through dot_batch_nocheck and once through
    scikits.cuda's single-matrix dot. Both outcomes are printed, an
    AssertionError is raised if those two gpu paths differ, and otherwise
    the function waits for the user to press enter.
    """
    import scikits.cuda.linalg as la

    n = ctx.dims[i]

    # host copies of the inputs, taken before the kernels are launched;
    # only the first n rows (and columns of the kernel) are used for item i
    w_nd = ctx.w_nd[i].get()[:n]
    lin_dd = ctx.lin_dd[i].get()
    trans_d = ctx.trans_d[i].get()
    k_nn = ctx.kernels[i].get()[:n, :n].reshape(n, n).copy()
    x_nd = ctx.pts[i].get()[:n]

    # gpu path, same call sequence as transform_points
    fill_mat(ctx.pt_w_ptrs, ctx.trans_d_ptrs, ctx.dims_gpu, ctx.N)
    dot_batch_nocheck(ctx.pts, ctx.lin_dd, ctx.pts_w,
                      ctx.pt_ptrs, ctx.lin_dd_ptrs, ctx.pt_w_ptrs)
    dot_batch_nocheck(ctx.kernels, ctx.w_nd, ctx.pts_w,
                      ctx.kernel_ptrs, ctx.w_nd_ptrs, ctx.pt_w_ptrs)
    xw_nd = ctx.pts_w[i].get()[:n]

    # cpu reference: affine part first, then the kernel part added on top
    k_dot_w = np.dot(k_nn, w_nd)
    cpu_xw_nd = np.dot(x_nd, lin_dd) + trans_d[None, :] + k_dot_w

    if np.allclose(xw_nd, cpu_xw_nd):
        return

    print("k dot w_nd is difference on cpu and gpu")

    # uploads happen only on the failure path; the same device copies feed
    # both the batched retry and the single-matrix product
    k_gpu = [gpuarray.to_gpu(k_nn)]
    w_gpu = [gpuarray.to_gpu(w_nd)]
    # zero-initialised output, so the retry yields the bare product if
    # dot_batch_nocheck adds into its output (presumed from the reference above)
    res_gpu = [gpuarray.zeros((n, DATA_DIM), np.float32)]
    k_ptrs = get_gpu_ptrs(k_gpu)
    w_ptrs = get_gpu_ptrs(w_gpu)
    res_ptrs = get_gpu_ptrs(res_gpu)
    dot_batch_nocheck(k_gpu, w_gpu, res_gpu, k_ptrs, w_ptrs, res_ptrs)
    res = res_gpu[0].get()

    single = la.dot(k_gpu[0], w_gpu[0]).get()

    print("retry success {}".format(np.allclose(res, k_dot_w)))
    print("gpu success {}".format(np.allclose(single, res)))
    assert np.allclose(single, res)
    # pause so the diagnostics above can be read before the caller continues
    raw_input("go?")
def transform_trajs(self):
    """
    warps the left and right trajectories with the current tps params,
    leaving the results in self.l_traj_w and self.r_traj_w

    Same three stages as transform_points, each issued for the left
    trajectory and then the right one: fill_mat on the output pointers,
    a batched product with lin_dd, a batched product of the trajectory
    kernels with w_nd. sync() is called once at the end.
    """
    lin, lin_ptrs = self.lin_dd, self.lin_dd_ptrs
    weights, weight_ptrs = self.w_nd, self.w_nd_ptrs

    # (array, pointer array) pairs, left arm first
    outputs = (
        (self.l_traj_w, self.l_traj_w_ptrs),
        (self.r_traj_w, self.r_traj_w_ptrs),
    )
    trajs = (
        (self.l_traj, self.l_traj_ptrs),
        (self.r_traj, self.r_traj_ptrs),
    )
    traj_kernels = (
        (self.l_traj_K, self.l_traj_K_ptrs),
        (self.r_traj_K, self.r_traj_K_ptrs),
    )
    dims = (self.l_traj_dims_gpu, self.r_traj_dims_gpu)

    for (_, out_ptrs), traj_dims in zip(outputs, dims):
        fill_mat(out_ptrs, self.trans_d_ptrs, traj_dims, self.N)

    for (traj, traj_ptrs), (out, out_ptrs) in zip(trajs, outputs):
        dot_batch_nocheck(traj, lin, out, traj_ptrs, lin_ptrs, out_ptrs)

    for (kern, kern_ptrs), (out, out_ptrs) in zip(traj_kernels, outputs):
        dot_batch_nocheck(kern, weights, out, kern_ptrs, weight_ptrs, out_ptrs)

    sync()