예제 #1
0
    def get_solver(self, x_na, K_nn, bend_coefs, rot_coef):
        """
        Build a TPSSolver for the points x_na and install it as self.cur_solver.

        The previously returned solver (if any) is flagged with valid = False
        first; the new solver receives slices of the same self.*_gpu buffers,
        which is presumably why the old one must no longer be used.

        x_na       -- (n, d) array of source points.
        K_nn       -- kernel matrix; it is stacked next to x_na and added into
                      an (n, n) sub-block below, so it has to be (n, n).
        bend_coefs -- sequence of bending coefficients; one projected matrix
                      N' * O_b * N is computed per entry. May not be longer
                      than self.bend_coefs.
        rot_coef   -- passed to np.diag and written into (d, d) blocks, so it
                      is expected to hold d values.

        Returns the new TPSSolver.
        Raises AssertionError when there are more bend_coefs than
        self.bend_coefs or when n exceeds self.max_N.
        """
        n, d = x_na.shape
        assert len(bend_coefs) <= len(self.bend_coefs)
        assert n <= self.max_N

        if self.cur_solver is not None:
            self.cur_solver.valid = False

        m = n + d + 1                 # side length of the full system
        ones_n1 = np.ones((n, 1))
        rot_diag = np.diag(rot_coef)  # reused for R and for every O_b

        Q = np.c_[ones_n1, x_na, K_nn]
        A = np.r_[np.zeros((d + 1, d + 1)), np.c_[ones_n1, x_na]].T

        R = np.zeros((m, d))
        R[1:d + 1, :d] = rot_diag

        # Columns of U past the first n_cnts span the null space of A; only U
        # is needed from the decomposition.
        n_cnts = A.shape[0]
        _u = np.linalg.svd(A.T)[0]
        N = _u[:, n_cnts:].copy()

        # Views into the shared GPU buffers, sized for this problem.
        N_gpu = self.N_gpu[:m * n].reshape(m, n)
        N_gpu.set_async(N)
        QN = Q.dot(N)
        QN_gpu = self.QN_gpu[:n * n].reshape(n, n)
        QN_gpu.set_async(QN)
        WQN_gpu = self.WQN_gpu[:n * n].reshape(n, n)
        NHN_gpu = self.NHN_gpu[:n * n].reshape(n, n)
        NR = N.T.dot(R)

        N_arr_gpu = []
        O_gpu = []
        ON_gpu = []
        NON_gpu = []
        for i, b in enumerate(bend_coefs):
            # O_b = b * K in the kernel block plus diag(rot_coef) in the
            # linear block.
            O_b = np.zeros((m, m), np.float64)
            O_b[d + 1:, d + 1:] += b * K_nn
            O_b[1:d + 1, 1:d + 1] += rot_diag

            start = i * m * m
            O_view = self.O_gpu[start:start + m * m].reshape(m, m)
            O_view.set(O_b)
            O_gpu.append(O_view)

            start = i * n * m
            ON_gpu.append(self.ON_gpu[start:start + n * m].reshape(m, n))
            start = i * n * n
            NON_gpu.append(self.NON_gpu[start:start + n * n].reshape(n, n))
            # Every batch entry multiplies by the same N.
            N_arr_gpu.append(N_gpu)
        O_ptrs = get_gpu_ptrs(O_gpu)
        ON_ptrs = get_gpu_ptrs(ON_gpu)
        NON_ptrs = get_gpu_ptrs(NON_gpu)
        N_ptrs = get_gpu_ptrs(N_arr_gpu)

        # Judging by the operand shapes: ON = O * N, then NON = N' * ON.
        dot_batch_nocheck(O_gpu,  N_arr_gpu, ON_gpu,
                          O_ptrs, N_ptrs,    ON_ptrs,
                          b = 0)
        dot_batch_nocheck(N_arr_gpu, ON_gpu,  NON_gpu,
                          N_ptrs,    ON_ptrs, NON_ptrs,
                          transa='T', b = 0)
        NON_gpu = dict(zip(bend_coefs, NON_gpu))
        # NOTE(review): nothing in this method waits for get_async() to
        # finish -- confirm the host copies are synchronized before
        # TPSSolver reads them.
        NON = {b: non.get_async() for b, non in NON_gpu.items()}
        self.cur_solver = TPSSolver(bend_coefs, N, QN, NON, NR, x_na, K_nn, rot_coef,
                                    QN_gpu, WQN_gpu, NON_gpu, NHN_gpu)
        return self.cur_solver
예제 #2
0
파일: batchtps.py 프로젝트: rll/lfd
 def transform_points(self):
     """
     Warp self.pts with the current TPS parameters; the result ends up in
     self.pts_w.

     The output is first filled from trans_d, then two batched products
     (pts * lin_dd and kernels * w_nd) are issued against the same output
     without b=0, i.e. presumably accumulating into it.
     """
     out, out_ptrs = self.pts_w, self.pt_w_ptrs

     fill_mat(out_ptrs, self.trans_d_ptrs, self.dims_gpu, self.N)

     products = (
         (self.pts, self.lin_dd, self.pt_ptrs, self.lin_dd_ptrs),
         (self.kernels, self.w_nd, self.kernel_ptrs, self.w_nd_ptrs),
     )
     for lhs, rhs, lhs_ptrs, rhs_ptrs in products:
         dot_batch_nocheck(lhs, rhs, out, lhs_ptrs, rhs_ptrs, out_ptrs)

     sync()
예제 #3
0
파일: batchtps.py 프로젝트: rll/lfd
 def update_transform(self, b):
     """
     Recompute the TPS parameters for the current target points.

     b is the key used to look up self.offset_mats, self.proj_mats and
     self.proj_mat_ptrs. The parameters are first set from the offset
     matrix, then a batched product of the projection matrices with
     self.pts_t is issued into self.tps_params.
     """
     offset = self.offset_mats[b]
     self.set_tps_params(offset)

     proj = self.proj_mats[b]
     proj_ptrs = self.proj_mat_ptrs[b]
     dot_batch_nocheck(proj, self.pts_t, self.tps_params,
                       proj_ptrs, self.pt_t_ptrs, self.tps_param_ptrs)
     sync()
예제 #4
0
파일: batchtps.py 프로젝트: rll/lfd
    def bending_cost(self, b=DEFAULT_LAMBDA[1]):
        """
        Return b * trace(w_nd' * K * w_nd) for each of the self.N batch
        entries, as a numpy array.

        self.pts_w is used as scratch space for K * w_nd, so its previous
        contents are lost.
        """
        # Step 1: K * w_nd -> pts_w.
        dot_batch_nocheck(self.kernels, self.w_nd, self.pts_w,
                          self.kernel_ptrs, self.w_nd_ptrs, self.pt_w_ptrs,
                          b=0)
        # Step 2: (K * w_nd)' * w_nd -> bend_res.
        dot_batch_nocheck(self.pts_w, self.w_nd, self.bend_res,
                          self.pt_w_ptrs, self.w_nd_ptrs, self.bend_res_ptrs,
                          transa="T", b=0)

        # The per-entry results are read back stacked in DATA_DIM-row blocks.
        stacked = self.bend_res_mat.get()
        traces = []
        for idx in range(self.N):
            first_row = idx * DATA_DIM
            traces.append(np.trace(stacked[first_row:first_row + DATA_DIM]))
        return b * np.array(traces)
예제 #5
0
파일: batchtps.py 프로젝트: rll/lfd
 def transform_trajs(self):
     """
     Warp the left and right trajectories with the current TPS parameters;
     the results end up in self.l_traj_w and self.r_traj_w.

     For each stage the left trajectory is processed before the right one:
     fill the outputs from trans_d, then issue traj * lin_dd, then
     traj_K * w_nd against the same outputs.
     """
     left_out, left_out_ptrs = self.l_traj_w, self.l_traj_w_ptrs
     right_out, right_out_ptrs = self.r_traj_w, self.r_traj_w_ptrs

     fills = (
         (left_out_ptrs, self.l_traj_dims_gpu),
         (right_out_ptrs, self.r_traj_dims_gpu),
     )
     for out_ptrs, dims in fills:
         fill_mat(out_ptrs, self.trans_d_ptrs, dims, self.N)

     linear_terms = (
         (self.l_traj, self.l_traj_ptrs, left_out, left_out_ptrs),
         (self.r_traj, self.r_traj_ptrs, right_out, right_out_ptrs),
     )
     for traj, traj_ptrs, out, out_ptrs in linear_terms:
         dot_batch_nocheck(traj, self.lin_dd, out,
                           traj_ptrs, self.lin_dd_ptrs, out_ptrs)

     kernel_terms = (
         (self.l_traj_K, self.l_traj_K_ptrs, left_out, left_out_ptrs),
         (self.r_traj_K, self.r_traj_K_ptrs, right_out, right_out_ptrs),
     )
     for kern, kern_ptrs, out, out_ptrs in kernel_terms:
         dot_batch_nocheck(kern, self.w_nd, out,
                           kern_ptrs, self.w_nd_ptrs, out_ptrs)

     sync()
예제 #6
0
파일: batchtps.py 프로젝트: rll/lfd
def check_transform_pts(ctx, i=0):
    """
    Debug helper: redo the point warp of batch entry i on the CPU and compare
    it with what the batched GPU calls leave in ctx.pts_w[i].

    The GPU side runs the same three calls as transform_points (fill_mat and
    two dot_batch_nocheck calls), so ctx.pts_w is overwritten for every batch
    entry. The CPU reference is x_nd * lin_dd + trans_d + k_nn * w_nd, using
    only the first n = ctx.dims[i] rows.

    On a mismatch the kernel product is recomputed on the GPU twice (once
    through dot_batch_nocheck with a single-entry batch, once through
    scikits.cuda's dot), the comparisons are printed, and the function waits
    for the user to press enter.

    Returns None.
    Raises AssertionError if the two GPU recomputations disagree.
    """
    import scikits.cuda.linalg as la

    n = ctx.dims[i]
    w_nd = ctx.w_nd[i].get()[:n]
    lin_dd = ctx.lin_dd[i].get()
    trans_d = ctx.trans_d[i].get()
    k_nn = ctx.kernels[i].get()[:n, :n].reshape(n, n).copy()
    x_nd = ctx.pts[i].get()[:n]

    # GPU result. Only the final read-back is needed for the comparison, so
    # no intermediate copies of pts_w are fetched.
    fill_mat(ctx.pt_w_ptrs, ctx.trans_d_ptrs, ctx.dims_gpu, ctx.N)
    dot_batch_nocheck(ctx.pts, ctx.lin_dd, ctx.pts_w, ctx.pt_ptrs, ctx.lin_dd_ptrs, ctx.pt_w_ptrs)
    dot_batch_nocheck(ctx.kernels, ctx.w_nd, ctx.pts_w, ctx.kernel_ptrs, ctx.w_nd_ptrs, ctx.pt_w_ptrs)
    xw_nd = ctx.pts_w[i].get()[:n]

    # CPU reference: affine part first, then the kernel part.
    k_dot_w = np.dot(k_nn, w_nd)
    cpu_xw_nd = np.dot(x_nd, lin_dd) + trans_d[None, :]
    cpu_xw_nd = cpu_xw_nd + k_dot_w

    if not np.allclose(xw_nd, cpu_xw_nd):
        print("k dot w_nd is difference on cpu and gpu")

        # Upload the operands only now that they are actually needed. The
        # pointer arrays are kept in named variables so they stay alive until
        # the results have been read back.
        k_gpu = [gpuarray.to_gpu(k_nn)]
        w_gpu = [gpuarray.to_gpu(w_nd)]
        res_gpu = [gpuarray.zeros((n, DATA_DIM), np.float32)]
        k_ptrs = get_gpu_ptrs(k_gpu)
        w_ptrs = get_gpu_ptrs(w_gpu)
        res_ptrs = get_gpu_ptrs(res_gpu)
        dot_batch_nocheck(k_gpu, w_gpu, res_gpu, k_ptrs, w_ptrs, res_ptrs)
        res = res_gpu[0].get()

        # Non-batched product on the same uploaded operands.
        single = la.dot(k_gpu[0], w_gpu[0]).get()

        print("retry success {}".format(np.allclose(res, k_dot_w)))
        print("gpu success {}".format(np.allclose(single, res)))
        assert np.allclose(single, res)

        # Pause so the output can be inspected; raw_input only exists on
        # Python 2.
        try:
            pause = raw_input
        except NameError:
            pause = input
        pause("go?")