Beispiel #1
0
 def LMO_err(params, M=2):
     """Return the batched hold-out error for log-scale kernel hyper-parameters.

     ``params`` holds log-values: all entries but the last are lengthscales
     (two are used here) and the last is the kernel amplitude.  ``M`` is the
     size of each held-out batch.  The kernel inputs (``L0``, ``EYEN``), the
     weights ``W``, targets ``Y``, data ``X``, ``N2``, ``nystr`` and the
     Nystrom quantities are read from the enclosing scope.

     The rows of ``X`` are randomly permuted and split into consecutive
     batches of ``M`` indices; the per-batch quadratic forms are summed and
     the result is divided by the number of batches and by ``M**2``.
     """
     theta = np.exp(params)
     lengthscales, amplitude = theta[:-1], theta[-1]
     # Sum of two RBF kernels, one per precomputed distance matrix, plus a
     # small diagonal term.
     kern_first = amplitude * amplitude * np.exp(
         -L0[0] / lengthscales[0] / lengthscales[0] / 2)
     kern_second = amplitude * amplitude * np.exp(
         -L0[1] / lengthscales[1] / lengthscales[1] / 2)
     L = kern_first + kern_second + 1e-6 * EYEN

     if nystr:
         # Low-rank path: only a small matrix is inverted.
         L_vec = L @ eig_vec_K
         small_inv = np.linalg.inv(eig_vec_K.T @ L_vec / N2 + inv_eig_val_K)
         C = L - L_vec @ small_inv @ L_vec.T / N2
         c = C @ W_nystr_Y * N2
     else:
         gram_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
         C = L @ gram_inv @ L / N2
         c = C @ W @ Y * N2

     residual = c - Y
     total = 0
     n_batches = 0
     n_rows = X.shape[0]
     shuffled = np.random.permutation(n_rows)
     for start in range(0, n_rows, M):
         batch = shuffled[start:start + M]
         block = np.ix_(batch, batch)
         K_b = W[block] * N2
         C_b = C[block]
         r_b = residual[batch]
         # The identity is sized from the batch so a short final batch works.
         b_y = np.linalg.inv(np.eye(C_b.shape[0]) - C_b @ K_b) @ r_b
         total += b_y.T @ K_b @ b_y
         n_batches += 1
     return total[0, 0] / n_batches / M**2
Beispiel #2
0
 def LMO_err(params, M=10):
     """Return the batched hold-out error for log-scale kernel hyper-parameters.

     ``params`` is a pair of log-values ``(log lengthscale, log amplitude)``.
     ``M`` is the size of each held-out batch.  The distance matrix ``L0``,
     ``EYEN``, weights ``W``, targets ``Y``, data ``X``, ``N2``, ``JITTER``,
     ``nystr`` and the Nystrom quantities are read from the enclosing scope.

     Both NumPy's and the stdlib's random generators are re-seeded with 2 on
     every call, so the permutation (and hence the result) is deterministic.

     Returns the sum of the per-batch quadratic forms divided by the number
     of batches and by ``M ** 2``.
     """
     np.random.seed(2)
     random.seed(2)
     al, bl = np.exp(params)
     L = bl * bl * np.exp(-L0 / al / al / 2) + 1e-6 * EYEN
     if nystr:
         # Low-rank path: only a small matrix is inverted.
         tmp_mat = L @ eig_vec_K
         C = L - tmp_mat @ np.linalg.inv(
             eig_vec_K.T @ tmp_mat / N2 + inv_eig_val_K) @ tmp_mat.T / N2
         c = C @ W_nystr_Y * N2
     else:
         LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
         C = L @ LWL_inv @ L / N2
         c = C @ W @ Y * N2
     c_y = c - Y

     lmo_err = 0
     n_batches = 0
     n_rows = X.shape[0]
     permutation = np.random.permutation(n_rows)
     for i in range(0, n_rows, M):
         indices = permutation[i:i + M]
         block = np.ix_(indices, indices)
         K_i = W[block] * N2
         C_i = C[block]
         c_y_i = c_y[indices]
         # Size the identity from the actual batch: the last batch is shorter
         # than M whenever n_rows is not a multiple of M, and np.eye(M) would
         # then fail to broadcast against C_i @ K_i.
         b_y = np.linalg.inv(np.eye(len(indices)) - C_i @ K_i) @ c_y_i
         lmo_err += b_y.T @ K_i @ b_y
         n_batches += 1
     return lmo_err[0, 0] / n_batches / M ** 2
Beispiel #3
0
    def get_causal_effect(params, do_A, w):
        """Return the mean fitted output for each intervention value in ``do_A``.

        Intended to be called from within the experiment function: the kernel
        inputs (``L0``, ``EYEN``), ``W``, ``Y``, ``X``, ``N2``, ``nystr`` and
        the Nystrom quantities are read from the enclosing scope.

        ``params`` is ``(lengthscale, amplitude)`` on the natural scale (no
        exponential is applied).  For each value in ``do_A`` the value is
        paired with every entry of ``w``, the fitted function is evaluated on
        those points and its mean is printed and collected.

        Both random generators are re-seeded with 4 on every call.  Returns
        the per-value means concatenated into one array.
        """
        np.random.seed(4)
        random.seed(4)
        lengthscale, amplitude = params
        L = amplitude * amplitude * np.exp(
            -L0 / lengthscale / lengthscale / 2) + 1e-6 * EYEN
        if nystr:
            small_inv = np.linalg.inv(
                eig_vec_K.T @ L @ eig_vec_K / N2 + np.diag(1 / eig_val_K / N2))
            alpha = EYEN - eig_vec_K @ small_inv @ eig_vec_K.T @ L / N2
            alpha = alpha @ W_nystr @ Y * N2
        else:
            gram_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
            alpha = gram_inv @ L @ W @ Y

        # Row count is taken before reshaping so it matches what the first
        # loop iteration would have seen.
        n_rows = w.shape[0]
        w_col = w.reshape(-1, 1)

        effects = []
        for a_val in do_A:
            a_col = np.repeat(a_val, [n_rows]).reshape(-1, 1)
            query = np.concatenate([a_col, w_col], axis=-1)
            query_L = amplitude * amplitude * np.exp(
                -_sqdist(query, X) / lengthscale / lengthscale / 2)
            mean_h = np.mean(query_L @ alpha).reshape(-1, 1)
            effects.append(mean_h)
            print('a = {}, beta_a = {}'.format(np.mean(a_col), mean_h))

        return np.concatenate(effects)
    def callback0(params, timer=None):
        """Optimizer callback: track test error and the norm of the solution.

        ``params`` is ``(lengthscale, amplitude)`` on the natural scale.  The
        coefficients ``alpha`` are refitted for these parameters, the test
        error against ``test_Y`` and the quadratic form ``alpha.T @ L @ alpha``
        are computed, and the module-level trackers ``Nfeval``, ``prev_norm``,
        ``opt_params`` and ``opt_test_err`` are updated.

        When ``timer`` is truthy the function returns right after the
        prediction, without touching any tracker.

        Raises a bare ``Exception`` when the norm is at least 3x ``prev_norm``
        (the smallest norm recorded so far) -- presumably caught by the caller
        to stop the optimizer early.
        """
        global Nfeval, prev_norm, opt_params, opt_test_err
        if Nfeval % 1 == 0:
            lengthscale, amplitude = params
            L = amplitude * amplitude * np.exp(
                -L0 / lengthscale / lengthscale / 2) + 1e-6 * EYEN
            if nystr:
                # NOTE(review): this branch uses np.diag(1 / eig_val_K) and has
                # no trailing `* N2` factor -- confirm the scaling matches the
                # dense branch below.
                small_inv = np.linalg.inv(
                    eig_vec_K.T @ L @ eig_vec_K / N2 + np.diag(1 / eig_val_K))
                alpha = EYEN - eig_vec_K @ small_inv @ eig_vec_K.T @ L / N2
                alpha = alpha @ W_nystr @ Y
            else:
                gram_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
                alpha = gram_inv @ L @ W @ Y
            test_L = amplitude * amplitude * np.exp(
                -test_L0 / lengthscale / lengthscale / 2)
            pred_mean = test_L @ alpha
            if timer:
                return
            test_err = ((pred_mean - test_Y) ** 2).mean()
            norm = alpha.T @ L @ alpha

        Nfeval += 1
        current_norm = norm[0, 0]
        if prev_norm is not None and current_norm / prev_norm >= 3:
            # Norm blew up: keep the previously stored optimum if there is one.
            if opt_params is None:
                opt_test_err = test_err
                opt_params = params
            print(True, opt_params, opt_test_err, prev_norm)
            raise Exception

        if prev_norm is None or current_norm <= prev_norm:
            prev_norm = current_norm
        opt_test_err = test_err
        opt_params = params
        print('params,test_err, norm: ', opt_params, opt_test_err, prev_norm)
    def alt_newton_coord_descent(self,
                                 X,
                                 Y,
                                 max_iter=200,
                                 convergence_tolerance=1e-6):
        """Alternate a line-searched ``Kyy`` step with a ``Kyx`` coordinate descent.

        Stores the scaled products ``Sxx``, ``Syy`` and ``Sxy`` (each divided
        by ``X.shape[1]``) on the instance, then iterates up to ``max_iter``
        times:

        1. record the current negative log-likelihood in ``self.nll``;
        2. obtain a direction for ``Kyy`` and a step size from the line search;
        3. update ``Kyy`` and refresh ``Kyy_inv`` from the line search's factor;
        4. update ``Kyx`` by coordinate descent.

        Iteration stops once the negative log-likelihood changes by less than
        ``convergence_tolerance`` between consecutive iterations.

        Returns:
            ``(Kyy, Kyx, converged)`` where the first two are copies of the
            current estimates and ``converged`` is True only if the tolerance
            was met.  On a non-finite direction or inverse the current
            estimates are returned early with ``converged`` False.
        """
        m = X.shape[1]
        self.Sxx = X.dot(X.T) / m
        self.Syy = Y.dot(Y.T) / m
        self.Sxy = X.dot(Y.T) / m

        self.nll = []
        self.lnll = []
        self.lrs = []

        converged_up_to_tolerance = False
        for t in range(max_iter):
            if t % 100 == 0:
                print('newton_iter {}='.format(X.shape[1]), t)
            self.nll.append(self.neg_log_likelihood())

            # Direction for Kyy, solved via coordinate descent.
            Kyy_direction = self.descent_direction_Kyy()
            if not np.isfinite(Kyy_direction).all():
                print('Newton optimization failed due to overflow.')
                return (self.Kyy.copy(), self.Kyx.copy(),
                        converged_up_to_tolerance)

            # Line search picks the step size and hands back a factor that is
            # reused for the inverse below.
            LL, learning_rate = self.line_search(Kyy_direction)
            self.lrs.append(learning_rate)

            self.Kyy = self.Kyy + learning_rate * Kyy_direction

            # Reuse the Cholesky factor from the backtracking line search.
            self.Kyy_inv = util.chol_inv(LL)

            self.Kyx = self.Kyx_coordinate_descent()

            if not (np.isfinite(self.Kyy_inv).all()
                    and np.isfinite(self.Kyx).all()):
                # Fall back to a regularised direct inverse.
                # NOTE(review): if only self.Kyx is non-finite, this branch
                # recomputes Kyy_inv but leaves Kyx untouched -- confirm.
                EPS = 1e-05
                self.Kyy_inv = np.linalg.inv(self.Kyy + EPS * np.eye(self.ny))
                if not np.isfinite(self.Kyy_inv).all():
                    print('Newton optimization failed due to overflow.')
                    return (self.Kyy.copy(), self.Kyx.copy(),
                            converged_up_to_tolerance)

            if t > 0 and np.abs(self.nll[-1] - self.neg_log_likelihood()
                                ) < convergence_tolerance:
                converged_up_to_tolerance = True
                break
        return self.Kyy.copy(), self.Kyx.copy(), converged_up_to_tolerance
Beispiel #6
0
    def train(self):
        """Fit the hyper-parameters with L-BFGS and cache the predictive state.

        Starts from ``self.get_default_theta()`` and minimises
        ``self.neg_log_likelihood``.  The callback keeps the best
        ``(self.loss, self.theta)`` pair seen so far, based on ``self.nlz``.

        If the first run raises ``np.linalg.LinAlgError``, the first entry of
        theta (the log noise variance used below) is increased by ``log(10)``
        and the optimisation is retried once.  Any other ``Exception`` stops
        the optimisation early and the best theta found so far is used.
        ``KeyboardInterrupt`` and ``SystemExit`` are no longer swallowed.

        Afterwards the kernel matrix for ``self.theta`` is factorised and
        ``self.L``, ``self.alpha`` and ``self.for_diag`` are stored.
        """
        theta0 = self.get_default_theta()
        self.loss = np.inf
        self.theta = np.copy(theta0)

        # Result is unused; the call is kept because the callback reads
        # self.nlz -- presumably set by neg_log_likelihood (confirm).
        self.neg_log_likelihood(theta0)

        def loss(theta):
            return self.neg_log_likelihood(theta)

        def callback(theta):
            # Remember the best parameters seen so far.
            if self.nlz < self.loss:
                self.loss = self.nlz
                self.theta = np.copy(theta)

        gloss = value_and_grad(loss)
        try:
            fmin_l_bfgs_b(gloss,
                          theta0,
                          maxiter=self.bfgs_iter,
                          m=100,
                          iprint=self.debug,
                          callback=callback)
        except np.linalg.LinAlgError:
            print('GP. Increase noise term and re-optimization.')
            theta0 = np.copy(self.theta)
            theta0[0] += np.log(10)
            try:
                fmin_l_bfgs_b(gloss,
                              theta0,
                              maxiter=self.bfgs_iter,
                              m=10,
                              iprint=self.debug,
                              callback=callback)
            except Exception:
                print('GP. Exception caught, L-BFGS early stopping...')
                if self.debug:
                    print(traceback.format_exc())
        except Exception:
            print('GP. Exception caught, L-BFGS early stopping...')
            if self.debug:
                print(traceback.format_exc())

        sn2 = np.exp(self.theta[0])
        hyp = self.theta[1:]
        K = self.kernel(self.train_x, self.train_x, hyp) + sn2 * np.eye(
            self.num_train) + self.jitter * np.eye(self.num_train)
        self.L = np.linalg.cholesky(K)
        self.alpha = chol_inv(self.L, self.train_y.T)
        if self.k:
            self.for_diag = np.exp(self.theta[1]) * np.exp(
                self.theta[3]) + np.exp(self.theta[3 + self.dim])
        else:
            self.for_diag = np.exp(self.theta[1])
        print('GP. Finished training process.')
Beispiel #7
0
    def predict(self, test_x, is_diag=1):
        """Return the predictive mean and variance at ``test_x``.

        The joint matrix from ``self.kernel`` is factorised and used both for
        the mean weights and for the variance term.  The cross-covariances to
        the source points (``self.kernel2``) and target points
        (``self.kernel1``) are stacked side by side.

        With ``is_diag`` truthy only per-point variances are returned;
        otherwise the full matrix is.  The variance is passed through
        ``np.abs``, and both outputs are mapped back to the original scale
        with ``self.std`` and ``self.mean``.
        """
        theta = self.theta
        output_scale = np.exp(theta[0])
        sigma2_tag = np.exp(theta[self.dim+2])

        chol_joint = np.linalg.cholesky(
            self.kernel(self.src_x, self.tag_x, theta))
        weights = chol_inv(chol_joint, self.train_y.T)

        cross_src = self.kernel2(test_x, self.src_x, theta)
        cross_tag = self.kernel1(test_x, self.tag_x, theta)
        k_star = np.hstack((cross_src, cross_tag))
        mean = np.dot(k_star, weights)

        solved = chol_inv(chol_joint, k_star.T)
        if is_diag:
            var = output_scale + sigma2_tag - (k_star * solved.T).sum(axis=1)
        else:
            var = self.kernel1(test_x, test_x, theta) + sigma2_tag - np.dot(k_star, solved)

        return mean * self.std + self.mean, np.abs(var) * (self.std**2)
Beispiel #8
0
 def predict(self, test_x, is_diag=1):
     """Return the predictive mean (flattened) and variance at ``test_x``.

     Uses the cached ``self.L`` and ``self.alpha``.  ``theta[0]`` is the log
     noise variance and the remaining entries are passed to the kernel.

     With ``is_diag`` truthy only per-point variances are returned (using
     the cached ``self.for_diag``); otherwise the full matrix is.  The
     variance is passed through ``np.abs``, and both outputs are mapped back
     to the original scale with ``self.std`` and ``self.mean``.
     """
     noise_var = np.exp(self.theta[0])
     kernel_hyp = self.theta[1:]
     cross = self.kernel(test_x, self.train_x, kernel_hyp)
     mean = np.dot(cross, self.alpha)
     solved = chol_inv(self.L, cross.T)
     if is_diag:
         var = self.for_diag + noise_var - (cross * solved.T).sum(axis=1)
     else:
         var = noise_var - np.dot(cross, solved) + self.kernel(test_x, test_x, kernel_hyp)
     mean = (mean * self.std + self.mean).reshape(-1)
     return mean, np.abs(var) * (self.std**2)
Beispiel #9
0
    def neg_log_likelihood(self, theta):
        """Return the negative log-likelihood of the target data given the source data.

        The target mean and covariance are obtained by conditioning on the
        source block; ``theta[self.dim+1]`` and ``theta[self.dim+2]`` are the
        log noise variances of the source and target blocks.

        A NaN result is replaced by ``np.inf``.  The value is also stored in
        ``self.nlz``.
        """
        noise_src = np.exp(theta[self.dim+1])
        noise_tag = np.exp(theta[self.dim+2])
        eye_src = np.eye(self.num_src)
        eye_tag = np.eye(self.num_tag)

        K_ss = self.kernel1(self.src_x, self.src_x, theta) + noise_src * eye_src + self.jitter*eye_src
        K_st = self.kernel2(self.src_x, self.tag_x, theta)
        K_tt = self.kernel1(self.tag_x, self.tag_x, theta) + noise_tag * eye_tag + self.jitter*eye_tag

        # Condition the target block on the source block.
        chol_ss = np.linalg.cholesky(K_ss)
        solved_y = chol_inv(chol_ss, self.src_y.T)
        solved_k = chol_inv(chol_ss, K_st)
        mu_t = np.dot(K_st.T, solved_y)
        C_t = K_tt - np.dot(K_st.T, solved_k)

        chol_t = np.linalg.cholesky(C_t)
        # Sum of log-diagonal of the Cholesky factor = half the log-determinant.
        half_logdet = np.sum(np.log(np.diag(chol_t)))
        delta = self.tag_y.T - mu_t
        quad = np.dot(delta.T, chol_inv(chol_t, delta))
        nlz = 0.5*(quad + self.num_tag*np.log(2*np.pi)) + half_logdet
        if np.isnan(nlz):
            nlz = np.inf

        self.nlz = nlz
        return nlz
Beispiel #10
0
    def neg_log_likelihood(self, theta):
        """Return the negative log-likelihood of the training data for ``theta``.

        ``theta[0]`` is the log noise variance; the remaining entries are
        passed to ``self.kernel``.  A NaN result is replaced by ``np.inf``.
        The value is also stored in ``self.nlz``.
        """
        noise_var = np.exp(theta[0])
        kernel_hyp = theta[1:]

        gram = self.kernel(self.train_x, self.train_x, kernel_hyp)
        chol = np.linalg.cholesky(gram + noise_var * np.eye(self.num_train))

        # Sum of log-diagonal of the Cholesky factor = half the log-determinant.
        half_logdet = np.sum(np.log(np.diag(chol)))
        quad = np.dot(self.train_y, chol_inv(chol, self.train_y.T))
        nlz = 0.5 * (quad + self.num_train * np.log(2 * np.pi)) + half_logdet
        if np.isnan(nlz):
            nlz = np.inf

        self.nlz = nlz
        return nlz
    def callback0(params):
        """Optimizer callback: track test error and the norm of the solution.

        ``params`` holds log-values: all entries but the last are lengthscales
        and the last is the amplitude.  They are exponentiated here, so the
        stored ``opt_params`` is on the natural scale.

        When the training inputs have fewer than 5 columns a single distance
        matrix is used; otherwise the per-dimension distance matrices are
        scaled by their own lengthscale and summed.  The coefficients
        ``alpha`` are refitted, the test error against ``test.g`` and the
        quadratic form ``alpha.T @ train_L @ alpha`` are computed, and the
        module-level trackers are updated.

        Raises a bare ``Exception`` when the norm is at least 3x ``prev_norm``
        (the smallest norm recorded so far) -- presumably caught by the caller
        to stop the optimizer early.
        """
        global Nfeval, prev_norm, opt_params, opt_test_err
        if Nfeval % 1 == 0:
            params = np.exp(params)
            print('params:', params)
            lengthscales, amplitude = params[:-1], params[-1]

            if train.x.shape[1] < 5:
                train_L = amplitude**2 * np.exp(
                    -train_L0 / lengthscales**2 / 2) + 1e-4 * EYEN
                test_L = amplitude**2 * np.exp(-test_L0 / lengthscales**2 / 2)
            else:
                # Accumulate the scaled squared distances one dimension at a time.
                train_acc, test_acc = 0, 0
                for dim, scale in enumerate(lengthscales):
                    train_acc += train_L0[dim] / scale**2
                    test_acc += test_L0[dim] / scale**2
                train_L = amplitude * amplitude * np.exp(
                    -train_acc / 2) + 1e-4 * EYEN
                test_L = amplitude * amplitude * np.exp(-test_acc / 2)

            if nystr:
                vec_t_L = eig_vec_K.T @ train_L
                small_inv = np.linalg.inv(
                    vec_t_L @ eig_vec_K / N2 + inv_eig_val)
                alpha = EYEN - eig_vec_K @ small_inv @ vec_t_L / N2
                alpha = alpha @ W_nystr_Y * N2
            else:
                gram_inv = chol_inv(train_L @ train_W @ train_L +
                                    train_L / N2 + JITTER * EYEN)
                alpha = gram_inv @ train_L @ train_W @ train.y
            pred_mean = test_L @ alpha
            test_err = ((pred_mean - test.g)**2).mean()
            norm = alpha.T @ train_L @ alpha

        Nfeval += 1
        current_norm = norm[0, 0]
        if prev_norm is not None and current_norm / prev_norm >= 3:
            # Norm blew up: keep the previously stored optimum if there is one.
            if opt_test_err is None:
                opt_test_err = test_err
                opt_params = params
            print(True, opt_params, opt_test_err, prev_norm, current_norm)
            raise Exception

        if prev_norm is None or current_norm <= prev_norm:
            prev_norm = current_norm
        opt_test_err = test_err
        opt_params = params
        print(True, opt_params, opt_test_err, prev_norm, current_norm)
Beispiel #12
0
    def callback0(params, timer=None):
        """Optimizer callback: track test error and the norm of the solution.

        ``params`` is ``(log lengthscale, log amplitude)``; the raw log-scale
        values are what gets stored in ``opt_params``.  The coefficients
        ``alpha`` are refitted, the test error against ``test_G`` and the
        quadratic form ``alpha.T @ L @ alpha`` are computed, and the
        module-level trackers are updated.

        When ``timer`` is truthy the function returns right after the
        prediction, without touching any tracker.

        Raises a bare ``Exception`` when the norm is at least 3x ``prev_norm``
        (the smallest norm recorded so far) -- presumably caught by the caller
        to stop the optimizer early.
        """
        global Nfeval, prev_norm, opt_params, opt_test_err
        if Nfeval % 1 == 0:
            lengthscale, amplitude = np.exp(params)
            L = amplitude * amplitude * np.exp(
                -L0 / lengthscale / lengthscale / 2) + 1e-6 * EYEN
            if nystr:
                vec_t_L = eig_vec_K.T @ L
                small_inv = np.linalg.inv(
                    vec_t_L @ eig_vec_K / N2 + inv_eig_val_K)
                alpha = EYEN - eig_vec_K @ small_inv @ vec_t_L / N2
                alpha = alpha @ W_nystr_Y * N2
            else:
                gram_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
                alpha = gram_inv @ L @ W @ Y
            test_L = amplitude * amplitude * np.exp(
                -test_L0 / lengthscale / lengthscale / 2)
            pred_mean = test_L @ alpha
            if timer:
                return
            test_err = ((pred_mean - test_G)**2).mean()
            norm = alpha.T @ L @ alpha

        Nfeval += 1
        current_norm = norm[0, 0]
        if prev_norm is not None and current_norm / prev_norm >= 3:
            # Norm blew up: keep the previously stored optimum if there is one.
            if opt_params is None:
                opt_test_err = test_err
                opt_params = params
            print(True, opt_params, opt_test_err, prev_norm, current_norm)
            raise Exception

        if prev_norm is None or current_norm <= prev_norm:
            prev_norm = current_norm
        opt_test_err = test_err
        opt_params = params
        print('params,test_err, norm: ', opt_params, opt_test_err, prev_norm,
              current_norm)
Beispiel #13
0
    def callback0(params, timer=None):
        """Optimizer callback: track the fit, then save a contour plot of it.

        ``params`` holds log-values: all entries but the last are lengthscales
        (two are used) and the last is the amplitude.  They are exponentiated
        here, so the stored ``opt_params`` is on the natural scale.

        The coefficients ``alpha`` are refitted, the in-sample squared error
        against ``Y`` and the quadratic form ``alpha.T @ L @ alpha`` are
        computed, and the module-level trackers are updated.  The fitted
        function is then evaluated on a 64 x 32 grid spanning the two columns
        of ``X``, rescaled to [0, 1] and written to ``VitD_IV.pdf`` (when
        ``IV`` is truthy) or ``VitD.pdf``.

        When ``timer`` is truthy the function returns right after the
        prediction, without touching any tracker and without plotting.

        Raises a bare ``Exception`` when the norm is at least 3x ``prev_norm``
        (the smallest norm recorded so far) -- presumably caught by the caller
        to stop the optimizer early.  No plot is written in that case.
        """
        global Nfeval, prev_norm, opt_params, opt_test_err

        def _padded_axis(column, num):
            # Evenly spaced grid over the column's range, widened at each end
            # by 5% of the absolute value of that end.
            lo, hi = np.min(column), np.max(column)
            return np.linspace(lo - abs(lo) * 0.05, hi + abs(hi) * 0.05, num)

        if Nfeval % 1 == 0:
            params = np.exp(params)
            al, bl = params[:-1], params[-1]
            # Sum of two RBF kernels, one per precomputed distance matrix.
            L = bl * bl * np.exp(
                -L0[0] / al[0] / al[0] / 2) + bl * bl * np.exp(
                    -L0[1] / al[1] / al[1] / 2) + 1e-6 * EYEN
            if nystr:
                alpha = EYEN - eig_vec_K @ np.linalg.inv(
                    eig_vec_K.T @ L @ eig_vec_K / N2 +
                    np.diag(1 / eig_val_K / N2)) @ eig_vec_K.T @ L / N2
                alpha = alpha @ W_nystr @ Y * N2
            else:
                LWL_inv = chol_inv(L @ W @ L + L / N2 + JITTER * EYEN)
                alpha = LWL_inv @ L @ W @ Y
            pred_mean = L @ alpha
            if timer:
                return
            norm = alpha.T @ L @ alpha

        Nfeval += 1
        current_norm = norm[0, 0]
        fit_err = ((pred_mean - Y)**2).mean()
        if prev_norm is not None and current_norm / prev_norm >= 3:
            # Norm blew up: keep the previously stored optimum if there is one.
            if opt_params is None:
                opt_params = params
                opt_test_err = fit_err
            print(True, opt_params, opt_test_err, prev_norm)
            raise Exception

        if prev_norm is None or current_norm <= prev_norm:
            prev_norm = current_norm
        opt_params = params
        opt_test_err = fit_err
        print('params,test_err, norm:', opt_params, opt_test_err, prev_norm)

        # Evaluate the fitted function on a grid in the coordinates of X.
        X_mesh, Y_mesh = np.meshgrid(_padded_axis(X[:, 0], 32),
                                     _padded_axis(X[:, 1], 64))
        table = bl**2 * np.hstack([
            np.exp(-_sqdist(X_mesh[:, [i]], X[:, [0]]) / al[0]**2 / 2 -
                   _sqdist(Y_mesh[:, [i]], X[:, [1]]) / al[1]**2 / 2) @ alpha
            for i in range(X_mesh.shape[1])
        ])
        maxv = np.max(table)
        minv = np.min(table)

        # The plot axes are built from columns 0 and 2 of data0 rather than
        # from X -- presumably the same two variables on their original scale
        # (confirm against where data0 and X are built).
        X0 = data0[:, [0, 2]]
        X_plot, Y_plot = np.meshgrid(_padded_axis(X0[:, 0], 32),
                                     _padded_axis(X0[:, 1], 64))

        fig = plt.figure()
        try:
            ax = fig.add_subplot(111)
            cpf = ax.contourf(X_plot, Y_plot, (table - minv) / (maxv - minv))
            plt.colorbar(cpf, ax=ax)
            plt.xlabel('Age', fontsize=12)
            plt.ylabel('Vitamin D', fontsize=12)
            plt.xticks(fontsize=12)
            plt.yticks(fontsize=12)
            if IV:
                plt.savefig('VitD_IV.pdf', bbox_inches='tight')
            else:
                plt.savefig('VitD.pdf', bbox_inches='tight')
        finally:
            # Close figures even when plotting or saving fails, so repeated
            # callback invocations do not accumulate open figures.
            plt.close('all')