Example #1
def c_i(params, n, k, i, S, num_particles):
    # Control-variate scaling coefficient: Cov(f, h) / Var(h), estimated from S samples.
    samples = sample_theta(params, S)
    second = gradient_log_recognition(params, samples, i)
    first = h_s(samples, n, k, num_particles) * second
    cov = np.cov(first, second)
    return cov[0][1] / cov[1][1]
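The ratio returned by c_i above is the standard control-variate coefficient Cov(f, h) / Var(h). A minimal, self-contained sketch (illustrative only; f and h below are made up) of how such a coefficient is used to reduce variance:

import numpy as np

rng = np.random.default_rng(0)
h = rng.normal(size=10_000)              # control variate with known zero mean
f = 2.0 * h + rng.normal(size=10_000)    # quantity whose mean we want to estimate

c = np.cov(f, h)[0][1] / np.cov(f, h)[1][1]   # Cov(f, h) / Var(h)
controlled = f - c * h                   # same expectation as f, smaller variance
print(np.var(f), np.var(controlled))     # roughly 5.0 vs roughly 1.0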
def envelope(X_env, Y_env, u):

    p, r = X_env.shape[1], Y_env.shape[1]
    linear_model = LinearRegression().fit(X_env, Y_env)
    err = Y_env - linear_model.predict(X_env)
    Sigma_res = np.cov(err.transpose())
    Sigma_Y = np.cov(Y_env.transpose())

    def cost(Gamma):
        # Objective for the envelope subspace: the negative log-determinant of
        # P @ Sigma_res @ P + (I - P) @ Sigma_Y @ (I - P), where P = Gamma @ Gamma.T.
        P = np.matmul(Gamma, Gamma.T)
        out = -np.log(
            np.linalg.det(
                np.matmul(np.matmul(P, Sigma_res), P) +
                np.matmul(np.matmul(np.eye(r) - P, Sigma_Y),
                          np.eye(r) - P)))
        return np.array(out)

    manifold = Grassmann(r, u)
    # manifold = Stiefel(r, u)
    problem = Problem(manifold=manifold, cost=cost, verbosity=0)
    solver = SteepestDescent()
    Gamma = solver.solve(problem)
    PSigma1_hat = np.matmul(Gamma, Gamma.T)
    PSigma2_hat = np.eye(r) - PSigma1_hat

    beta_hat = np.matmul(PSigma1_hat, linear_model.coef_)
    Sigma1_hat = np.matmul(np.matmul(PSigma1_hat, Sigma_res), PSigma1_hat)
    Sigma2_hat = np.matmul(np.matmul(np.eye(r) - PSigma1_hat, Sigma_res),
                           np.eye(r) - PSigma1_hat)
    alpha_hat = np.mean(Y_env - np.matmul(X_env, beta_hat.T), axis=0)

    return (alpha_hat.reshape(1, r), beta_hat.reshape(p, r))
Example #3
def fiml_auto(sigma_chol):
    # Full-information maximum-likelihood (FIML) objective; relies on n_predictors,
    # n_causes, X, Y, mask and mask_var from the enclosing scope.

    sigma_chol = sigma_chol.reshape(n_predictors + 1, n_predictors + 1)
    sigma = np.dot(sigma_chol, sigma_chol.T)

    test_x, test_y = X[mask], Y[mask]
    mask_train = ((mask + 1) % 2).astype(bool)
    train_x, train_y = X[mask_train], Y[mask_train]
    missing = ((mask_var + 1) % 2).astype(bool)

    joint_test = np.concatenate([test_y, test_x], axis=1)
    samp_cov = np.cov(joint_test.T)

    joint_train = np.concatenate([train_y, train_x], axis=1)
    samp_cov_t = np.cov(joint_train[:, 0:n_causes + 1].T)

    L = -np.trace(np.dot(np.linalg.inv(sigma), samp_cov))
    L -= np.log(np.linalg.det(sigma))
    L *= test_x.shape[0]

    det_sub = np.linalg.det(sigma[0:n_causes + 1, 0:n_causes + 1])

    L_tr = -np.trace(
        np.dot(np.linalg.inv(sigma[0:n_causes + 1, 0:n_causes + 1]),
               samp_cov_t))
    L_tr -= np.log(det_sub)
    L_tr *= train_x.shape[0]

    return -(L + L_tr)
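Both fiml_auto above and fiml (Example #9 below) accumulate the same Gaussian log-likelihood kernel per data block. A small self-contained sketch (illustrative, synthetic data) of that kernel, -n * (trace(sigma^{-1} S) + log|sigma|):

import numpy as np

rng = np.random.default_rng(0)
data = rng.normal(size=(50, 3))
S = np.cov(data.T)                     # sample covariance of the block
sigma = np.eye(3)                      # candidate model covariance
n = data.shape[0]
ll_kernel = -n * (np.trace(np.linalg.solve(sigma, S)) + np.log(np.linalg.det(sigma)))
print(ll_kernel)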
Example #4
def compute_weights_euler(odom_df):
    """Computes the covariance of the rotation matrix and the standard
    deviation of translation matrices to weight the samples in the optimization
    objective.

    Parameters:
        odom_df (pd.DataFrame):  DataFrame corresponding to odometry data for
            one of the lidar sensors for which we are interested in calculating
            weights.

    Returns:
        cov_t (np.array):  3x3 matrix corresponding to translation covariance.
        cov_E (np.array):  3x3 matrix corresponding to the covariance of the
            Euler angles from rotation.
    """
    # Compute translations and compute covariance over all of them
    t = odom_df[["dx", "dy", "dz"]].values  # Translation - with shape (N, 3)
    cov_t = np.cov(t.T)  # 3x3 covariance of the translation components

    # Extract quaternions from odometric data frame
    Q = odom_df[["dqx", "dqy", "dqz", "dqw"]].values

    # Convert quaternions to euler angles
    E = np.zeros((Q.shape[0], 3))  # RPY angles array
    for i, q in enumerate(Q):  # Iterate over each quaternion
        E[i] = R.from_quat(q).as_euler('xyz', degrees=False)

    # From here, we need to extract rotations about x,y,z to compute covariance
    cov_E = np.cov(E.T)

    return cov_t, cov_E
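A hypothetical usage sketch for compute_weights_euler: the synthetic DataFrame and its column names below are assumptions, and the imports used by the function itself (numpy, pandas, scipy's Rotation as R) must already be in scope.

import numpy as np
import pandas as pd

rng = np.random.default_rng(0)
n = 200
quats = rng.normal(size=(n, 4))
quats /= np.linalg.norm(quats, axis=1, keepdims=True)        # unit quaternions
odom_df = pd.DataFrame(
    np.hstack([rng.normal(scale=0.05, size=(n, 3)), quats]),
    columns=["dx", "dy", "dz", "dqx", "dqy", "dqz", "dqw"])

cov_t, cov_E = compute_weights_euler(odom_df)
print(cov_t.shape, cov_E.shape)                              # (3, 3) (3, 3)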
Example #5
 def callback(params, t, g):
     y = sample_gpp(x, 1, ker)
     #y=sample_function(x,1)
     preds = hyper_predict(params, x, y, nn_arch, act)  #[1,nd]
     if plot: p.plot_iter(ax, x, x, y, preds)
     cd = np.cov(y.ravel()) - np.cov(preds.ravel())
     print("ITER {} | OBJ {} COV DIFF {}".format(t, objective(params, t),
                                                 cd))
Example #6
def c_i(params, i, S, num_particles):
    # Control-variate scaling coefficient: Cov(f, h) / Var(h), estimated from S samples.
    if S == 1:
        return 0
    samples = sample_theta(params, S)
    second = gradient_log_variational(params, samples, i)
    first = h_s(samples, num_particles) * second
    cov = np.cov(first, second)
    return cov[0][1] / cov[1][1]
Example #7
    def _init_params(self, data, lengths=None, params='stmp'):
        X = data['obs']

        if 's' in params:
            self.startprob_.fill(1.0 / self.n_components)

        if 't' in params or 'm' in params or 'p' in params:

            kmmod = cluster.KMeans(n_clusters=self.n_unique,
                                   random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_

        if 't' in params:
            # TODO: estimate transitions from data (!) / consider n_tied=1
            if self.n_tied == 0:
                transmat = np.ones([self.n_components, self.n_components])
                np.fill_diagonal(transmat, 10.0)
                self.transmat_ = transmat  # .90 for self-transition

            else:
                transmat = np.zeros((self.n_components, self.n_components))
                transmat[range(self.n_components),
                         range(self.n_components)] = 100.0  # diagonal
                transmat[range(self.n_components - 1),
                         range(1, self.n_components)] = 1.0  # diagonal + 1
                transmat[[
                    r * (self.n_chain) - 1
                    for r in range(1, self.n_unique + 1)
                    for c in range(self.n_unique - 1)
                ], [
                    c * (self.n_chain) for r in range(self.n_unique)
                    for c in range(self.n_unique) if c != r
                ]] = 1.0

                self.transmat_ = np.copy(transmat)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    mu_init[u][f] = kmeans[u, f]

            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = np.zeros(
                (self.n_unique, self.n_features, self.n_features))
            for u in range(self.n_unique):
                if self.n_features == 1:
                    precision_init[u] = np.linalg.inv(
                        np.cov(X[kmmod.labels_ == u], bias=1))
                else:
                    precision_init[u] = np.linalg.inv(
                        np.cov(np.transpose(X[kmmod.labels_ == u])))

            self.precision_ = np.copy(precision_init)
Example #8
def c_i(params, i, S, num_particles):
    # Control-variate scaling coefficient: Cov(f, h) / Var(h), estimated from S samples.
    if S == 1:
        return 0
    samples = sample_theta(params, S)
    second = gradient_log_variational(params, samples, i)
    first = h_s(samples, num_particles) * second
    cov = np.cov(first, second)
    return cov[0][1] / cov[1][1]
Example #9
def fiml(sigma_chol, n_predictors, mask, mask_var,X,Y, sub,n_ul = 0, 
         print_val = False):
    """
    Implements full information maximum likelihood for optimization.
    """
    sigma_chol = sigma_chol.reshape(n_predictors+1,n_predictors+1)
    sigma = np.dot(sigma_chol, sigma_chol.T)

    test_x, test_y = X[mask], Y[mask]

    mask_train = ((mask+1)%2).astype(bool)
    if n_ul>0:
        mask_train[0:n_ul] = False
    train_x, train_y = X[mask_train], Y[mask_train]
    missing = ((mask_var+1)%2).astype(bool)

    joint_test = np.concatenate([test_y, test_x],axis=1)
    samp_cov = np.cov(joint_test.T)

    joint_train = np.concatenate([train_y, train_x], axis=1)
    samp_cov_t = np.cov(joint_train[:,sub].T)

    L = np.linalg.solve(sigma, samp_cov)
    L = -np.trace(L)
    L -= np.log(np.linalg.det(sigma))
    L *= test_x.shape[0]

    det_sub = np.linalg.det(sigma[sub].T[sub].T)

    L_tr = np.linalg.solve(sigma[sub].T[sub].T,samp_cov_t)
    L_tr = -np.trace(L_tr)
    L_tr -= np.log(det_sub)
    L_tr *= train_x.shape[0]

    if n_ul > 0:
        set_n = np.arange(1,n_predictors+1)
        joint_ul = X[0:n_ul,:]#np.concatenate([Y[0:n_ul,:], X[0:n_ul,1:]], axis=1)

        samp_cov_t = np.cov(joint_ul.T)
        mask_ul = np.copy(mask_train)
        mask_ul[0:n_ul] = True
        mask_ul[n_ul:] = False
        det_sub = np.linalg.det(sigma[1:,1:])

        L_ul = -np.trace(np.dot(np.linalg.inv(sigma[1:,1:]),samp_cov_t))
        L_ul -= np.log(det_sub)
        L_ul *= n_ul
    else:
        L_ul = 0

    if print_val:
        print(-(L + L_tr - L_ul))

    return -(L+L_tr-L_ul)
    def estimate_orth_subspaces(self, DataStruct):
        '''
        main optimization function
        '''

        # Is Q a valid Grassmann/Stiefel point (orthonormal columns)? If not, project it.
        if LA.norm(np.dot(self.Q.T, self.Q) - np.eye(self.Q.shape[-1]),
                   ord='fro') > 1e-4:
            self._project_stiefel()

        # ----------------------------------------------------------------------- #

        # eGrad = grad(cost)
        # eHess = hessian(cost)

        # Perform optimization
        # ----------------------------------------------------------------------- #
        # ----------------------------------------------------------------------- #

        d, r = np.shape(self.Q)  # problem size
        print(d)

        manif = Stiefel(d, r)  # initialize manifold

        # instantiate problem
        problem = Problem(manifold=manif, cost=self._cost, verbosity=2)

        # initialize solver
        solver = TrustRegions(mingradnorm=1e-8,
                              minstepsize=1e-16,
                              logverbosity=1)

        # solve
        Xopt, optlog = solver.solve(problem)

        opt_subspaces = self._objfn(Xopt)

        # Align the axes within a subspace by variance high to low
        for j in range(self.numSubspaces):
            Aj = DataStruct.A[j]
            Qj = opt_subspaces[2].Q[j]
            # data projected onto subspace
            Aj_proj = np.dot((Aj - np.mean(Aj, 0)), Qj)
            if np.size(np.cov(Aj_proj.T)) < 2:
                V = 1
            else:
                V = LA.svd(np.cov(Aj_proj.T))[0]
            Qj = np.dot(Qj, V)
            opt_subspaces[2].Q[j] = Qj  # ranked top to low variance

        return opt_subspaces[2]
Example #11
    def _init_params(self, data, lengths=None, params='stmp'):
        X = data['obs']

        if 's' in params:
            self.startprob_.fill(1.0 / self.n_components)

        if 't' in params or 'm' in params or 'p' in params:

            kmmod = cluster.KMeans(n_clusters=self.n_unique,
                                   random_state=self.random_state).fit(X)
            kmeans = kmmod.cluster_centers_

        if 't' in params:
            # TODO: estimate transitions from data (!) / consider n_tied=1
            if self.n_tied == 0:
                transmat = np.ones([self.n_components, self.n_components])
                np.fill_diagonal(transmat, 10.0)
                self.transmat_ = transmat  # .90 for self-transition

            else:
                transmat = np.zeros((self.n_components, self.n_components))
                transmat[range(self.n_components),
                         range(self.n_components)] = 100.0  # diagonal
                transmat[range(self.n_components-1),
                         range(1, self.n_components)] = 1.0  # diagonal + 1
                transmat[[r * (self.n_chain) - 1
                          for r in range(1, self.n_unique+1)
                          for c in range(self.n_unique-1)],
                         [c * (self.n_chain)
                          for r in range(self.n_unique)
                          for c in range(self.n_unique) if c != r]] = 1.0

                self.transmat_ = np.copy(transmat)

        if 'm' in params:
            mu_init = np.zeros((self.n_unique, self.n_features))
            for u in range(self.n_unique):
                for f in range(self.n_features):
                    mu_init[u][f] = kmeans[u, f]

            self.mu_ = np.copy(mu_init)

        if 'p' in params:
            precision_init = np.zeros((self.n_unique, self.n_features, self.n_features))
            for u in range(self.n_unique):
                if self.n_features == 1:
                    precision_init[u] = np.linalg.inv(np.cov(X[kmmod.labels_ == u], bias = 1))
                else:
                    precision_init[u] = np.linalg.inv(np.cov(np.transpose(X[kmmod.labels_ == u])))

            self.precision_ = np.copy(precision_init)
Example #12
 def compute_stat(self, tst_data):
     """Compute the test statistic"""
     X, Y = tst_data.xy()
     # if X.shape[0] != Y.shape[0]:
     #    raise ValueError('Require nx = ny for now. Will improve if needed.')
     nx = X.shape[0]
     ny = Y.shape[0]
     mx = np.mean(X, 0)
     my = np.mean(Y, 0)
     mdiff = mx - my
     sx = np.cov(X.T)
     sy = np.cov(Y.T)
     s = old_div(sx, nx) + old_div(sy, ny)
     chi2_stat = np.dot(np.linalg.solve(s, mdiff), mdiff)
     return chi2_stat
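For reference, a self-contained sketch (synthetic data, illustrative only) of the same two-sample statistic computed above: with s = cov(X)/nx + cov(Y)/ny, the statistic is mdiff' s^{-1} mdiff.

import numpy as np

rng = np.random.default_rng(0)
X, Y = rng.normal(size=(100, 3)), rng.normal(size=(120, 3))
mdiff = X.mean(axis=0) - Y.mean(axis=0)
s = np.cov(X.T) / X.shape[0] + np.cov(Y.T) / Y.shape[0]
chi2_stat = mdiff @ np.linalg.solve(s, mdiff)
print(chi2_stat)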
    def _get_prepared_dat(self, DataStruct):
        '''
        Computes initial subspaces for each dataset with the specified dimensionalities
        and concatenates them for the optimization procedure.
        '''

        Q = []
        dim = []
        normFact = []
        covM = []

        for j in range(self.numSubspaces):

            Cj = np.cov(DataStruct.A[j].T)  # compute covariance of dataset j
            d = DataStruct.dim[j]
            # SVD of the covariance yields the initial subspace (V) and singular values (S)
            V, S = LA.svd(Cj)[:2]
            Q = np.hstack([Q, V[:, 0:d]]) if len(Q) else V[:, 0:d]  # concatenate subspaces

            dim.append(d)
            normFact.append(self.bias[j] / np.sum(S[0:d]))  # compute normalization factors
            covM.append(Cj)

        return Q, dim, normFact, covM
Example #14
 def fit(cls, samples, return_instance = False): # observations expected in rows
     mu = samples.mean(0)
     var = np.atleast_2d(np.cov(samples, rowvar = 0))
     if return_instance:
         return mvnorm(mu, var)
     else:
         return (mu, var)
Example #15
    def Fit(self, X, Y, **kwargs):
        self.cov = np.cov(Y.T)
        if not self.cov.shape:
            # you could be supplied with a 1-feature data set, in which case self.cov is just a number
            self.eigval = self.cov
            self.eigvec = np.eye(1)
            self.cov = self.cov.reshape(-1, 1)
        else:
            self.eigval, self.eigvec = np.linalg.eigh(self.cov)
            idx = self.eigval.argsort()[::-1]
            self.eigval = self.eigval[idx]
            self.eigvec = self.eigvec[:, idx]
            if self.percentage is not None:
                total_val = sum(self.eigval)
                running_fraction = np.cumsum(self.eigval) / total_val
                self.component = np.searchsorted(running_fraction,
                                                 self.percentage)
                if self.component == 0:
                    self.component = 1

            assert (self.component <= Y.shape[1]
                    ), "number of components cannot exceed number of variables"
            self.reconstruction_error = np.sum(
                self.eigval[self.component:]) / self.cov.shape[0]
            if self.reconstruction_error is None or np.isnan(
                    self.reconstruction_error):
                self.reconstruction_error = 0
            self.eigval = self.eigval[0:self.component]
            self.eigvec = self.eigvec[:, 0:self.component]
Example #16
    def callback_kl(prior_params, iter, g):
        kl = obj(prior_params, iter, N_samples=N_samples)
        kls.append(kl)
        min_kls.append(np.amin(kls))
        print("Iteration {} KL {} ".format(iter, kl))

        plot_lines(ax1, prior_params, inputs)
        plot_heatmap(ax2, prior_params)
        ax3.imshow(real_cov)
        plot_kls(ax4, kls, min_kls)

        plt.draw()
        # plt.savefig(os.path.join(plotting_dir, 'contours_iteration_' + str(iter) + '.pdf'))
        plt.pause(1.0 / 400.0)
        ax1.cla()
        ax2.cla()
        ax3.cla()
        ax4.cla()

        if iter % 10 == 0:
            samples = sample_obs(prior_params, N_samples, inputs, layer_sizes)
            y_mean, y_cov = np.mean(samples, axis=0), np.cov(samples.T)
            print(y_cov)
            print(y_cov - real_cov)
            print(y_mean - real_mean)
Example #17
    def simulate(self, gof, dat, fea_tensor=None):
        """
        fea_tensor: n x d x J feature matrix
        """
        assert isinstance(gof, FSSD)
        n_simulate = self.n_simulate
        seed = self.seed
        if fea_tensor is None:
            _, fea_tensor = gof.compute_stat(dat, return_feature_tensor=True)

        J = fea_tensor.shape[2]
        X = dat.data()
        n = X.shape[0]
        # n x d*J
        Tau = fea_tensor.reshape(n, -1)
        # Make sure it is a matrix, i.e., np.cov returns a scalar when Tau is 1-d.
        cov = np.cov(Tau.T) + np.zeros((1, 1))
        #cov = Tau.T.dot(Tau/n)

        arr_nfssd, eigs = FSSD.list_simulate_spectral(cov,
                                                      J,
                                                      n_simulate,
                                                      seed=self.seed)
        return {'sim_stats': arr_nfssd}
Example #18
 def _stabilize_x(self):
     """Fix the rotation according to the SVD.
     """
     U, _, _ = np.linalg.svd(self.X, full_matrices=False)
     L = np.linalg.cholesky(np.cov(U.T) + 1e-6 * np.eye(self.D)).T
     self.X = np.linalg.solve(L, U.T).T
     self.X /= np.std(self.X, axis=0)
Example #19
def vector_cov(X, Y, rowvar=True):
    if rowvar:
        d = X.shape[0]
    else:
        d = X.shape[1]
    _cov = np.cov(X, Y, rowvar=rowvar)

    # np.cov stacks the variables of X and Y; the lower-right block is the
    # covariance among Y's variables.
    return _cov[d:, d:]
Example #20
File: dr.py Project: skn123/POT
def fda(X, y, p=2, reg=1e-16):
    """Fisher Discriminant Analysis

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Training samples.
    y : ndarray, shape (n,)
        Labels for training samples.
    p : int, optional
        Size of the dimensionality reduction.
    reg : float, optional
        Regularization term >0 (ridge regularization)

    Returns
    -------
    P : ndarray, shape (d, p)
        Projection matrix for the given parameters
    proj : callable
        projection function including mean centering
    """

    mx = np.mean(X, axis=0)
    X -= mx.reshape((1, -1))

    # data split between classes
    d = X.shape[1]
    xc = split_classes(X, y)
    nc = len(xc)

    p = min(nc - 1, p)

    Cw = 0
    for x in xc:
        Cw += np.cov(x, rowvar=False)
    Cw /= nc

    mxc = np.zeros((d, nc))

    for i in range(nc):
        mxc[:, i] = np.mean(xc[i], axis=0)

    mx0 = np.mean(mxc, 1)
    Cb = 0
    for i in range(nc):
        Cb += (mxc[:, i] - mx0).reshape((-1, 1)) * \
            (mxc[:, i] - mx0).reshape((1, -1))

    w, V = linalg.eig(Cb, Cw + reg * np.eye(d))

    idx = np.argsort(w.real)

    Popt = V[:, idx[-p:]]

    def proj(X):
        return (X - mx.reshape((1, -1))).dot(Popt)

    return Popt, proj
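A hedged usage sketch for fda on toy two-class data; the import path assumes POT's ot.dr module (the file named in the header above) is installed and importable.

import numpy as np
from ot.dr import fda   # assumed import path for the function above

rng = np.random.default_rng(0)
X = np.vstack([rng.normal(0, 1, size=(50, 4)),
               rng.normal(2, 1, size=(50, 4))])
y = np.repeat([0, 1], 50)

Popt, proj = fda(X, y, p=1)        # one discriminant direction for two classes
X_proj = proj(X)                   # mean-centred projection onto Popt
print(Popt.shape, X_proj.shape)    # (4, 1) (100, 1)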
    def get_basis_weight_parameters(self):
        mean = np.mean(self.basis_weights, axis=0)

        if (self.basis_weights.shape[0] > 1):
            var = np.cov(self.basis_weights, rowvar=False)
        else:
            var = None

        return mean, var
    def plot_contours(ax, params):
        samples = sample_obs(params, N_samples, inputs, layer_sizes)
        y_mean, y_cov = np.mean(samples, axis=0), np.cov(samples.T)

        approx_pdf = lambda x: mvn.logpdf(x, y_mean, y_cov)
        real_pdf = lambda x: mvn.logpdf(x, real_mean, real_cov)

        plot_isocontours(ax, approx_pdf, colors='r', label='approx')
        plot_isocontours(ax, real_pdf, colors='b', label='true')
Example #23
 def initialize(self, datas, inputs=None, masks=None, tags=None):
     # Initialize with KMeans
     from sklearn.cluster import KMeans
     data = np.concatenate(datas)
     km = KMeans(self.K).fit(data)
     self.mus = km.cluster_centers_
     Sigmas = np.array(
         [np.cov(data[km.labels_ == k].T) for k in range(self.K)])
     self._sqrt_Sigmas = np.linalg.cholesky(Sigmas + 1e-8 * np.eye(self.D))
Example #24
    def initialize(self, x, u, **kwargs):
        kmeans = kwargs.get('kmeans', True)
        if kmeans:
            from sklearn.cluster import KMeans
            _obs = np.concatenate(x)
            km = KMeans(self.nb_states).fit(_obs)

            self.mu = km.cluster_centers_
            self.cov = np.array([
                np.cov(_obs[km.labels_ == k].T) for k in range(self.nb_states)
            ])
        else:
            _cov = np.zeros((self.nb_states, self.dm_obs, self.dm_obs))
            for k in range(self.nb_states):
                self.mu[k, :] = np.mean(np.vstack([_x[0, :] for _x in x]),
                                        axis=0)
                _cov[k, ...] = np.cov(np.vstack([_x[0, :] for _x in x]),
                                      rowvar=False)
            self.cov = _cov
Example #25
def calc_Sw_Sb(X, y):
    XMat = np.array(X)
    yMat = np.array(y)
    n_samples, n_features = XMat.shape

    Sw = np.zeros((n_features, n_features))
    Sb = np.zeros((n_features, n_features))

    X_cov = np.cov(XMat.T)

    labels = np.unique(yMat)
    for c in range(len(labels)):
        idx = np.squeeze(np.where(yMat == labels[c]))
        X_c = np.squeeze(XMat[idx[0], :])
        X_c_cov = np.cov(X_c.T)
        Sw += float(idx.shape[0]) / n_samples * X_c_cov

    Sb = X_cov - Sw
    return Sw, Sb
Example #26
    def compute_stat(self, wtst_data):
        """Compute the test statistic"""
        residuals1, residuals2 = self.compute_residuals(wtst_data)

        dim = wtst_data.dim_y()
        if dim == 1:
            stat, pvalue = stats.f_oneway(residuals1, residuals2)
            return stat

        else:
            n1 = residuals1.shape[0]
            n2 = residuals2.shape[0]
            m1 = np.mean(residuals1, 0)
            m2 = np.mean(residuals2, 0)
            mdiff = m1 - m2
            s1 = np.cov(residuals1.T)
            s2 = np.cov(residuals2.T)
            s = old_div(s1, n1) + old_div(s2, n2)
            chi2_stat = np.dot(np.linalg.solve(s, mdiff), mdiff)
            return chi2_stat
Example #27
 def compute_covariance(self, log_ps, ref):
     """
         log_ps: List of log density of the model.
         ref   : Samples from the true distribution.
     """
     l = len(log_ps)
     m = ref.shape[0]
     estimatess = np.zeros((l, self.n_estimates(m)))
     for i in range(l):
         estimatess[i] = self.estimates(log_ps[i], ref)
     return np.cov(estimatess) / self.n_estimates(m)
Example #28
def subset_cv(sub_list, test_x, test_y,
              train_x, train_y,
              samp,
              num_predictors,
              n_ul = 0,
              x_ul = 0):

    """
    In MTL, return the best predictor subset from sub_list using cross-validation.
    """
    scores = []
    fold = 2

    kf = KFold(test_x.shape[0],n_folds = fold)
    
    for s in sub_list:
        scores_temp = []

        for train, test in kf:
            test_x_cv = test_x[train]
            test_y_cv = test_y[train]

            X = np.concatenate([test_x_cv, train_x],axis=0)
            Y = np.concatenate([test_y_cv, train_y],axis=0)
            app_xy = np.append(Y,X,axis=1)
            

            mask = np.zeros(app_xy.shape[0], dtype = bool)

            mask[0:test_x_cv.shape[0]] = True
            mask_var = np.zeros(app_xy.shape[1],dtype=bool)
            mask_var[0] = True

            if s.size>0:
                mask_var[s+1] = True
            
            app_xyt = np.concatenate([test_y_cv, test_x_cv],axis=1)
            sigma = np.cov(app_xyt.T) +2/(np.log(test_x_cv.shape[0])**2)*np.eye(app_xyt.shape[1]) 

            index=0
            stay = True
            
            while stay:
                sigma = e_step(sigma,app_xy,mask_var, mask)
                stay =  (index<20)
                index += 1
                
            cov_xsh = sigma[1:,1:]
            cov_xysh = sigma[0,1:][:,np.newaxis]

            beta_cs =  np.dot(np.linalg.inv(cov_xsh),cov_xysh)
            scores_temp.append(np.mean((test_x[test].dot(beta_cs)-test_y[test])**2))
        scores.append(np.mean(scores_temp))
    return sub_list[np.argmin(scores)]
Example #29
def test_hamiltonian_monte_carlo_mv():
    np.random.seed(1)
    mu = np.arange(2)
    cov = 0.8 * np.ones((2, 2)) + 0.2 * np.eye(2)
    neg_log_p = AutogradPotential(neg_log_mvnormal(mu, cov))

    samples, *_ = hamiltonian_monte_carlo(100,
                                          neg_log_p,
                                          np.zeros(mu.shape),
                                          path_len=2.0)
    assert_allclose(mu, np.mean(samples, axis=0), atol=0.21)
    assert_allclose(cov, np.cov(samples.T), atol=0.31)
Example #30
    def plot_contours(ax, params):
        samples = sample_bnn(params, N_samples, inputs, layer_sizes)
        y_mean, y_cov = np.mean(samples, axis=0), np.cov(samples.T)

        approx_pdf = lambda x: mvn.logpdf(x, y_mean, y_cov)
        real_pdf = lambda x: mvn.logpdf(x, real_mean, real_cov)

        plot_isocontours(ax, approx_pdf, colors='r', label='approx')
        plot_isocontours(ax, real_pdf, colors='b', label='true')

        gp_samples = sample_full_normal(real_mean, r, N_samples)
        ax.scatter(gp_samples[:, 0], gp_samples[:, 1], marker='x')
Example #31
def score_estimator(alpha, m, x, K, alphaz, S=100):
    """
    Form score function estimator based on samples lmbda.
    """
    N = x.shape[0]
    if x.ndim == 1:
        D = 1
    else:
        D = x.shape[1]
    num_z = N * np.sum(K)
    L = K.shape[0]
    gradient = np.zeros((alpha.shape[0], 2))
    f = np.zeros((2 * S, alpha.shape[0], 2))
    h = np.zeros((2 * S, alpha.shape[0], 2))
    for s in range(2 * S):
        lmbda = npr.gamma(alpha, 1.)
        lmbda[lmbda < 1e-300] = 1e-300
        zw = m * lmbda / alpha
        lQ = logQ(zw, alpha, m)
        gradLQ = grad_logQ(zw, alpha, m)

        lP = logp(zw, K, x, alphaz)
        temp = lP - np.sum(lQ)
        f[s, :, :] = temp * gradLQ

        h[s, :, :] = gradLQ

    # CV
    covFH = np.zeros((alpha.shape[0], 2))
    covFH[:, 0] = np.diagonal(
        np.cov(f[S:, :, 0], h[S:, :, 0], rowvar=False)[:alpha.shape[0],
                                                       alpha.shape[0]:])
    covFH[:, 1] = np.diagonal(
        np.cov(f[S:, :, 1], h[S:, :, 1], rowvar=False)[:alpha.shape[0],
                                                       alpha.shape[0]:])
    a = covFH / np.var(h[S:, :, :], axis=0)

    return np.mean(f[:S, :, :], axis=0) - a * np.mean(h[:S, :, :], axis=0)
Example #32
def load_data(dnm, subset_sz=None):
    data = np.load(dnm)
    X = data['X']
    Y = data['y']
    Xt = data['Xt']
    # standardize the covariates; last col is intercept, so no standardization there
    m = X[:, :-1].mean(axis=0)
    V = np.cov(X[:, :-1], rowvar=False) + 1e-12 * np.eye(X.shape[1] - 1)
    X[:, :-1] = np.linalg.solve(np.linalg.cholesky(V), (X[:, :-1] - m).T).T
    Xt[:, :-1] = np.linalg.solve(np.linalg.cholesky(V), (Xt[:, :-1] - m).T).T
    Z = data['y'][:, np.newaxis] * X
    data.close()
    if subset_sz is None:
        subset_sz = Z.shape[0]
    return Z[:subset_sz, :], X[:subset_sz, :-1], Y[:subset_sz]
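The Cholesky solves above whiten the non-intercept covariates. A small self-contained check (illustrative only) that this transform gives approximately unit covariance:

import numpy as np

rng = np.random.default_rng(0)
A = rng.normal(size=(1000, 3)) @ np.array([[2.0, 0.3, 0.0],
                                           [0.0, 1.0, 0.5],
                                           [0.0, 0.0, 0.7]])
m = A.mean(axis=0)
V = np.cov(A, rowvar=False)
W = np.linalg.solve(np.linalg.cholesky(V), (A - m).T).T   # whitened covariates
print(np.round(np.cov(W, rowvar=False), 2))               # approximately the identity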
Example #33
def get_dataset(raw_data,
                label,
                id_patient,
                older_labeled_size=0.2,
                session_labeled_size=0.2):
    id_patient = id_patient - 1

    epochs_source, epochs_target, source_label, target_label = learn.model_selection.train_test_split(
        raw_data[id_patient], label[id_patient], train_size=older_labeled_size)
    epochs_target_train, epochs_target_test, target_train_label, target_test_label = learn.model_selection.train_test_split(
        epochs_target, target_label, train_size=session_labeled_size)

    source, target_train, target_test = {}, {}, {}
    source['labels'] = source_label
    target_train['labels'] = target_train_label
    target_test['labels'] = target_test_label

    source['covs'] = np.array([np.cov(epoch) for epoch in epochs_source])
    target_train['covs'] = np.array(
        [np.cov(epoch) for epoch in epochs_target_train])
    target_test['covs'] = np.array(
        [np.cov(epoch) for epoch in epochs_target_test])

    return (source, target_train, target_test)
Example #34
def adaptive_metric(q_hist):
    '''
    :param q_hist: a list of vectors, each a sample from the estimated target
    Approximates the empirical covariance matrix of the target distribution, to be
    used as the (inverse) mass matrix for the kinetic distribution.

    The more closely the inverse metric resembles the covariance of the target
    distribution, the more uniform the energy level sets become, and the easier
    the exploration.

    Once we are in the typical set, we run the Markov chain with a default metric
    for a short window to build an initial estimate of the target covariance, then
    update the metric accordingly.

    Particularly useful when the kinetic distribution is badly proposed.
    :return: empirical covariance matrix of q_hist
    '''
    q_hist = np.asarray(q_hist)
    cov = np.cov(q_hist.T)
    return cov
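A minimal usage sketch (illustrative only): estimate the covariance from a short warmup window of the chain, as the docstring describes, and invert it to obtain a mass matrix.

import numpy as np

warmup_samples = [np.random.randn(3) for _ in range(500)]  # stand-in for q_hist
inv_metric = adaptive_metric(warmup_samples)               # empirical target covariance
mass_matrix = np.linalg.inv(inv_metric)                    # metric for the kinetic term
print(mass_matrix.shape)                                   # (3, 3)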
Example #35
def fit_gaussian_draw(X, J, seed=28, reg=1e-7, eig_pow=1.0):
    """
    Fit a multivariate normal to the data X (n x d) and draw J points 
    from the fit. 
    - reg: regularizer to use with the covariance matrix
    - eig_pow: raise eigenvalues of the covariance matrix to this power to construct 
        a new covariance matrix before drawing samples. Useful to shrink the spread 
        of the variance.
    """
    with NumpySeedContext(seed=seed):
        d = X.shape[1]
        mean_x = np.mean(X, 0)
        cov_x = np.cov(X.T)
        if d==1:
            cov_x = np.array([[cov_x]])
        [evals, evecs] = np.linalg.eig(cov_x)
        evals = np.maximum(0, np.real(evals))
        assert np.all(np.isfinite(evals))
        evecs = np.real(evecs)
        shrunk_cov = evecs.dot(np.diag(evals**eig_pow)).dot(evecs.T) + reg*np.eye(d)
        V = np.random.multivariate_normal(mean_x, shrunk_cov, J)
    return V
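A hedged usage sketch for fit_gaussian_draw (it assumes NumpySeedContext and the other helpers of the surrounding module are in scope):

import numpy as np

X = np.random.default_rng(1).normal(size=(200, 2))
V = fit_gaussian_draw(X, J=5, seed=3, eig_pow=0.5)   # eig_pow < 1 shrinks the spread
print(V.shape)                                       # (5, 2)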
Example #36
File: dr.py Project: HelenLiGit/POT
def fda(X, y, p=2, reg=1e-16):
    """
    Fisher Discriminant Analysis


    Parameters
    ----------
    X : numpy.ndarray (n,d)
        Training samples
    y : np.ndarray (n,)
        labels for training samples
    p : int, optional
        size of the dimensionality reduction
    reg : float, optional
        Regularization term >0 (ridge regularization)


    Returns
    -------
    P : (d x p) ndarray
        Projection matrix for the given parameters
    proj : fun
        projection function including mean centering


    """

    mx = np.mean(X, axis=0)
    X -= mx.reshape((1, -1))

    # data split between classes
    d = X.shape[1]
    xc = split_classes(X, y)
    nc = len(xc)

    p = min(nc - 1, p)

    Cw = 0
    for x in xc:
        Cw += np.cov(x, rowvar=False)
    Cw /= nc

    mxc = np.zeros((d, nc))

    for i in range(nc):
        mxc[:, i] = np.mean(xc[i], axis=0)

    mx0 = np.mean(mxc, 1)
    Cb = 0
    for i in range(nc):
        Cb += (mxc[:, i] - mx0).reshape((-1, 1)) * \
            (mxc[:, i] - mx0).reshape((1, -1))

    w, V = linalg.eig(Cb, Cw + reg * np.eye(d))

    idx = np.argsort(w.real)

    Popt = V[:, idx[-p:]]

    def proj(X):
        return (X - mx.reshape((1, -1))).dot(Popt)

    return Popt, proj
Example #37
 def fit(cls, samples, return_instance = False): # observations expected in rows
     raise(NotImplementedError())
     mu = samples.mean(0)
     return (mu, np.atleast_2d(np.cov(samples, rowvar = 0)))
Example #38
    def _init_params(self, data, lengths=None, params='stmpaw'):
        X = data['obs']

        if self.n_lags == 0:
            super(ARTHMM, self)._init_params(data, lengths, params)
        else:
            if 's' in params:
                super(ARTHMM, self)._init_params(data, lengths, 's')

            if 't' in params:
                super(ARTHMM, self)._init_params(data, lengths, 't')

            if 'm' in params or 'a' in params or 'p' in params:
                kmmod = cluster.KMeans(
                    n_clusters=self.n_unique,
                    random_state=self.random_state).fit(X)
                kmeans = kmmod.cluster_centers_
                ar_mod = []
                ar_alpha = []
                ar_resid = []

                if not self.shared_alpha:
                    count = 0
                    for u in range(self.n_unique):
                        for f in range(self.n_features):
                            ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                            u,f]).fit(self.n_lags))
                            ar_alpha.append(ar_mod[count].params[1:])
                            ar_resid.append(ar_mod[count].resid)
                            count += 1
                else:
                    # run one AR model on most part of time series
                    # that has most points assigned after clustering
                    mf = np.argmax(np.bincount(kmmod.labels_))
                    for f in range(self.n_features):
                        ar_mod.append(smapi.tsa.AR(X[kmmod.labels_ == \
                                                    mf,f]).fit(self.n_lags))
                        ar_alpha.append(ar_mod[f].params[1:])
                        ar_resid.append(ar_mod[f].resid)

            if 'm' in params:
                mu_init = np.zeros((self.n_unique, self.n_features))
                for u in range(self.n_unique):
                    for f in range(self.n_features):
                        ar_idx = u
                        if self.shared_alpha:
                            ar_idx = 0
                        mu_init[u, f] = kmeans[u, f] - np.dot(
                            np.repeat(kmeans[u, f], self.n_lags),
                            ar_alpha[ar_idx])
                self.mu_ = np.copy(mu_init)

            if 'p' in params:

                precision_init = \
                np.zeros((self.n_unique, self.n_features, self.n_features))

                for u in range(self.n_unique):
                    if self.n_features == 1:
                        precision_init[u] = 1.0/(np.var(X[kmmod.labels_ == u]))

                    else:
                        precision_init[u] = np.linalg.inv\
                        (np.cov(np.transpose(X[kmmod.labels_ == u])))

                        # Alternative: Initialization using ar_resid
                        #for f in range(self.n_features):
                        #    if not self.shared_alpha:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[count])
                        #        count += 1
                        #    else:
                        #        precision_init[u,f,f] = 1./np.var(ar_resid[f])

                self.precision_ = np.copy(precision_init)

            if 'a' in params:
                if self.shared_alpha:
                    alpha_init = np.zeros((1, self.n_lags))
                    alpha_init = ar_alpha[0].reshape((1, self.n_lags))
                else:
                    alpha_init = np.zeros((self.n_unique, self.n_lags))
                    for u in range(self.n_unique):
                        ar_idx = 0
                        alpha_init[u] = ar_alpha[ar_idx]
                        ar_idx += self.n_features
                self.alpha_ = np.copy(alpha_init)
Example #39
    colors    = ['ug', 'gr', 'ri', 'iz']
    star_mags = np.array([du.colors_to_mags(r, c) 
                  for r, c in zip(coadd_df.star_mag_r.values,
                      coadd_df[['star_color_%s'%c for c in colors]].values)])

    gal_mags  = np.array([du.colors_to_mags(r, c) 
                    for r, c in zip(coadd_df.gal_mag_r.values,
                        coadd_df[['gal_color_%s'%c for c in colors]].values)])

    # look at galaxy fluxes regressed on stars
    x = star_mags[coadd_df.is_star.values]
    y = gal_mags[coadd_df.is_star.values]
    star_mag_model = LinearRegression()
    star_mag_model.fit(x, y)
    star_residuals = star_mag_model.predict(x) - y
    star_mag_model.res_covariance = np.cov(star_residuals.T)
    star_resids    = np.std(star_mag_model.predict(x) - y, axis=0)
    with open('star_mag_proposal.pkl', 'wb') as f:
        pickle.dump(star_mag_model, f)

    for i in range(5):
        plt.scatter(star_mag_model.predict(x)[:,i], y[:,i], label=i, c=sns.color_palette()[i])

    plt.legend()
    plt.show()

    # look at star fluxes regressed on galaxy fluxes
    x = gal_mags[~coadd_df.is_star.values]
    y = star_mags[~coadd_df.is_star.values]
    gal_mag_model = LinearRegression()
    gal_mag_model.fit(x, y)