Code Example #1
File: dpgmm.py Project: MechCoder/scikit-learn
    def _update_precisions(self, X, z):
        """Update the variational distributions for the precisions"""
        n_features = X.shape[1]
        if self.covariance_type == 'spherical':
            self.dof_ = 0.5 * n_features * np.sum(z, axis=0)
            for k in range(self.n_components):
                # could be more memory efficient ?
                sq_diff = np.sum((X - self.means_[k]) ** 2, axis=1)
                self.scale_[k] = 1.
                self.scale_[k] += 0.5 * np.sum(z.T[k] * (sq_diff + n_features))
                self.bound_prec_[k] = (
                    0.5 * n_features * (
                        digamma(self.dof_[k]) - np.log(self.scale_[k])))
            self.precs_ = np.tile(self.dof_ / self.scale_, [n_features, 1]).T

        elif self.covariance_type == 'diag':
            for k in range(self.n_components):
                self.dof_[k].fill(1. + 0.5 * np.sum(z.T[k], axis=0))
                sq_diff = (X - self.means_[k]) ** 2  # see comment above
                self.scale_[k] = np.ones(n_features) + 0.5 * np.dot(
                    z.T[k], (sq_diff + 1))
                self.precs_[k] = self.dof_[k] / self.scale_[k]
                self.bound_prec_[k] = 0.5 * np.sum(digamma(self.dof_[k])
                                                   - np.log(self.scale_[k]))
                self.bound_prec_[k] -= 0.5 * np.sum(self.precs_[k])

        elif self.covariance_type == 'tied':
            self.dof_ = 2 + X.shape[0] + n_features
            self.scale_ = (X.shape[0] + 1) * np.identity(n_features)
            for k in range(self.n_components):
                diff = X - self.means_[k]
                self.scale_ += np.dot(diff.T, z[:, k:k + 1] * diff)
            self.scale_ = pinvh(self.scale_)
            self.precs_ = self.dof_ * self.scale_
            self.det_scale_ = linalg.det(self.scale_)
            self.bound_prec_ = 0.5 * wishart_log_det(
                self.dof_, self.scale_, self.det_scale_, n_features)
            self.bound_prec_ -= 0.5 * self.dof_ * np.trace(self.scale_)

        elif self.covariance_type == 'full':
            for k in range(self.n_components):
                sum_resp = np.sum(z.T[k])
                self.dof_[k] = 2 + sum_resp + n_features
                self.scale_[k] = (sum_resp + 1) * np.identity(n_features)
                diff = X - self.means_[k]
                self.scale_[k] += np.dot(diff.T, z[:, k:k + 1] * diff)
                self.scale_[k] = pinvh(self.scale_[k])
                self.precs_[k] = self.dof_[k] * self.scale_[k]
                self.det_scale_[k] = linalg.det(self.scale_[k])
                self.bound_prec_[k] = 0.5 * wishart_log_det(
                    self.dof_[k], self.scale_[k], self.det_scale_[k],
                    n_features)
                self.bound_prec_[k] -= 0.5 * self.dof_[k] * np.trace(
                    self.scale_[k])
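Hedged sketch of the 'full' branch above for a single component k, using made-up toy data and responsibilities (the names rng, z_k, mean_k are illustrative, not part of the original class): accumulate the responsibility-weighted scatter, pseudo-invert it with pinvh, and scale by the Wishart degrees of freedom to get the expected precision.

import numpy as np
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
X = rng.normal(size=(80, 2))
z_k = rng.uniform(size=80)               # responsibilities of component k
mean_k = np.average(X, axis=0, weights=z_k)

sum_resp = z_k.sum()
dof_k = 2 + sum_resp + X.shape[1]
scale_k = (sum_resp + 1) * np.eye(X.shape[1])
diff = X - mean_k
scale_k += diff.T @ (z_k[:, None] * diff)
scale_k = pinvh(scale_k)                 # as in the 'full' branch above
prec_k = dof_k * scale_k                 # expected precision of component k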
Code Example #2
    def fit(self, evidence_approx_method="fixed-point",max_iter = 100):
        '''
        Fits Bayesian linear regression, returns posterior mean and precision
        of parameters
        
        Parameters:
        -----------
        max_iter: int
            Number of maximum iterations
            
        evidence_approx_method: str (DEFAULT = 'fixed-point')
            Method for approximating evidence, either 'fixed-point' or 'EM'
            
        # Theory Note:
        -----------------
        This code implements two methods to fit type II ML Bayesian Linear Regression:
        Expectation Maximization and Fixed Point Iterations. Expectation Maximization
        is generally slower so by default we use fixed-point.     
        '''
        # use type II maximum likelihood to find hyperparameters alpha and beta
        self._evidence_approx(max_iter = max_iter, method = evidence_approx_method)

        # find parameters of posterior distribution after last update of alpha & beta
        self.w_mu, self.w_precision = self._posterior_params(self.alpha,self.beta)
        self.D                      = pinvh(self.w_precision)
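The last two lines invert the posterior precision with pinvh to obtain the posterior covariance (self.D). A minimal sketch of that step, assuming a Gaussian prior with precision alpha and noise precision beta; the names below are illustrative, not this project's API.

import numpy as np
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
X = rng.normal(size=(50, 3))
y = X @ np.array([1.0, -2.0, 0.5]) + 0.1 * rng.normal(size=50)
alpha, beta = 1e-2, 100.0

# the posterior precision of the weights is symmetric, so pinvh applies
w_precision = alpha * np.eye(X.shape[1]) + beta * X.T @ X
w_covariance = pinvh(w_precision)        # analogue of self.D above
w_mu = beta * w_covariance @ X.T @ y     # posterior mean of the weights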
Code Example #3
File: gp.py Project: davidar/gpo
 def __init__(self, xs, ys, noise=0.001, l=1, K=K_SE):
     self.xs = xs
     self.l = l
     self.K = K
     Kxx = self.K(xs, l=self.l)
     self.KxxI = pinvh(Kxx + (noise**2) * eye_like(Kxx))
     self.KxxI_ys = self.KxxI.dot(ys)
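For context, a self-contained sketch of how such a Gaussian-process object uses the pinvh result for prediction. K_SE and eye_like are project helpers, so plain NumPy stand-ins (k_se below) are used here; this is an assumption, not the gpo API.

import numpy as np
from scipy.linalg import pinvh

def k_se(a, b, l=1.0):
    # squared-exponential kernel between two 1-D sets of inputs
    return np.exp(-0.5 * (a[:, None] - b[None, :]) ** 2 / l ** 2)

xs = np.linspace(0.0, 5.0, 20)
ys = np.sin(xs)
noise, l = 1e-3, 1.0

Kxx = k_se(xs, xs, l)
KxxI = pinvh(Kxx + noise ** 2 * np.eye(len(xs)))   # as in __init__ above
KxxI_ys = KxxI.dot(ys)
x_new = np.array([1.5, 2.5])
mean = k_se(x_new, xs, l).dot(KxxI_ys)             # GP posterior mean at x_new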
Code Example #4
File: test_basic.py Project: metamorph-inc/meta-core
 def test_simple_complex(self):
     a = array([[1, 2, 3], [4, 5, 6], [7, 8, 10]], dtype=float) + 1j * array(
         [[10, 8, 7], [6, 5, 4], [3, 2, 1]], dtype=float
     )
     a = np.dot(a, a.conj().T)
     a_pinv = pinvh(a)
     assert_array_almost_equal(np.dot(a, a_pinv), np.eye(3))
Code Example #5
    def _init_params(self,X):
        '''
        Initialise parameters
        '''
        d = X.shape[1]

        # initialise prior on means & precision matrices
        if 'means' in self.init_params:
            means0   = self.init_params['means']
        else:
            kms = KMeans(n_init = self.n_init, n_clusters = self.n_components)
            means0 = kms.fit(X).cluster_centers_
            
        if 'covar' in self.init_params:
            scale_inv0 = self.init_params['covar']
            scale0     = pinvh(scale_inv0)
        else:
            # heuristics to define broad prior over precision matrix
            diag_els   = np.abs(np.max(X,0) - np.min(X,0))/2
            scale_inv0 = np.diag( diag_els  )
            scale0     = np.diag( 1./ diag_els )
            
        if 'weights' in self.init_params:
            # use the user-provided weights instead of the uniform default
            weights0  = np.asarray(self.init_params['weights'])
        else:
            weights0  = np.ones(self.n_components) / self.n_components
          
        if 'dof' in self.init_params:
            dof0 = self.init_params['dof']
        else:
            dof0 = d
            
        if 'beta' in self.init_params:
            beta0 = self.init_params['beta']
        else:
            beta0 = 1e-3
            
        # clusters that are not pruned 
        self.active  = np.ones(self.n_components, dtype = bool)
        
        # checks initialisation errors in case parameters are user defined
        assert dof0 >= d,( 'Degrees of freedom should be at least the '
                                'dimensionality of data')
        assert means0.shape[0] == self.n_components,('Number of centroids defined should '
                                                     'be equal to number of components')
        assert means0.shape[1] == d,('Dimensionality of means and data '
                                          'should be the same')
        assert weights0.shape[0] == self.n_components,('Number of weights should be '
                                                           'equal to number of components')
        
        # At first iteration these parameters are equal to priors, but they change 
        # at each iteration of mean field approximation
        scale   = np.array([np.copy(scale0) for _ in range(self.n_components)])
        means   = np.copy(means0)
        weights = np.copy(weights0)
        dof     = dof0*np.ones(self.n_components)
        beta    = beta0*np.ones(self.n_components)
        init_   = [means0, scale0, scale_inv0, beta0, dof0, weights0]
        iter_   = [means, scale, scale_inv0, beta, dof, weights]
        return init_, iter_
Code Example #6
File: forcegp_module.py Project: marcocaccin/MarcoGP
    def fit(self, X=None, y=None):
        """
        The Gaussian Process model fitting method.

        Parameters
        ----------
        X : double array_like
            An array with shape (n_samples, n_features) with the input at which
            observations were made.

        y : array_like, shape (n_samples, 3)
            An array with shape (n_samples, 3) with the observations of the
            output to be predicted.

        Returns
        -------
        gp : self
            A fitted Gaussian Process model object awaiting data to perform
            predictions.
        """

        if X is not None:
            K_list = self.calc_scalar_kernel_matrices(X)
        else:
            K_list = self.calc_scalar_kernel_matrices()

        # add diagonal noise (nugget) to each scalar kernel matrix
        K_list = [K + self.nugget * sp.eye(K.shape[0]) for K in K_list]

        Kglob = None
        # outer_iv = [sp.outer(iv, iv.T) for iv in self.ivs] # NO, wrong
        for K, ivs, iv_corr in zip(K_list, self.ivs, self.iv_corr):
            # make the outer product tensor of shape (N_ls, N_ls, 3, 3) and multiply it with the scalar kernel
            K3D = iv_corr * K[:, :, None, None] * rotmat_multi(ivs, ivs)
            # reshape tensor onto a 2D array tiled with 3x3 matrix blocks
            if Kglob is None:
                Kglob = K3D
            else:
                Kglob += K3D
        Kglob = my_tensor_reshape(Kglob)
        # # all channels merged into one covariance matrix
        # # K^{glob}_{ij} = \sum_{k = 1}^{N_{IVs}} w_k D_{k, ij} |v_k^i\rangle \langle v_k^j |

        try:
            inv = LA.pinv2(Kglob)
        except LA.LinAlgError as err:
            print("pinv2 failed: %s. Switching to pinvh" % err)
            try:
                inv = LA.pinvh(Kglob)
            except LA.LinAlgError as err:
                print("pinvh failed: %s. Switching to pinv2" % err)
                inv = None

        # alpha is the vector of regression coefficients of GaussianProcess
        alpha = sp.dot(inv, self.y.ravel())

        if not self.low_memory:
            self.inverse = inv
            self.Kglob = Kglob
        self.alpha = sp.array(alpha)
Code Example #7
File: gp.py Project: davidar/gpo
 def nll(l): # negative log likelihood
     #if l < 0.001: return 1e10
     Kxx = K(xs, l=l)
     Kxx += (noise**2) * eye_like(Kxx)
     res = (ys.T).dot(pinvh(Kxx)).dot(ys) + slogdet(Kxx)[1]
     res = squeeze(res)
     #print l,res
     return res
Code Example #8
File: test_basic.py Project: 7924102/scipy
 def test_nonpositive(self):
     a = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
     a = np.dot(a, a.T)
     u, s, vt = np.linalg.svd(a)
     s[0] *= -1
     a = np.dot(u * s, vt)  # a is now symmetric non-positive and singular
     a_pinv = pinv2(a)
     a_pinvh = pinvh(a)
     assert_array_almost_equal(a_pinv, a_pinvh)
Code Example #9
File: linalg.py Project: argriffing/fiedlerology
def laplacian_sc_pinv(G,
        observed_nodelist, unobserved_nodelist, weight='weight'):
    """
    Pseudo-inverse of Laplacian Schur complement.

    """
    sc = laplacian_schur_complement(G,
            observed_nodelist, unobserved_nodelist, weight=weight)
    return pinvh(sc)
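laplacian_schur_complement is specific to this project, but the role of pinvh is easy to illustrate on a plain graph Laplacian, which is symmetric and singular, so only a pseudo-inverse exists. A hedged sketch using networkx, not the fiedlerology code:

import networkx as nx
import numpy as np
from scipy.linalg import pinvh

G = nx.path_graph(4)
L = nx.laplacian_matrix(G).toarray().astype(float)
L_pinv = pinvh(L)
# Moore-Penrose property of the pseudo-inverse: L @ L_pinv @ L == L
assert np.allclose(L @ L_pinv @ L, L)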
Code Example #10
File: maxlike_base.py Project: jpceia/maxlike
 def error_matrix(self):
     """
     Covariance Matrix.
     """
     try:
         mask = self.flat_hess_.mask
     except AttributeError:
         mask = None
     return self._reshape_matrix(
         -np.ma.array(pinvh(self.flat_hess_.data), mask=mask))
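Here the covariance of the fitted parameters is taken as the pseudo-inverse of the negative Hessian of the log-likelihood. A minimal sketch of that idea with a toy Hessian (illustrative values only):

import numpy as np
from scipy.linalg import pinvh

hessian = np.array([[-4.0, 1.0],
                    [1.0, -2.0]])       # toy Hessian at the optimum
cov = pinvh(-hessian)                   # parameter covariance matrix
std_errors = np.sqrt(np.diag(cov))      # standard errors of the estimates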
Code Example #11
File: bayes.py Project: allefpablo/scikit-learn
 def update_sigma(X, alpha_, lambda_, keep_lambda, n_samples):
     sigma_ = pinvh(np.eye(n_samples) / alpha_ +
                    np.dot(X[:, keep_lambda] *
                    np.reshape(1. / lambda_[keep_lambda], [1, -1]),
                    X[:, keep_lambda].T))
     sigma_ = np.dot(sigma_, X[:, keep_lambda] *
                     np.reshape(1. / lambda_[keep_lambda], [1, -1]))
     sigma_ = - np.dot(np.reshape(1. / lambda_[keep_lambda], [-1, 1]) *
                       X[:, keep_lambda].T, sigma_)
     sigma_.flat[::(sigma_.shape[1] + 1)] += 1. / lambda_[keep_lambda]
     return sigma_
Code Example #12
File: gp.py Project: davidar/gpo
 def nll_prime(l):
     Kxx,Kps = K(xs, l=l, deriv=True)
     Kxx += (noise**2) * eye_like(Kxx)
     KxxI = pinvh(Kxx)
     a = KxxI.dot(ys)
     aaT = outer(a,a) # a . a.T
     KI_aaT = KxxI - aaT # K^-1 - aaT
     res = []
     for Kp in Kps:
         grad = trace_prod(KI_aaT, Kp)
         res.append(grad)
     return asarray(res)
Code Example #13
File: hmm.py Project: Ferrine/sklearn-bayes
    def _init_params(self,*args):
        ''' 
        Initialise parameters of Bayesian Gaussian HMM
        '''
        d,X         = args
        pr_start, pr_trans = super(VBGaussianHMM,self)._init_params()

        # initialise prior on means & precision matrices
        if 'means' in self.init_params:
            means0   = check_array(self.init_params['means'])
        else:
            kms = KMeans(n_init = 2, n_clusters = self.n_hidden)
            means0 = kms.fit(X).cluster_centers_
            
        if 'covar' in self.init_params:
            scale_inv0 = self.init_params['covar']
            scale0     = pinvh(scale_inv0)
        else:
            # heuristics to define broad prior over precision matrix
            diag_els   = np.abs(np.max(X,0) - np.min(X,0))
            scale_inv0 = np.diag( diag_els  )
            scale0     = np.diag( 1./ diag_els )

        if 'dof' in self.init_params:
            dof0 = self.init_params['dof']
        else:
            dof0 = d
            
        if 'beta' in self.init_params:
            beta0 = self.init_params['beta']
        else:
            beta0 = 1e-3
        
        # checks initialisation errors in case parameters are user defined
        if dof0 < d:
            raise ValueError(( 'Degrees of freedom should be at least the '
                                'dimensionality of data'))
        if means0.shape[0] != self.n_hidden:
            raise ValueError(('Number of centroids defined should '
                              'be equal to number of components' ))
        if means0.shape[1] != d:
            raise ValueError(('Dimensionality of means and data '
                                          'should be the same'))

        scale   = np.array([np.copy(scale0) for _ in range(self.n_hidden)])
        dof     = dof0*np.ones(self.n_hidden)
        beta    = beta0*np.ones(self.n_hidden)
        
        # if user did not define initialisation parameters use KMeans
        return pr_start, pr_trans, {'means':means0,'scale':scale,'beta': beta,
                                    'dof':dof,'scale_inv0':scale_inv0}
Code Example #14
File: fixes.py Project: adykstra/mne-python
    def get_precision(self):
        """Getter for the precision matrix.

        Returns
        -------
        precision_ : array-like,
            The precision matrix associated to the current covariance object.

        """
        if self.store_precision:
            precision = self.precision_
        else:
            precision = linalg.pinvh(self.covariance_)
        return precision
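The else-branch simply inverts the stored covariance. A short generic sketch of that relationship (an assumption about typical usage, not mne-python code):

import numpy as np
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
samples = rng.normal(size=(100, 4))
covariance = np.cov(samples, rowvar=False)
precision = pinvh(covariance)           # what get_precision returns
assert np.allclose(covariance @ precision, np.eye(4))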
Code Example #15
 def _update_params(self, Nk, Xk, Sk, beta0, means0, dof0, scale_inv0,
                              beta, means, dof, scale):
     ''' Updates distribution of means and precisions '''
     for k in range(self.n_active):
         # update mean and precision for each cluster
         beta[k]   = beta0 + Nk[k]
         means[k]  = (beta0*means0[k,:] + Xk[k]) / beta[k]
         dof[k]    = dof0 + Nk[k] + 1
         # precision calculation is ugly but prevent overflow & underflow
         scale[k,:,:]  = pinvh( scale_inv0 + (beta0*Sk[k] + Nk[k]*Sk[k] - 
                              np.outer(Xk[k],Xk[k]) - 
                              beta0*np.outer(means0[k,:] - Xk[k],means0[k,:])) /
                              (beta0 + Nk[k]) )
     return beta,means,dof,scale
Code Example #16
    def fit(self, X, y=None):
        """Fits a Minimum Covariance Determinant with the FastMCD algorithm.

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training data, where n_samples is the number of samples
            and n_features is the number of features.

        y
            not used, present for API consistency.

        Returns
        -------
        self : object

        """
        X = check_array(X, ensure_min_samples=2, estimator='MinCovDet')
        random_state = check_random_state(self.random_state)
        n_samples, n_features = X.shape
        # check that the empirical covariance is full rank
        if (linalg.svdvals(np.dot(X.T, X)) > 1e-8).sum() != n_features:
            warnings.warn("The covariance matrix associated to your dataset "
                          "is not full rank")
        # compute and store raw estimates
        raw_location, raw_covariance, raw_support, raw_dist = fast_mcd(
            X, support_fraction=self.support_fraction,
            cov_computation_method=self._nonrobust_covariance,
            random_state=random_state)
        if self.assume_centered:
            raw_location = np.zeros(n_features)
            raw_covariance = self._nonrobust_covariance(X[raw_support],
                                                        assume_centered=True)
            # get precision matrix in an optimized way
            precision = linalg.pinvh(raw_covariance)
            raw_dist = np.sum(np.dot(X, precision) * X, 1)
        self.raw_location_ = raw_location
        self.raw_covariance_ = raw_covariance
        self.raw_support_ = raw_support
        self.location_ = raw_location
        self.support_ = raw_support
        self.dist_ = raw_dist
        # obtain consistency at normal models
        self.correct_covariance(X)
        # re-weight estimator
        self.reweight_covariance(X)

        return self
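The raw distances above are squared Mahalanobis distances computed from a pinvh precision matrix. A hedged sketch of just that step on toy data (centering explicitly, whereas the assume_centered branch above skips it):

import numpy as np
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
X = rng.normal(size=(200, 3))
location = X.mean(axis=0)
covariance = np.cov(X, rowvar=False)

precision = pinvh(covariance)   # covariance may be rank-deficient; pinvh still works
X_centered = X - location
mahal_sq = np.sum(np.dot(X_centered, precision) * X_centered, axis=1)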
Code Example #17
File: hmm.py Project: Ferrine/sklearn-bayes
 def _vbm_emission_params(self,emission_params_prior, emission_params, sf_stats):
     '''
     Performs vbm step for parameters of emission probabilities
     '''
     Nk,Xk,Sk = sf_stats
     beta0, means0 = emission_params_prior['beta'], emission_params_prior['means']
     emission_params['beta']  =  beta0 + Nk
     emission_params['means'] = ((beta0*means0.T + Xk.T ) / emission_params['beta']).T
     emission_params['dof']   = emission_params_prior['dof'] + Nk + 1
     scale_inv0               = emission_params_prior['scale_inv0']
     for k in range(self.n_hidden):
         emission_params['scale'][k] = pinvh( scale_inv0 + (beta0*Sk[k] + Nk[k]*Sk[k] - 
                                    np.outer(Xk[k],Xk[k]) - 
                                    beta0*np.outer(means0[k] - Xk[k],means0[k])) /
                                    (beta0 + Nk[k]) )            
     return emission_params        
Code Example #18
def inversion_checker(X,alpha,beta):
    '''
    Checks accuracy of inversion
    '''
    n,m    = X.shape
    u,d,vh = np.linalg.svd(X,full_matrices = False)
    dsq    = d**2
    # precision matrix
    S      = beta*np.dot(X.T,X) + alpha*np.eye(m)
    
    # inverting precision : PREVIOUS VERSION
    a1     = np.dot( np.dot(vh.T, np.diag( 1. / (beta*dsq + alpha)) ), vh)
    
    # inverting precision : CURRENT VERSION
    a2     = pinvh(S)
    return [a1,a2]
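A usage sketch for the checker above, assuming inversion_checker is importable together with its numpy/scipy imports; the SVD-based inverse and pinvh should agree to roughly machine precision:

import numpy as np

rng = np.random.default_rng(1)
X = rng.normal(size=(30, 5))
a1, a2 = inversion_checker(X, alpha=1.0, beta=2.0)
print(np.max(np.abs(a1 - a2)))   # expected to be on the order of 1e-15
assert np.allclose(a1, a2)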
Code Example #19
File: test_bayes.py Project: allefpablo/scikit-learn
def test_bayesian_ridge_score_values():
    """Check value of score on toy example.

    Compute log marginal likelihood with equation (36) in Sparse Bayesian
    Learning and the Relevance Vector Machine (Tipping, 2001):

    - 0.5 * (log |Id/alpha + X.X^T/lambda| +
             y^T.(Id/alpha + X.X^T/lambda).y + n * log(2 * pi))
    + lambda_1 * log(lambda) - lambda_2 * lambda
    + alpha_1 * log(alpha) - alpha_2 * alpha

    and check equality with the score computed during training.
    """

    X, y = diabetes.data, diabetes.target
    n_samples = X.shape[0]
    # check with initial values of alpha and lambda (see code for the values)
    eps = np.finfo(np.float64).eps
    alpha_ = 1. / (np.var(y) + eps)
    lambda_ = 1.

    # value of the parameters of the Gamma hyperpriors
    alpha_1 = 0.1
    alpha_2 = 0.1
    lambda_1 = 0.1
    lambda_2 = 0.1

    # compute score using formula of docstring
    score = lambda_1 * log(lambda_) - lambda_2 * lambda_
    score += alpha_1 * log(alpha_) - alpha_2 * alpha_
    M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T)
    M_inv = pinvh(M)
    score += - 0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
                      n_samples * log(2 * np.pi))

    # compute score with BayesianRidge
    clf = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2,
                        lambda_1=lambda_1, lambda_2=lambda_2,
                        n_iter=1, fit_intercept=False, compute_score=True)
    clf.fit(X, y)

    assert_almost_equal(clf.scores_[0], score, decimal=9)
Code Example #20
File: fixes.py Project: adykstra/mne-python
    def _set_covariance(self, covariance):
        """Saves the covariance and precision estimates

        Storage is done according to `self.store_precision`.
        Precision stored only if invertible.

        Parameters
        ----------
        covariance : 2D ndarray, shape (n_features, n_features)
            Estimated covariance matrix to be stored, and from which precision
            is computed.

        """
        # covariance = check_array(covariance)
        # set covariance
        self.covariance_ = covariance
        # set precision
        if self.store_precision:
            self.precision_ = linalg.pinvh(covariance)
        else:
            self.precision_ = None
Code Example #21
File: hmm.py Project: Ferrine/sklearn-bayes
 def fit(self,X,chain_index = []):
     '''
     Fits Bayesian Hidden Markov Model with Gaussian emission probabilities
     
     Parameters
     ----------
     X: array-like or csr_matrix of size (n_samples, n_features)
        Data Matrix
        
     Returns
     -------
     object: self
       self
     '''
     # preprocess data, 
     X = self._check_X_train(X)
     super(VBGaussianHMM,self)._fit(X, chain_index)
     self.means_ = self._emission_params_['means']
     scale, dof  = self._emission_params_['scale'], self._emission_params_['dof'] 
     self.covars_ = np.asarray([1./df * pinvh(sc) for sc,df in zip(scale,dof)])
     return self
Code Example #22
File: mmr_kernel_subspace.py Project: ipa-nhg/kukadu
  def compute_kernel_row(self):

    K=np.zeros((self.nrow,self.nrow))

    drow={}  

    for i in range(self.nrow):
      iA=np.where(self.xdata[i]>0)[0]
      nsub=len(iA)
      if nsub>0:
        A=self.base_vectors[iA]
        AAT=self.kernel_col[iA.reshape((nsub,1)),iA]
        AATI=sp_lin.pinvh(AAT)
        drow[i]=(np.dot(AATI,A),iA)
      else:
        drow[i]=(None,np.array([]))

    for i in range(self.nrow):
      (BBTI,iB)=drow[i]
      if len(iB)>0:
        for j in range(i+1):
          (AATI,iA)=drow[j]
          ## = trace(Vhi.T,Vhj)=\braket{Vhi,Vhj}_{Frobenius}
          ## (BB^T)^{-1}BA^T(AA^T)^{-1}
          if len(iA)>0:
            BAT=self.kernel_col[iB.reshape((len(iB),1)),iA]
            ## xP=np.diag(np.dot(BBTI.T,np.dot(BAT,AATI)))
            xP=np.dot(BBTI,AATI.T)*BAT
            K[i,j]=np.sum(xP)  
            K[j,i]=K[i,j]
      ## if i%1000==0:
        ## print(i)
        
    d1=np.diag(K)
    d2=d1
    self.kernel_row=kernel_eval_nl(K,d1,d2,self.param_row)
    print('Modular kernel done')
    
    return
Code Example #23
 def _posterior(self, X, Y, alpha0, w0, full_covar = False):
     '''
     Iteratively refitted least squares method using l_bfgs_b.
     Finds MAP estimates for weights and Hessian at convergence point
     '''
     if self.solver == 'lbfgs_b':
         f = lambda w: _logistic_loss_and_grad(w,X,Y,alpha0)
         w = fmin_l_bfgs_b(f, x0 = w0, pgtol = self.tol_solver,
                           maxiter = self.n_iter_solver)[0]
     elif self.solver == 'newton_cg':
         f    = _logistic_loss
         grad = lambda w,*args: _logistic_loss_and_grad(w,*args)[1]
         hess = _logistic_grad_hess               
         args = (X,Y,alpha0)
         w    = newton_cg(hess, f, grad, w0, args=args,
                          maxiter=self.n_iter, tol=self.tol)[0]
     else:
         raise NotImplementedError('Liblinear solver is not yet implemented')
         
     # calculate negative of Hessian at w
     if self.fit_intercept:
         XW = np.dot(X,w[:-1]) + w[-1]
     else:
         XW = np.dot(X,w)
     s          = expit(XW)
     R          = s * (1 - s)
     negHessian = np.dot(X.T*R,X)
     
     # do not regularise constant
     alpha_vec     = np.zeros(negHessian.shape[0])
     alpha_vec     = alpha0   
     np.fill_diagonal(negHessian,np.diag(negHessian) + alpha_vec)
     if full_covar is False:
         eigs = 1./eigvalsh(negHessian)
         return [w,eigs]
     else:
         inv = pinvh(negHessian)
         return [w, inv]
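The covariance returned in the full_covar branch is the Laplace approximation: pinvh of the negative Hessian of the regularised logistic loss at the MAP weights. A hedged, self-contained sketch of that computation (w_map and alpha are made-up values for illustration):

import numpy as np
from scipy.linalg import pinvh
from scipy.special import expit

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 3))
w_map = np.array([0.5, -1.0, 0.2])    # pretend MAP weights
alpha = 1.0

s = expit(X @ w_map)
R = s * (1 - s)
neg_hessian = X.T @ (R[:, None] * X) + alpha * np.eye(X.shape[1])
posterior_cov = pinvh(neg_hessian)    # Laplace-approximation covariance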
Code Example #24
File: bayes.py Project: yungbyun/scikit-learn
    def fit(self, X, y):
        """Fit the ARDRegression model according to the given training data
        and parameters.

        Iterative procedure to maximize the evidence

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array, shape = [n_samples]
            Target values (integers). Will be cast to X's dtype if necessary

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)

        n_samples, n_features = X.shape
        coef_ = np.zeros(n_features)

        X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(
            X, y, self.fit_intercept, self.normalize, self.copy_X)

        # Launch the convergence loop
        keep_lambda = np.ones(n_features, dtype=bool)

        lambda_1 = self.lambda_1
        lambda_2 = self.lambda_2
        alpha_1 = self.alpha_1
        alpha_2 = self.alpha_2
        verbose = self.verbose

        # Initialization of the values of the parameters
        alpha_ = 1. / np.var(y)
        lambda_ = np.ones(n_features)

        self.scores_ = list()
        coef_old_ = None

        # Iterative procedure of ARDRegression
        for iter_ in range(self.n_iter):
            # Compute mu and sigma (using Woodbury matrix identity)
            sigma_ = pinvh(
                np.eye(n_samples) / alpha_ + np.dot(
                    X[:, keep_lambda] *
                    np.reshape(1. / lambda_[keep_lambda], [1, -1]),
                    X[:, keep_lambda].T))
            sigma_ = np.dot(
                sigma_, X[:, keep_lambda] *
                np.reshape(1. / lambda_[keep_lambda], [1, -1]))
            sigma_ = -np.dot(
                np.reshape(1. / lambda_[keep_lambda], [-1, 1]) *
                X[:, keep_lambda].T, sigma_)
            sigma_.flat[::(sigma_.shape[1] + 1)] += 1. / lambda_[keep_lambda]
            coef_[keep_lambda] = alpha_ * np.dot(
                sigma_, np.dot(X[:, keep_lambda].T, y))

            # Update alpha and lambda
            rmse_ = np.sum((y - np.dot(X, coef_))**2)
            gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_)
            lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) /
                                    ((coef_[keep_lambda])**2 + 2. * lambda_2))
            alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) /
                      (rmse_ + 2. * alpha_2))

            # Prune the weights with a precision over a threshold
            keep_lambda = lambda_ < self.threshold_lambda
            coef_[~keep_lambda] = 0

            # Compute the objective function
            if self.compute_score:
                s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()
                s += alpha_1 * log(alpha_) - alpha_2 * alpha_
                s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) +
                            np.sum(np.log(lambda_)))
                s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_**2).sum())
                self.scores_.append(s)

            # Check for convergence
            if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
                if verbose:
                    print("Converged after %s iterations" % iter_)
                break
            coef_old_ = np.copy(coef_)

        self.coef_ = coef_
        self.alpha_ = alpha_
        self.sigma_ = sigma_
        self.lambda_ = lambda_
        self._set_intercept(X_offset_, y_offset_, X_scale_)
        return self
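The sigma_ update above applies the Woodbury identity so that only an n_samples-sized matrix has to be pseudo-inverted. A hedged sketch checking, on a toy problem, that this route matches the direct pinvh of the n_features-sized posterior precision diag(lambda_) + alpha_ * X.T X (all values below are made up for illustration):

import numpy as np
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
X = rng.normal(size=(20, 4))
alpha_ = 2.0
lambda_ = rng.uniform(0.5, 2.0, size=4)

direct = pinvh(np.diag(lambda_) + alpha_ * X.T @ X)

# Woodbury route, mirroring the sigma_ assignments in fit()
sigma_ = pinvh(np.eye(20) / alpha_ + (X / lambda_) @ X.T)
sigma_ = -(X / lambda_).T @ sigma_ @ (X / lambda_)
sigma_[np.diag_indices(4)] += 1.0 / lambda_

assert np.allclose(direct, sigma_)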
Code Example #25
def graph_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4,
                enet_tol=1e-4, max_iter=100, verbose=False,
                return_costs=False, eps=np.finfo(np.float64).eps,
                return_n_iter=False):
    """l1-penalized covariance estimator

    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.

    Parameters
    ----------
    emp_cov : 2D ndarray, shape (n_features, n_features)
        Empirical covariance from which to compute the covariance estimate.

    alpha : positive float
        The regularization parameter: the higher alpha, the more
        regularization, the sparser the inverse covariance.

    cov_init : 2D array (n_features, n_features), optional
        The initial guess for the covariance.

    mode : {'cd', 'lars'}
        The Lasso solver to use: coordinate descent or LARS. Use LARS for
        very sparse underlying graphs, where p > n. Elsewhere prefer cd
        which is more numerically stable.

    tol : positive float, optional
        The tolerance to declare convergence: if the dual gap goes below
        this value, iterations are stopped.

    enet_tol : positive float, optional
        The tolerance for the elastic net solver used to calculate the descent
        direction. This parameter controls the accuracy of the search direction
        for a given column update, not of the overall parameter estimate. Only
        used for mode='cd'.

    max_iter : integer, optional
        The maximum number of iterations.

    verbose : boolean, optional
        If verbose is True, the objective function and dual gap are
        printed at each iteration.

    return_costs : boolean, optional
        If return_costs is True, the objective function and dual gap
        at each iteration are returned.

    eps : float, optional
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    return_n_iter : bool, optional
        Whether or not to return the number of iterations.

    Returns
    -------
    covariance : 2D ndarray, shape (n_features, n_features)
        The estimated covariance matrix.

    precision : 2D ndarray, shape (n_features, n_features)
        The estimated (sparse) precision matrix.

    costs : list of (objective, dual_gap) pairs
        The list of values of the objective function and the dual gap at
        each iteration. Returned only if return_costs is True.

    n_iter : int
        Number of iterations. Returned only if `return_n_iter` is set to True.

    See Also
    --------
    GraphLasso, GraphLassoCV

    Notes
    -----
    The algorithm employed to solve this problem is the GLasso algorithm,
    from the Friedman 2008 Biostatistics paper. It is the same algorithm
    as in the R `glasso` package.

    One possible difference with the `glasso` R package is that the
    diagonal coefficients are not penalized.

    """
    _, n_features = emp_cov.shape
    if alpha == 0:
        if return_costs:
            precision_ = linalg.inv(emp_cov)
            cost = - 2. * log_likelihood(emp_cov, precision_)
            cost += n_features * np.log(2 * np.pi)
            d_gap = np.sum(emp_cov * precision_) - n_features
            if return_n_iter:
                return emp_cov, precision_, (cost, d_gap), 0
            else:
                return emp_cov, precision_, (cost, d_gap)
        else:
            if return_n_iter:
                return emp_cov, linalg.inv(emp_cov), 0
            else:
                return emp_cov, linalg.inv(emp_cov)
    if cov_init is None:
        covariance_ = emp_cov.copy()
    else:
        covariance_ = cov_init.copy()
    # As a trivial regularization (Tikhonov like), we scale down the
    # off-diagonal coefficients of our starting point: This is needed, as
    # in the cross-validation the cov_init can easily be
    # ill-conditioned, and the CV loop blows up. Besides, this takes a
    # conservative standpoint on the initial conditions, and it tends to
    # make the convergence go faster.
    covariance_ *= 0.95
    diagonal = emp_cov.flat[::n_features + 1]
    covariance_.flat[::n_features + 1] = diagonal
    precision_ = linalg.pinvh(covariance_)

    indices = np.arange(n_features)
    costs = list()
    # The different l1 regression solvers have different numerical errors
    if mode == 'cd':
        errors = dict(over='raise', invalid='ignore')
    else:
        errors = dict(invalid='raise')
    try:
        # be robust to the max_iter=0 edge case, see:
        # https://github.com/scikit-learn/scikit-learn/issues/4134
        d_gap = np.inf
        for i in range(max_iter):
            for idx in range(n_features):
                sub_covariance = np.ascontiguousarray(
                    covariance_[indices != idx].T[indices != idx])
                row = emp_cov[idx, indices != idx]
                with np.errstate(**errors):
                    if mode == 'cd':
                        # Use coordinate descent
                        coefs = -(precision_[indices != idx, idx]
                                  / (precision_[idx, idx] + 1000 * eps))
                        coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram(
                            coefs, alpha, 0, sub_covariance, row, row,
                            max_iter, enet_tol, check_random_state(None), False)
                    else:
                        # Use LARS
                        _, _, coefs = lars_path(
                            sub_covariance, row, Xy=row, Gram=sub_covariance,
                            alpha_min=alpha / (n_features - 1), copy_Gram=True,
                            method='lars', return_path=False)
                # Update the precision matrix
                precision_[idx, idx] = (
                    1. / (covariance_[idx, idx]
                          - np.dot(covariance_[indices != idx, idx], coefs)))
                precision_[indices != idx, idx] = (- precision_[idx, idx]
                                                   * coefs)
                precision_[idx, indices != idx] = (- precision_[idx, idx]
                                                   * coefs)
                coefs = np.dot(sub_covariance, coefs)
                covariance_[idx, indices != idx] = coefs
                covariance_[indices != idx, idx] = coefs
            d_gap = _dual_gap(emp_cov, precision_, alpha)
            cost = _objective(emp_cov, precision_, alpha)
            if verbose:
                print(
                    '[graph_lasso] Iteration % 3i, cost % 3.2e, dual gap %.3e'
                    % (i, cost, d_gap))
            if return_costs:
                costs.append((cost, d_gap))
            if np.abs(d_gap) < tol:
                break
            if not np.isfinite(cost) and i > 0:
                raise FloatingPointError('Non SPD result: the system is '
                                         'too ill-conditioned for this solver')
        else:
            warnings.warn('graph_lasso: did not converge after %i iteration:'
                          ' dual gap: %.3e' % (max_iter, d_gap),
                          ConvergenceWarning)
    except FloatingPointError as e:
        e.args = (e.args[0]
                  + '. The system is too ill-conditioned for this solver',)
        raise e

    if return_costs:
        if return_n_iter:
            return covariance_, precision_, costs, i + 1
        else:
            return covariance_, precision_, costs
    else:
        if return_n_iter:
            return covariance_, precision_, i + 1
        else:
            return covariance_, precision_
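A hedged usage sketch for the solver above; it relies on the surrounding scikit-learn module for its helpers (cd_fast, _dual_gap, etc.), so this is only meaningful in that context. It builds an empirical covariance and recovers a sparse precision matrix, with the solver starting internally from pinvh of the shrunk covariance.

import numpy as np
from sklearn.covariance import empirical_covariance

rng = np.random.default_rng(0)
X = rng.normal(size=(60, 5))
emp_cov = empirical_covariance(X)
covariance_, precision_ = graph_lasso(emp_cov, alpha=0.1)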
Code Example #26
def _c_step(X, n_support, random_state, remaining_iterations=30,
            initial_estimates=None, verbose=False,
            cov_computation_method=empirical_covariance):
    n_samples, n_features = X.shape
    dist = np.inf

    # Initialisation
    support = np.zeros(n_samples, dtype=bool)
    if initial_estimates is None:
        # compute initial robust estimates from a random subset
        support[random_state.permutation(n_samples)[:n_support]] = True
    else:
        # get initial robust estimates from the function parameters
        location = initial_estimates[0]
        covariance = initial_estimates[1]
        # run a special iteration for that case (to get an initial support)
        precision = linalg.pinvh(covariance)
        X_centered = X - location
        dist = (np.dot(X_centered, precision) * X_centered).sum(1)
        # compute new estimates
        support[np.argsort(dist)[:n_support]] = True

    X_support = X[support]
    location = X_support.mean(0)
    covariance = cov_computation_method(X_support)

    # Iterative procedure for Minimum Covariance Determinant computation
    det = fast_logdet(covariance)
    # If the data already has singular covariance, calculate the precision,
    # as the loop below will not be entered.
    if np.isinf(det):
        precision = linalg.pinvh(covariance)

    previous_det = np.inf
    while (det < previous_det and remaining_iterations > 0
            and not np.isinf(det)):
        # save old estimates values
        previous_location = location
        previous_covariance = covariance
        previous_det = det
        previous_support = support
        # compute a new support from the full data set mahalanobis distances
        precision = linalg.pinvh(covariance)
        X_centered = X - location
        dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1)
        # compute new estimates
        support = np.zeros(n_samples, dtype=bool)
        support[np.argsort(dist)[:n_support]] = True
        X_support = X[support]
        location = X_support.mean(axis=0)
        covariance = cov_computation_method(X_support)
        det = fast_logdet(covariance)
        # update remaining iterations for early stopping
        remaining_iterations -= 1

    previous_dist = dist
    dist = (np.dot(X - location, precision) * (X - location)).sum(axis=1)
    # Check if best fit already found (det => 0, logdet => -inf)
    if np.isinf(det):
        results = location, covariance, det, support, dist
    # Check convergence
    if np.allclose(det, previous_det):
        # c_step procedure converged
        if verbose:
            print("Optimal couple (location, covariance) found before"
                  " ending iterations (%d left)" % (remaining_iterations))
        results = location, covariance, det, support, dist
    elif det > previous_det:
        # determinant has increased (should not happen)
        warnings.warn("Determinant has increased; this should not happen: "
                      "log(det) > log(previous_det) (%.15f > %.15f). "
                      "You may want to try with a higher value of "
                      "support_fraction (current value: %.3f)."
                      % (det, previous_det, n_support / n_samples),
                      RuntimeWarning)
        results = previous_location, previous_covariance, \
            previous_det, previous_support, previous_dist

    # Check early stopping
    if remaining_iterations == 0:
        if verbose:
            print('Maximum number of iterations reached')
        results = location, covariance, det, support, dist

    return results
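The loop above repeats a "C-step": pseudo-invert the current covariance with pinvh, rank all points by Mahalanobis distance, keep the n_support closest, and re-estimate location and covariance from that support. A hedged sketch of a single such step on toy data:

import numpy as np
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
X = rng.normal(size=(100, 2))
n_support = 50
location = X.mean(axis=0)
covariance = np.cov(X, rowvar=False)

precision = pinvh(covariance)
X_centered = X - location
dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1)
support = np.zeros(len(X), dtype=bool)
support[np.argsort(dist)[:n_support]] = True
location = X[support].mean(axis=0)              # updated robust location
covariance = np.cov(X[support], rowvar=False)   # updated robust covariance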
Code Example #27
def fast_mcd(X, support_fraction=None,
             cov_computation_method=empirical_covariance,
             random_state=None):
    """Estimates the Minimum Covariance Determinant matrix.

    Read more in the :ref:`User Guide <robust_covariance>`.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
      The data matrix, with p features and n samples.

    support_fraction : float, 0 < support_fraction < 1
          The proportion of points to be included in the support of the raw
          MCD estimate. Default is None, which implies that the minimum
          value of support_fraction will be used within the algorithm:
          `(n_samples + n_features + 1) / 2`.

    cov_computation_method : callable, default empirical_covariance
        The function which will be used to compute the covariance.
        Must return shape (n_features, n_features)

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Notes
    -----
    The FastMCD algorithm has been introduced by Rousseeuw and Van Driessen
    in "A Fast Algorithm for the Minimum Covariance Determinant Estimator,
    1999, American Statistical Association and the American Society
    for Quality, TECHNOMETRICS".
    The principle is to compute robust estimates on random subsets before
    pooling them into larger subsets, and finally into the full data set.
    Depending on the size of the initial sample, we have one, two or three
    such computation levels.

    Note that only raw estimates are returned. If one is interested in
    the correction and reweighting steps described in [RouseeuwVan]_,
    see the MinCovDet object.

    References
    ----------

    .. [RouseeuwVan] A Fast Algorithm for the Minimum Covariance
        Determinant Estimator, 1999, American Statistical Association
        and the American Society for Quality, TECHNOMETRICS

    .. [Butler1993] R. W. Butler, P. L. Davies and M. Jhun,
        Asymptotics For The Minimum Covariance Determinant Estimator,
        The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400

    Returns
    -------
    location : array-like, shape (n_features,)
        Robust location of the data.

    covariance : array-like, shape (n_features, n_features)
        Robust covariance of the features.

    support : array-like, type boolean, shape (n_samples,)
        A mask of the observations that have been used to compute
        the robust location and covariance estimates of the data set.

    """
    random_state = check_random_state(random_state)

    X = check_array(X, ensure_min_samples=2, estimator='fast_mcd')
    n_samples, n_features = X.shape

    # minimum breakdown value
    if support_fraction is None:
        n_support = int(np.ceil(0.5 * (n_samples + n_features + 1)))
    else:
        n_support = int(support_fraction * n_samples)

    # 1-dimensional case quick computation
    # (Rousseeuw, P. J. and Leroy, A. M. (2005) References, in Robust
    #  Regression and Outlier Detection, John Wiley & Sons, chapter 4)
    if n_features == 1:
        if n_support < n_samples:
            # find the sample shortest halves
            X_sorted = np.sort(np.ravel(X))
            diff = X_sorted[n_support:] - X_sorted[:(n_samples - n_support)]
            halves_start = np.where(diff == np.min(diff))[0]
            # take the middle points' mean to get the robust location estimate
            location = 0.5 * (X_sorted[n_support + halves_start] +
                              X_sorted[halves_start]).mean()
            support = np.zeros(n_samples, dtype=bool)
            X_centered = X - location
            support[np.argsort(np.abs(X_centered), 0)[:n_support]] = True
            covariance = np.asarray([[np.var(X[support])]])
            location = np.array([location])
            # get precision matrix in an optimized way
            precision = linalg.pinvh(covariance)
            dist = (np.dot(X_centered, precision) * (X_centered)).sum(axis=1)
        else:
            support = np.ones(n_samples, dtype=bool)
            covariance = np.asarray([[np.var(X)]])
            location = np.asarray([np.mean(X)])
            X_centered = X - location
            # get precision matrix in an optimized way
            precision = linalg.pinvh(covariance)
            dist = (np.dot(X_centered, precision) * (X_centered)).sum(axis=1)
    # Starting FastMCD algorithm for p-dimensional case
    if (n_samples > 500) and (n_features > 1):
        # 1. Find candidate supports on subsets
        # a. split the set in subsets of size ~ 300
        n_subsets = n_samples // 300
        n_samples_subsets = n_samples // n_subsets
        samples_shuffle = random_state.permutation(n_samples)
        h_subset = int(np.ceil(n_samples_subsets *
                       (n_support / float(n_samples))))
        # b. perform a total of 500 trials
        n_trials_tot = 500
        # c. select 10 best (location, covariance) for each subset
        n_best_sub = 10
        n_trials = max(10, n_trials_tot // n_subsets)
        n_best_tot = n_subsets * n_best_sub
        all_best_locations = np.zeros((n_best_tot, n_features))
        try:
            all_best_covariances = np.zeros((n_best_tot, n_features,
                                             n_features))
        except MemoryError:
            # The above is too big. Let's try with something much smaller
            # (and less optimal)
            n_best_tot = 10
            all_best_covariances = np.zeros((n_best_tot, n_features,
                                             n_features))
            n_best_sub = 2
        for i in range(n_subsets):
            low_bound = i * n_samples_subsets
            high_bound = low_bound + n_samples_subsets
            current_subset = X[samples_shuffle[low_bound:high_bound]]
            best_locations_sub, best_covariances_sub, _, _ = select_candidates(
                current_subset, h_subset, n_trials,
                select=n_best_sub, n_iter=2,
                cov_computation_method=cov_computation_method,
                random_state=random_state)
            subset_slice = np.arange(i * n_best_sub, (i + 1) * n_best_sub)
            all_best_locations[subset_slice] = best_locations_sub
            all_best_covariances[subset_slice] = best_covariances_sub
        # 2. Pool the candidate supports into a merged set
        # (possibly the full dataset)
        n_samples_merged = min(1500, n_samples)
        h_merged = int(np.ceil(n_samples_merged *
                       (n_support / float(n_samples))))
        if n_samples > 1500:
            n_best_merged = 10
        else:
            n_best_merged = 1
        # find the best couples (location, covariance) on the merged set
        selection = random_state.permutation(n_samples)[:n_samples_merged]
        locations_merged, covariances_merged, supports_merged, d = \
            select_candidates(
                X[selection], h_merged,
                n_trials=(all_best_locations, all_best_covariances),
                select=n_best_merged,
                cov_computation_method=cov_computation_method,
                random_state=random_state)
        # 3. Finally get the overall best (locations, covariance) couple
        if n_samples < 1500:
            # directly get the best couple (location, covariance)
            location = locations_merged[0]
            covariance = covariances_merged[0]
            support = np.zeros(n_samples, dtype=bool)
            dist = np.zeros(n_samples)
            support[selection] = supports_merged[0]
            dist[selection] = d[0]
        else:
            # select the best couple on the full dataset
            locations_full, covariances_full, supports_full, d = \
                select_candidates(
                    X, n_support,
                    n_trials=(locations_merged, covariances_merged),
                    select=1,
                    cov_computation_method=cov_computation_method,
                    random_state=random_state)
            location = locations_full[0]
            covariance = covariances_full[0]
            support = supports_full[0]
            dist = d[0]
    elif n_features > 1:
        # 1. Find the 10 best couples (location, covariance)
        # considering two iterations
        n_trials = 30
        n_best = 10
        locations_best, covariances_best, _, _ = select_candidates(
            X, n_support, n_trials=n_trials, select=n_best, n_iter=2,
            cov_computation_method=cov_computation_method,
            random_state=random_state)
        # 2. Select the best couple on the full dataset amongst the 10
        locations_full, covariances_full, supports_full, d = select_candidates(
            X, n_support, n_trials=(locations_best, covariances_best),
            select=1, cov_computation_method=cov_computation_method,
            random_state=random_state)
        location = locations_full[0]
        covariance = covariances_full[0]
        support = supports_full[0]
        dist = d[0]

    return location, covariance, support, dist
Code Example #28
def equality_time_graphical_lasso(
        S,
        K_init,
        max_iter,
        loss,
        C,
        rho,  # n_samples=None, 
        psi,
        gamma,
        tol,
        rtol,
        verbose,
        return_history,
        return_n_iter,
        mode,
        compute_objective,
        stop_at,
        stop_when,
        update_rho_options,
        init):
    """Equality constrained time-varying graphical LASSO solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T ||K_i||_{od,1} + beta sum_{i=2}^T Psi(K_i - K_{i-1})
        s.t. objective = c_i for i = 1, ..., T

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_features, n_features)
        Empirical covariance of data.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    gamma: float, optional
        Kernel parameter when psi is chosen to be 'kernel'.
    constrained_to: float or ndarray, shape (time steps)
        Log likelihood constraints for K_i
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix, with the inverse
        empirical covariance, zero matrix or precomputed.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    psi_name = psi.__name__

    if loss == 'LL':
        loss_function = neg_logl
    else:
        loss_function = dtrace

    K = K_init
    Z_0 = K.copy()
    Z_1 = K.copy()[:-1]
    Z_2 = K.copy()[1:]

    u = np.zeros((S.shape[0]))
    U_0 = np.zeros_like(Z_0)
    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    I = np.eye(S.shape[1])

    checks = [
        convergence(
            obj=equality_objective(loss_function, S, K, C, Z_0, Z_1, Z_2, psi))
    ]

    for iteration_ in range(max_iter):
        # update K
        A_K = U_0 - Z_0
        A_K[:-1] += Z_1 - U_1
        A_K[1:] += Z_2 - U_2
        A_K += A_K.transpose(0, 2, 1)
        A_K /= 2.

        K = soft_thresholding_od(A_K, lamda=1. / rho)

        # update Z_0
        residual_loss_constraint_u = loss_gen(loss_function, S, Z_0) - C + u

        A_Z = K + U_0
        A_Z += A_Z.transpose(0, 2, 1)
        A_Z /= 2.

        if loss_function == neg_logl:
            A_Z -= residual_loss_constraint_u[:, None, None] * S
            Z_0 = np.array([
                prox_logdet_constrained(_A, _a, I)
                for _A, _a in zip(A_Z, residual_loss_constraint_u)
            ])
        elif loss_function == dtrace:
            Z_0 = np.array([
                prox_dtrace_constrained(_A, _S, _a, I)
                for _A, _S, _a in zip(A_Z, S, residual_loss_constraint_u)
            ])

        # other Zs
        A_1 = K[:-1] + U_1
        A_2 = K[1:] + U_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        residual_loss_constraint = loss_gen(loss_function, S, Z_0) - C
        u += residual_loss_constraint
        U_0 += K - Z_0
        U_1 += K[:-1] - Z_1
        U_2 += K[1:] - Z_2

        print(residual_loss_constraint)

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            np.sum(residual_loss_constraint**2) + squared_norm(K - Z_0) +
            squared_norm(K[:-1] - Z_1) + squared_norm(K[1:] - Z_2))

        snorm = rho * np.sqrt(
            squared_norm(Z_0 - Z_0_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old))

        obj = equality_objective(loss_function, S, K, C, Z_0, Z_1, Z_2,
                                 psi) if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(Z_0.size + 2 * Z_1.size + S.shape[0]) * tol +
            rtol * max(
                np.sqrt(
                    np.sum(C**2) + squared_norm(Z_0) + squared_norm(Z_1) +
                    squared_norm(Z_2)),
                np.sqrt(
                    np.sum(
                        (residual_loss_constraint + C)**2) + squared_norm(K) +
                    squared_norm(K[:-1]) + squared_norm(K[1:]))),
            e_dual=np.sqrt(Z_0.size + 2 * Z_1.size) * tol + rtol * rho *
            np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
        )
        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        u *= rho / rho_new
        U_0 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new

        #assert is_pos_def(Z_0)
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in K])
    return_list = [K, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
Code Example #29
File: impute.py Project: wuyangf7/fizi
def _impute(merged_snps, ref, annot, taus, gwas_n, obs, to_impute, obsZ, ridge,
            run_fizi):
    """
    this is the internal logic for the imputation

    I refactored this into a separate function to improve flexibility for any changes downstream
    (e.g., MI, sampling, sketching, etc)

    testing out multiple imputation (MI) for the functional part of fizi
    we could incorporate MI into the estimation of LD as well but it might come with a big computational hit
    one cool trick might be to use sketching to speed up LD estimation to maintain performance for MI

    :param merged_snps: pyfizi.MergedPanel object containing merged GWAS and LDRef data
    :param ref: pyfizi.RefPanel object for reference genotype data at the region
    :param annot: pyfizi.Annot object representing the functional annotations at the region (default: None)
    :param taus: pyfizi.Tau object representing the prior variance terms for functional categories (default: None)
    :param gwas_n: numpy.ndarray or int GWAS sample size. If int assumes sample size is uniform at each SNP.
                    Not required if 'N' is column in GWAS data (default: None)
    :param obsZ: numpy.ndarray vector of observed Z-scores that have been flipped to match ref panel
    :param obs: numpy.ndarray boolean vector marking which rows in `merged_snps` have observed Z-scores
    :param to_impute: numpy.ndarray boolean vector marking which rows in `merged_snps` need to be imputed
    :param ridge: float Ridge term to regularize LD estimation (default=0.1)
    :param run_fizi: bool indicating if fizi or impg is run

    :return: (numpy.ndarray imputed_z, numpy.ndarray pvalues, numpy.ndarray r2blups)
    """

    from numpy.linalg import multi_dot as mdot
    from scipy.linalg import pinvh
    from scipy.stats import chi2

    log = logging.getLogger(pyfizi.LOG)
    nobs = np.sum(obs)
    nimp = np.sum(to_impute)

    # compute linkage-disequilibrium estimate
    log.debug("Estimating LD for {} SNPs".format(len(merged_snps)))
    LD = ref.estimate_ld(merged_snps, adjust=ridge)

    log.debug("Partitioning LD into quadrants")
    Voo_ld = LD[obs].T[obs].T
    Vuo_ld = LD[to_impute].T[obs].T
    Vou_ld = Vuo_ld.T
    Vuu_ld = LD[to_impute].T[to_impute].T

    if run_fizi:
        if taus is not None:
            A = annot.get_matrix(merged_snps, taus.names)
            estimates = taus.estimates
            D = np.diag(gwas_n * np.dot(A, estimates))
            Do = D.T[obs].T[obs]
            Du = D.T[to_impute].T[to_impute]
            uoV = Vuo_ld + mdot([Vuu_ld, Du, Vuo_ld]) + mdot(
                [Vuo_ld, Do, Voo_ld])
            ooV = Voo_ld + mdot([Voo_ld, Do, Voo_ld]) + mdot(
                [Vou_ld, Du, Vuo_ld])
            uuV = Vuu_ld + mdot([Vuu_ld, Du, Vuu_ld]) + mdot(
                [Vuo_ld, Do, Vou_ld])
        else:
            A = annot.get_matrix(merged_snps)
            names = annot.names
            Ao = A[obs]
            flag = np.mean(Ao != 0, axis=0) > 0
            Ao = Ao.T[flag].T
            A = A.T[flag].T
            names = names[flag]

            log.debug("Starting inference for variance parameters")
            estimates = pyfizi.infer_taus(obsZ, Voo_ld, Ao)
            if estimates is not None:
                log.debug("Finished variance parameter inference")

                estimates, sigma2e = estimates
                # rescale estimates
                estimates = estimates * np.sum(Ao != 0, axis=0) / np.sum(
                    A != 0, axis=0)

                # N gets inferred as part of the parameter
                D = np.diag(np.dot(A, estimates))
                Do = D.T[obs].T[obs]
                Du = D.T[to_impute].T[to_impute]
                uoV = Vuo_ld + mdot([Vuu_ld, Du, Vuo_ld]) + mdot(
                    [Vuo_ld, Do, Voo_ld])
                ooV = Voo_ld + mdot([Voo_ld, Do, Voo_ld]) + mdot(
                    [Vou_ld, Du, Vuo_ld])
                uuV = Vuu_ld + mdot([Vuu_ld, Du, Vuu_ld]) + mdot(
                    [Vuo_ld, Do, Vou_ld])
            else:
                log.warning(
                    "Variance parameter optimization failed. Defaulting to ImpG"
                )
                # estimation failed... default to ImpG
                uoV = Vuo_ld
                ooV = Voo_ld
                uuV = Vuu_ld
    else:
        uoV = Vuo_ld
        ooV = Voo_ld
        uuV = Vuu_ld

    log.debug(
        "Computing inverse of variance-covariance matrix for {} observed SNPs".
        format(nobs))
    ooVinv = pinvh(ooV, check_finite=False)

    log.debug("Imputing {} SNPs from {} observed scores".format(nimp, nobs))
    impZs = mdot([uoV, ooVinv, obsZ])

    # compute r2-pred scores
    r2blup = np.diag(mdot([uoV, ooVinv, uoV.T])) / np.diag(uuV)

    # compute two-sided z-test for p-value
    pvals = chi2.sf(impZs**2, 1)

    return impZs, pvals, r2blup
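The ImpG branch above reduces to a conditional-Gaussian (BLUP-style) prediction of the unobserved Z-scores from the observed ones through the LD matrix. A toy, self-contained sketch of just that step, using stand-in data rather than pyfizi objects:

import numpy as np
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
LD = np.corrcoef(rng.normal(size=(200, 5)), rowvar=False)  # stand-in LD matrix
obs = np.array([True, True, True, False, False])           # which SNPs are observed
obsZ = rng.normal(size=obs.sum())                          # observed Z-scores

Voo = LD[np.ix_(obs, obs)]
Vuo = LD[np.ix_(~obs, obs)]
Vuu = LD[np.ix_(~obs, ~obs)]

ooVinv = pinvh(Voo)
impZ = Vuo @ ooVinv @ obsZ                             # imputed Z-scores
r2blup = np.diag(Vuo @ ooVinv @ Vuo.T) / np.diag(Vuu)  # imputation quality per SNP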
Code Example #30
def fast_mcd(X,
             support_fraction=None,
             cov_computation_method=empirical_covariance,
             random_state=None):
    """Estimates the Minimum Covariance Determinant matrix.

    Read more in the :ref:`User Guide <robust_covariance>`.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
      The data matrix, with p features and n samples.

    support_fraction : float, 0 < support_fraction < 1
          The proportion of points to be included in the support of the raw
          MCD estimate. Default is None, which implies that the minimum
          value of support_fraction will be used within the algorithm:
          `(n_samples + n_features + 1) / 2`.

    cov_computation_method : callable, default empirical_covariance
        The function which will be used to compute the covariance.
        Must return shape (n_features, n_features)

    random_state : int, RandomState instance or None, optional (default=None)
        If int, random_state is the seed used by the random number generator;
        If RandomState instance, random_state is the random number generator;
        If None, the random number generator is the RandomState instance used
        by `np.random`.

    Notes
    -----
    The FastMCD algorithm has been introduced by Rousseeuw and Van Driessen
    in "A Fast Algorithm for the Minimum Covariance Determinant Estimator,
    1999, American Statistical Association and the American Society
    for Quality, TECHNOMETRICS".
    The principle is to compute robust estimates on random subsets before
    pooling them into a larger subset, and finally into the full data set.
    Depending on the size of the initial sample, we have one, two or three
    such computation levels.

    Note that only raw estimates are returned. If one is interested in
    the correction and reweighting steps described in [RouseeuwVan]_,
    see the MinCovDet object.

    References
    ----------

    .. [RouseeuwVan] A Fast Algorithm for the Minimum Covariance
        Determinant Estimator, 1999, American Statistical Association
        and the American Society for Quality, TECHNOMETRICS

    .. [Butler1993] R. W. Butler, P. L. Davies and M. Jhun,
        Asymptotics For The Minimum Covariance Determinant Estimator,
        The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400

    Returns
    -------
    location : array-like, shape (n_features,)
        Robust location of the data.

    covariance : array-like, shape (n_features, n_features)
        Robust covariance of the features.

    support : array-like, type boolean, shape (n_samples,)
        A mask of the observations that have been used to compute
        the robust location and covariance estimates of the data set.

    """
    random_state = check_random_state(random_state)

    X = check_array(X, ensure_min_samples=2, estimator='fast_mcd')
    n_samples, n_features = X.shape

    # minimum breakdown value
    if support_fraction is None:
        n_support = int(np.ceil(0.5 * (n_samples + n_features + 1)))
    else:
        n_support = int(support_fraction * n_samples)

    # 1-dimensional case quick computation
    # (Rousseeuw, P. J. and Leroy, A. M. (2005) References, in Robust
    #  Regression and Outlier Detection, John Wiley & Sons, chapter 4)
    if n_features == 1:
        if n_support < n_samples:
            # find the sample shortest halves
            X_sorted = np.sort(np.ravel(X))
            diff = X_sorted[n_support:] - X_sorted[:(n_samples - n_support)]
            halves_start = np.where(diff == np.min(diff))[0]
            # take the middle points' mean to get the robust location estimate
            location = 0.5 * (X_sorted[n_support + halves_start] +
                              X_sorted[halves_start]).mean()
            support = np.zeros(n_samples, dtype=bool)
            X_centered = X - location
            support[np.argsort(np.abs(X_centered), 0)[:n_support]] = True
            covariance = np.asarray([[np.var(X[support])]])
            location = np.array([location])
            # get precision matrix in an optimized way
            precision = linalg.pinvh(covariance)
            dist = (np.dot(X_centered, precision) * (X_centered)).sum(axis=1)
        else:
            support = np.ones(n_samples, dtype=bool)
            covariance = np.asarray([[np.var(X)]])
            location = np.asarray([np.mean(X)])
            X_centered = X - location
            # get precision matrix in an optimized way
            precision = linalg.pinvh(covariance)
            dist = (np.dot(X_centered, precision) * (X_centered)).sum(axis=1)
    # Starting FastMCD algorithm for p-dimensional case
    if (n_samples > 500) and (n_features > 1):
        # 1. Find candidate supports on subsets
        # a. split the set in subsets of size ~ 300
        n_subsets = n_samples // 300
        n_samples_subsets = n_samples // n_subsets
        samples_shuffle = random_state.permutation(n_samples)
        h_subset = int(
            np.ceil(n_samples_subsets * (n_support / float(n_samples))))
        # b. perform a total of 500 trials
        n_trials_tot = 500
        # c. select 10 best (location, covariance) for each subset
        n_best_sub = 10
        n_trials = max(10, n_trials_tot // n_subsets)
        n_best_tot = n_subsets * n_best_sub
        all_best_locations = np.zeros((n_best_tot, n_features))
        try:
            all_best_covariances = np.zeros(
                (n_best_tot, n_features, n_features))
        except MemoryError:
            # The above is too big. Let's try with something much smaller
            # (and less optimal): shrink n_best_tot *before* re-allocating
            n_best_tot = 10
            n_best_sub = 2
            all_best_covariances = np.zeros(
                (n_best_tot, n_features, n_features))
        for i in range(n_subsets):
            low_bound = i * n_samples_subsets
            high_bound = low_bound + n_samples_subsets
            current_subset = X[samples_shuffle[low_bound:high_bound]]
            best_locations_sub, best_covariances_sub, _, _ = select_candidates(
                current_subset,
                h_subset,
                n_trials,
                select=n_best_sub,
                n_iter=2,
                cov_computation_method=cov_computation_method,
                random_state=random_state)
            subset_slice = np.arange(i * n_best_sub, (i + 1) * n_best_sub)
            all_best_locations[subset_slice] = best_locations_sub
            all_best_covariances[subset_slice] = best_covariances_sub
        # 2. Pool the candidate supports into a merged set
        # (possibly the full dataset)
        n_samples_merged = min(1500, n_samples)
        h_merged = int(
            np.ceil(n_samples_merged * (n_support / float(n_samples))))
        if n_samples > 1500:
            n_best_merged = 10
        else:
            n_best_merged = 1
        # find the best couples (location, covariance) on the merged set
        selection = random_state.permutation(n_samples)[:n_samples_merged]
        locations_merged, covariances_merged, supports_merged, d = \
            select_candidates(
                X[selection], h_merged,
                n_trials=(all_best_locations, all_best_covariances),
                select=n_best_merged,
                cov_computation_method=cov_computation_method,
                random_state=random_state)
        # 3. Finally get the overall best (locations, covariance) couple
        if n_samples < 1500:
            # directly get the best couple (location, covariance)
            location = locations_merged[0]
            covariance = covariances_merged[0]
            support = np.zeros(n_samples, dtype=bool)
            dist = np.zeros(n_samples)
            support[selection] = supports_merged[0]
            dist[selection] = d[0]
        else:
            # select the best couple on the full dataset
            locations_full, covariances_full, supports_full, d = \
                select_candidates(
                    X, n_support,
                    n_trials=(locations_merged, covariances_merged),
                    select=1,
                    cov_computation_method=cov_computation_method,
                    random_state=random_state)
            location = locations_full[0]
            covariance = covariances_full[0]
            support = supports_full[0]
            dist = d[0]
    elif n_features > 1:
        # 1. Find the 10 best couples (location, covariance)
        # considering two iterations
        n_trials = 30
        n_best = 10
        locations_best, covariances_best, _, _ = select_candidates(
            X,
            n_support,
            n_trials=n_trials,
            select=n_best,
            n_iter=2,
            cov_computation_method=cov_computation_method,
            random_state=random_state)
        # 2. Select the best couple on the full dataset amongst the 10
        locations_full, covariances_full, supports_full, d = select_candidates(
            X,
            n_support,
            n_trials=(locations_best, covariances_best),
            select=1,
            cov_computation_method=cov_computation_method,
            random_state=random_state)
        location = locations_full[0]
        covariance = covariances_full[0]
        support = supports_full[0]
        dist = d[0]

    return location, covariance, support, dist
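In user code this routine is usually reached through scikit-learn's public MinCovDet estimator rather than called directly; a short usage sketch:

import numpy as np
from sklearn.covariance import MinCovDet

X = np.random.RandomState(42).normal(size=(100, 3))
mcd = MinCovDet(random_state=42).fit(X)
print(mcd.location_)        # robust location estimate
print(mcd.covariance_)      # robust (reweighted) covariance estimate
print(mcd.support_.sum())   # number of observations in the support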
def write_exp_stds(grid, name):

    net = grid

    pp.rundcpp(net)
    ppc = net["_ppc"]

    ##### Setup Grid fluctuation parameters and constraints ########

    ## threshold on shift significance in DC-PF Eqs
    ## pwr = pwr_shf + np.real(bbus)*va
    ##

    shf_eps = 1e-4

    ## Std for fluctuating loads divided by their nominal values:
    ## for small grids values 0.5-1 are realistic
    ## larger grids have cov_std = 0.1 -- 0.3 or less
    ##
    cov_std = 0.25
    if name == 'grid118i':
        cov_std = 0.1

    ## Phase angle difference limit
    ## small grids: pi/8 -- pi/6
    ## large grids: pi/3 -- pi/4
    ##

    bnd = math.pi / 4

    ### Threshold for cutting small probabilities:
    ### discard all probabilities lower than thrs * prb(closest hyperplane)
    ###
    ###
    ### Crucially affects time performance
    ###

    thrs = 0.001

    ### Number of samples used in experiments
    ### 500 is often enough
    ### 10000 is a default value that suppresses the variance

    nsmp = 1000

    ### Step-sizes for KL and Var minimization
    ### works well with 0.1-0.01

    eta_vm = 0.1
    eta_kl = 0.1

    ### Rounding threshold in optimization:
    ### if a (normalized on the simplex) hpl probability becomes lower than 0.001
    ### we increase it to this level
    ###
    ### Crucially affects numerical stability
    ###

    eps = 0.001

    ##### Setup power grid case in a convenient form for further sampling ########

    ### find the number of lines (m) and buses (n)

    m = net.line['to_bus'].size
    n = net.res_bus['p_mw'].size

    ### Construct adjacency matrix
    ###

    adj = np.zeros((2 * m, n))
    for i in range(0, m):
        adj[i, net.line['to_bus'][i]] = 1
        adj[i, net.line['from_bus'][i]] = -1
        adj[i + m, net.line['to_bus'][i]] = -1
        adj[i + m, net.line['from_bus'][i]] = 1

    ### DC power flow equations have a form:
    ###
    ### pwr = pwr_shf + np.real(bbus)*va
    ### (compute all parameters)

    bbus = np.real(ppc['internal']['Bbus'])
    va = math.pi * net.res_bus['va_degree'] / 180
    pwr = -net.res_bus['p_mw']
    pwr_shf = pwr - bbus @ va

    ### pwr_shf is significant or not:
    ###
    ### if the shift is small: zero it out
    ### (simplifies testing and removes "math zeros")

    print("significant shift: ", np.max(pwr_shf) - np.min(pwr_shf) > shf_eps)
    if (np.max(pwr_shf) - np.min(pwr_shf) < shf_eps):
        pwr_shf[range(0, n)] = 0

    ### Phase angle differences:
    ###
    ### va = pinv(bbus)*(pwr - pwr_shf)
    ### va_d = adj*va = adj*pinv(bbus)*(pwr - pwr_shf)
    ### va_d = pf_mat*pwr - va_shf

    bbus_pinv = pinvh(bbus.todense())
    pf_mat = adj @ bbus_pinv
    va_shf = pf_mat @ pwr_shf

    ### Voltage angle differences:
    ###

    va_d = pf_mat @ pwr - va_shf

    ##### Distribution of fluctuations ######

    ### assume there is only one slack bus (a higher-level grid) in the grid;
    ### suppress all its fluctuations and balance the grid
    ###
    ### TODO: adjust to a general case
    ###

    slck = net.ext_grid['bus']
    slck_mat = np.eye(n)
    slck_mat[slck] = -1
    ## set the whole slack row to -1
    slck_mat[slck, slck] = 0
    # and zero out the diagonal entry for the slack itself

    ### set fluctuating components: either loads or gens or both
    ###

    loads = np.zeros(n)
    gens = np.zeros(n)
    ctrls = np.zeros(n)
    ## controllable loads + gens

    loads[net.load['bus']] = -net.res_load['p_mw']
    gens[net.gen['bus']] = net.res_gen['p_mw']
    ctrls = loads + gens

    ### assume only loads are fluctuating
    ###

    xi = loads

    ### Set covariance matrix and mean
    ###
    ### cov_sq = square root of the covariance matrix
    ### Gaussian rv with covariance \Sigma is \Sigma^{1/2} * std_normal_rv
    ###
    ### TODO: change to LU/cholesky factorization
    ###

    cov_sq = cov_std * np.diag(np.abs(xi))

    ### Final equations with fluctuations xi are then
    ###
    ### w/o fluctuations:
    ### va_d = pf_mat*pwr - va_shf
    ### with fluctuations:
    ### va_d = pf_mat@(pwr + slck_mat*cov_sq*xi) - va_shf
    ### va_d = pf_mat@pwr - va_shf + (pf_mat@(slck_mat@cov_sq))@xi_std
    ### va_d = mu + A@xi_std
    ### where xi_std is a standard normal with only fluctuating components
    ###

    A = (pf_mat @ slck_mat) @ cov_sq
    mu = pf_mat @ pwr - va_shf

    ### Feasibility Polytope Inequalities
    ### bnd \ge va_d = mu_f + A_f@xi_std
    ### incorporates both va_d \le b and va_d \ge -b as we have va_d's with 2 signs
    ###

    b = np.ones(2 * m) * bnd

    ### normalize the matrices to make it easier to compute a failure probability
    ###

    ### compute row norms of A
    nrms = np.maximum(la.norm(A, axis=1), 1e-20)

    ### normalize A and b so that b_n\ge A_n*xi_std
    b_n = (b - mu) / nrms
    A_n = [A[i] / nrms[i] for i in range(0, 2 * m)]

    ##### Assess equations feasibility #######

    ### Power balance check
    ###

    print("Eqs balance check:", 0 == np.sum(np.sign(mu)))

    ### check positiveness of bnd - mu_f = RHS - LHS
    ###

    print("Inqs. feasibility check: ", np.min(b - mu) > 0)
    print("Min gap in phase angles = min(RHS - LHS)",
          np.min(b - mu))  ## positive value, otherwise the grid fails whp
    print("The RHS (phase angle diff max) = ", bnd)

    ### Compute probabilities:
    ### prb: probability of each hpl failure
    ### p_up, p_dwn: upper and lower bounds
    ###

    prb = norm.cdf(-b_n)
    p_up = np.sum(prb)
    p_dwn = np.max(prb)

    print("the union bound (upper):", p_up)
    print("the max bound (lower):", p_dwn)

    ### Keep only valuable probabilities:
    ### - use the union bound for all the rest
    ### - keep only the prbs higher than the thrs* p_dwn

    prbh_id = (prb > thrs * p_dwn)
    prb_rmd = np.sum(prb[~(prb > thrs * p_dwn)])

    print("Remainder probability (omitted):", prb_rmd)

    ############ Preliminary steps for Sampling and Importance Sampling ############

    ### normalize all active probabilities to one
    ### as we only play a hyperplane out of them
    ###
    ### NB: crucial steps in performance optimization
    ###

    x_id = np.where(prbh_id == True)[0]

    ### local normalized versions of A and b,
    ### reduced in size: number of rows now is equal to a number of constraints
    ### that have a high probability of violation
    ###

    x_bn = b_n[x_id]

    ### we do not care about the full matrix A and vector b
    ### only about important parts of them
    A_n = np.array(A_n)
    x_An = A_n[x_id]

    print("# hpls we care of: ", len(x_bn))

    ############# Monte-Carlo ##################

    rv = norm()
    x_std = norm.rvs(size=[n, nsmp])
    smp = x_An @ x_std

    ### fls_mc = failures in Monte-Carlo, e.g.
    ### when MC discovers a failure
    ###

    fls_mc = sum((x_bn <= smp.T[:]).T)
    print("Max # of hlps a sample if out of: ", np.max(fls_mc))

    ### MC failure expectation and std
    ###

    mc_exp = (1 - np.sum(fls_mc == 0) / nsmp) * (1 - prb_rmd) + prb_rmd
    mc_std = (1 - prb_rmd) / math.sqrt(nsmp)
    # violation_dict = {}
    for i in range(0, np.max(fls_mc) + 1):
        print(i, "hpls violated (exactly) vs # cases", np.sum(fls_mc == i))
    #     violation_dict[i] = int(np.sum(fls_mc == i))

    print("\nMC(exp, std):", (mc_exp, mc_std))

    ### write into file
    # path_to_viol_dirs = os.path.join("results", "hplns_violations")
    # with open(os.path.join(path_to_viol_dirs, "grid3120", "result.json"), 'w+') as fp:
    #     json.dump(violation_dict, fp)

    ############# ALOE ##################
    ###
    ### Exactly follows the Owen/Maximov/Chertkov paper, EJOS'19
    ###
    ### sample z ~ N(0, I_n)
    ### sample u ~ U(0,1)
    ### compute y = F^{-1}(u F(-b_i))
    ### compute x = - (a_i * y + (I - a_i.T * a_i) z)
    ###
    ### Output: union bound divided by the expected failure multiplicity
    ###

    ### Initialize samplers
    ###
    ### sample z ~ N(0, I_n) and u ~ U(0,1)
    ###

    nsmp_ = 10000
    rv = norm()
    rv_u = uniform()
    z = norm.rvs(size=[nsmp_, n])
    u = uniform.rvs(size=[nsmp_])

    ### x_alph is a vector of ALOE probabilities
    ### normalized by a unit simplex
    ###

    x_alph = prb[prbh_id] / np.sum(prb[prbh_id])
    print("ALOE prbs for major hpls: ", x_alph)

    ### _hpl: how many smpls beyond each of the hpls
    ###

    _hpl = multinomial.rvs(n=nsmp_, p=x_alph)

    ### print("# samples per hpl", _hpl)

    ### Get cumulative sums, which are easier to work with
    _hpl = list(itertools.accumulate(_hpl))
    _hpl = np.array(_hpl)

    ### print("cusum of # hpls", _hpl)

    ### Generate samples
    ### x_aloe -- samples generated by ALOE
    ###
    ### TODO: seems optimizable, but I am not sure about memory mgmnt in python
    x_aloe = np.zeros([nsmp_, n])

    # index of the active hyperplane
    hpl_id = 0

    ### get samples x_aloe according to the algorithm
    #for i in tqdm(range(0,nsmp)):
    for i in range(0, nsmp_):
        ### get index of a hyperplane to sample beyond
        hpl_id = (hpl_id, hpl_id + 1)[i >= _hpl[hpl_id]]
        y = norm.ppf(u[i] * norm.cdf(-x_bn[hpl_id]))
        x_aloe[i] = -x_An[hpl_id] * y - z[i] + np.outer(
            x_An[hpl_id], x_An[hpl_id]) @ z[i]

    ### test how many constraints are violated
    smp = x_An @ x_aloe.T

    ### compute expectation and std final and history
    aloe_exp = p_up * np.sum(
        1. / np.sum(x_bn <= smp.T[:], axis=1)) / nsmp_ + prb_rmd
    aloe_std = p_up * math.sqrt(2 * len(_hpl)) / math.sqrt(nsmp_)
    # indeed len(_hpl) instead of 2*m in the Thrm
    aloe_exp_history = [
        p_up * np.sum(1. / np.sum(x_bn <=
                                  (x_An @ x_aloe[:i, :].T).T, axis=1)) /
        (i + 1) + prb_rmd for i in range(0, nsmp_)
    ]
    #aloe_std_history = [p_up*math.sqrt(2*len(_hpl))/math.sqrt(i + 1) for i in range(0, nsmp_)]
    aloe_std_history = [np.std(aloe_exp_history[:i + 1]) for i in range(nsmp)]

    print("ALOE (exp, std)", (aloe_exp, aloe_std))

    ####### Optimization approach ######
    #######
    ####### Variance Minimization ######
    #######

    ### setup the initial values

    eta = eta_vm
    md_var = 0
    md_exp = 0
    grad = np.zeros(len(x_bn))
    #gradient on each iteration
    _hpl = np.zeros(nsmp)
    # hpls chosen by the method

    ### intentionally use a copy instead of a reference
    ### alph is a vector of weights to be updated by the algorithm
    ###

    alph = x_alph.copy()

    ### history of probability estimate and std
    md_exp_history = []
    md_std_history = []

    # values for Phi (x_bn)
    x_phi = [norm.cdf(-x_bn[i]) for i in range(0, len(x_bn))]

    ### grad normalization by prbs[i] factor is introduced to make computations numerically stable
    ###

    prbs = prb[prbh_id]

    for i in tqdm(range(0, nsmp)):

        ### sample x according to current alph
        hpl_id = np.where(
            multinomial.rvs(n=1, p=alph, size=1, random_state=None)[0] == 1)[0]
        _hpl[i] = hpl_id

        ### generate a sample following the ALOE procedure
        y = norm.ppf(u[i] * norm.cdf(-x_bn[hpl_id]))

        x_smp = -x_An[hpl_id] * y - z[i] + np.outer(x_An[hpl_id],
                                                    x_An[hpl_id]) @ z[i]

        ### the RHS' to be compared with x_bn
        x_smp = x_An @ x_smp.T

        ### results of constraints violations for each generated object
        cns_vlt = (x_bn <= x_smp.T[:])[0]

        ### weight vector defined by the multiplicity of constraint violation for each sample
        wgt = 1. / np.sum(np.multiply(cns_vlt, np.multiply(alph, 1. / x_alph)))

        ### compute gradient of the variance, see the paper (our + OMC) for details
        grad = [
            -p_up * p_up * wgt * wgt * norm.pdf(x_smp[k])[0] * cns_vlt[k] /
            prbs[k] for k in range(len(x_smp))
        ]
        grad = np.array(grad)

        ### A very large gradient signals a problem, as it can zero out all the weights
        if (la.norm(eta * grad) > 1e4):
            print(
                "\n##############    Extremely high gradient      ############\n"
            )
            print("Iteration: ", i, "\nGradient:", grad)

        ### make a ``simplex MD'' update
        alph = [
            math.exp(-eta * grad[k]) * alph[k] for k in range(0, len(x_smp))
        ]

        ### enter if some coordinates are too small and may cause numerical instability
        ### increase the corresponding weights
        if (np.min(alph) < eps):
            print("###########  some coordinates are small  #################")
            alph = [alph[k] + eps for k in range(0, len(x_bn))]

        ### make a projection to the unit simplex
        alph = alph / np.sum(alph)

        ### adjust contribution to the errors
        md_exp = md_exp + wgt
        md_exp_history.append(p_up * md_exp / (i + 1) + prb_rmd)
        md_var = md_var + p_up * np.dot(grad.T, grad)
        #md_std_history.append(p_up * math.sqrt(md_var) / (i + 1))

    md_std_history = [np.std(md_exp_history[:i + 1]) for i in range(nsmp)]
    print("Optimal weigths of MD-Var minimization: ", alph)
    print("Optimal weigths of ALOE", x_alph)

    ### normalize errors, compute standard deviation
    md_exp = p_up * md_exp / nsmp + prb_rmd
    md_std = p_up * math.sqrt(md_var) / nsmp

    print("MD-Var (exp, std)", (md_exp, md_std))
    #print("assert normalization:", np.sum(alph), np.sum(x_alph))

    ####### Optimization approach ######
    #######
    ####### KL Minimization ######
    #######

    ### SMD step-size
    eta = eta_kl

    ### setup initial values
    kl_exp = 0
    kl_var = 0
    grad = np.zeros(len(x_bn))
    _hpl = np.zeros(nsmp)
    ## _hpl[i] = beyond which hpl we sample on iteration i

    ### intentionally use a copy instead of a reference
    ### alph is an optimization variable
    alph = x_alph.copy()

    ### history of probability estimate and std
    kl_exp_history = []
    kl_std_history = []

    ### this normalization factor is introduced to make computations numerically stable
    prbs = prb[prbh_id]

    for i in tqdm(range(0, nsmp)):  #,miniters=500):

        ### sample x according to current alph
        hpl_id = np.where(
            multinomial.rvs(n=1, p=alph, size=1, random_state=None)[0] == 1)[0]
        _hpl[i] = hpl_id

        ### generate a sample according to the ALOE procedure
        y = norm.ppf(u[i] * norm.cdf(-x_bn[hpl_id]))
        x_smp = -x_An[hpl_id] * y - z[i] + np.outer(x_An[hpl_id],
                                                    x_An[hpl_id]) @ z[i]

        ### RHS to compare with x_bn
        x_smp = x_An @ x_smp.T

        ### results of constraints violations for the generated object
        cns_vlt = (x_bn <= x_smp.T[:])[0]

        ### object weight which is set according to ALOE
        wgt = 1. / np.sum(np.multiply(cns_vlt, np.multiply(alph, 1. / x_alph)))

        # the KL divergence's gradient
        grad = [
            -p_up * wgt * norm.pdf(x_smp[k])[0] * cns_vlt[k] / prbs[k]
            for k in range(len(x_smp))
        ]
        grad = np.array(grad)

        ### A very large gradient signals a problem, as it can zero out all the weights
        if (la.norm(eta * grad) > 1e4):
            print(
                "\n##############    Extremely high gradient      ############\n"
            )
            print("Iteration: ", i, "\nGradient:", grad)

        ### make a ``simplex MD'' update
        alph = [
            math.exp(-eta * grad[k]) * alph[k] for k in range(0, len(x_smp))
        ]

        ### enter if some coordinates are too small and may cause numerical instability
        ### increase the corresponding weights
        if (np.min(alph) < eps):
            print("###########  some coordinates are small  #################")
            alph = [alph[k] + eps for k in range(0, len(x_bn))]

        ### make a projection to the unit simplex
        alph = alph / np.sum(alph)

        ### adjust contribution to the errors
        kl_exp = kl_exp + wgt
        kl_exp_history.append(p_up * kl_exp / (i + 1) + prb_rmd)
        kl_var = kl_var + p_up * np.dot(grad.T, grad) * wgt
        #kl_std_history.append(p_up * math.sqrt(kl_var) / (i + 1))
    kl_std_history = [np.std(kl_exp_history[:i + 1]) for i in range(nsmp)]
    print("Optimal weigths of MD-KL minimization: ", alph)
    print("Optimal weigths of ALOE", x_alph)

    ### normalize errors
    kl_exp = p_up * kl_exp / nsmp + prb_rmd
    kl_std = p_up * math.sqrt(kl_var) / nsmp
    print("MD-KL (exp, std)", (kl_exp, kl_std))

    #print("assert normalization:", np.sum(alph), np.sum(x_alph))

    ##############  Output all probabilities  ##################

    print("the union bound (up):", p_up)
    print("the max bound (lower):", p_dwn)

    print("MC(exp, std):", mc_exp, mc_std)
    print("ALOE(exp, std)", aloe_exp, aloe_std)

    print("MD-Var(exp, var)", md_exp, md_std)
    print("MD-KL(exp, var)", kl_exp, kl_std)
    output_dict = {}
    output_dict["MD-Var-exp"] = [float(a) for a in md_exp_history]
    output_dict["MD-KL-exp"] = [float(a) for a in kl_exp_history]
    output_dict["ALOE-Var-exp"] = [float(a) for a in aloe_exp_history][:nsmp]
    output_dict["MD-Var-std"] = [float(a) for a in md_std_history]
    output_dict["MD-KL-std"] = [float(a) for a in kl_std_history]
    output_dict["ALOE-Var-std"] = [float(a) for a in aloe_std_history][:nsmp]
    output_dict["ALOE-inf-exp"] = float(aloe_exp_history[-1])

    with open(name + ".json", 'w+') as fp:
        json.dump(output_dict, fp)
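The ALOE block above follows the mixture-of-shifted-Gaussians estimator of the Owen/Maximov/Chertkov paper. For reference, a self-contained sketch of that estimator for normalized constraints A_n @ x >= b_n with unit-norm rows (hypothetical helper name, separate from the script above):

import numpy as np
from scipy.stats import norm

def aloe_estimate(A_n, b_n, nsmp=10000, seed=0):
    """Estimate P(any a_i^T x >= b_i) for x ~ N(0, I), rows a_i of unit norm."""
    rng = np.random.default_rng(seed)
    n = A_n.shape[1]
    prb = norm.cdf(-b_n)                  # per-hyperplane failure probabilities
    p_up = prb.sum()                      # union bound
    alph = prb / p_up                     # mixture weights over hyperplanes
    idx = rng.choice(len(b_n), size=nsmp, p=alph)
    z = rng.standard_normal((nsmp, n))
    u = rng.uniform(size=nsmp)
    est = 0.0
    for k in range(nsmp):
        a = A_n[idx[k]]
        y = norm.ppf(u[k] * norm.cdf(-b_n[idx[k]]))  # sample beyond hyperplane idx[k]
        x = -a * y - z[k] + np.outer(a, a) @ z[k]    # conditioned Gaussian sample
        m = max(np.sum(A_n @ x >= b_n), 1)           # failure multiplicity (guard ties)
        est += 1.0 / m
    return p_up * est / nsmp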
Code Example #32
def latent_graphical_lasso(
    emp_cov,
    alpha=1.0,
    tau=1.0,
    rho=1.0,
    max_iter=100,
    verbose=False,
    tol=1e-4,
    rtol=1e-2,
    return_history=False,
    return_n_iter=True,
    update_rho_options=None,
    compute_objective=True,
    init="empirical",
):
    r"""Latent variable graphical lasso solver via ADMM.

    Solves the following problem:
        min - log_likelihood(S, K-L) + alpha ||K||_{od,1} + tau ||L_i||_*

    where S = (1/n) X^T \times X is the empirical covariance of the data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : array-like
        Empirical covariance matrix.
    alpha, tau : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. Default is to take
        the empirical covariance and invert it.

    Returns
    -------
    K, L : np.array, 2-dimensional, size (d x d)
        Solution to the problem.
    S : np.array, 2 dimensional
        Empirical covariance matrix.
    n_iter : int
        If return_n_iter, returns the number of iterations before convergence.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    K = init_precision(emp_cov, mode=init)
    L = np.zeros_like(emp_cov)
    U = np.zeros_like(emp_cov)
    R_old = np.zeros_like(emp_cov)

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = K - L - U
        A += A.T
        A /= 2.0
        R = prox_logdet(emp_cov - rho * A, lamda=1.0 / rho)

        A = L + R + U
        K = soft_thresholding(A, lamda=alpha / rho)

        A = K - R - U
        A += A.T
        A /= 2.0
        L = prox_trace_indicator(A, lamda=tau / rho)

        # update residuals
        U += R - K + L

        # diagnostics, reporting, termination checks
        obj = objective(emp_cov, R, K, L, alpha,
                        tau) if compute_objective else np.nan
        rnorm = np.linalg.norm(R - K + L)
        snorm = rho * np.linalg.norm(R - R_old)
        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(R.size) * tol +
            rtol * max(np.linalg.norm(R), np.linalg.norm(K - L)),
            e_dual=np.sqrt(R.size) * tol + rtol * rho * np.linalg.norm(U),
        )
        R_old = R.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break
        if check.obj == np.inf:
            break
        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = linalg.pinvh(K)
    return_list = [K, L, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
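A short usage sketch, assuming the solver above and the regain helpers it relies on are importable; the data here is synthetic:

import numpy as np

rng = np.random.RandomState(0)
X = rng.normal(size=(200, 10))
emp_cov = np.cov(X, rowvar=False)

# with the defaults return_history=False, return_n_iter=True
K, L, covariance_, n_iter = latent_graphical_lasso(
    emp_cov, alpha=0.1, tau=0.5, max_iter=200)
# K: sparse precision, L: low-rank latent term, covariance_ = pinvh(K)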
Code Example #33
    def _fit(self, pairs, y):
        if not HAS_SKGGM:
            if self.verbose:
                print("SDML will use scikit-learn's graphical lasso solver.")
        else:
            if self.verbose:
                print("SDML will use skggm's graphical lasso solver.")
        pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples')

        # set up (the inverse of) the prior M
        # if the prior is the default (None), we raise a warning
        _, prior_inv = _initialize_metric_mahalanobis(
            pairs,
            self.prior,
            return_inverse=True,
            strict_pd=True,
            matrix_name='prior',
            random_state=self.random_state)
        diff = pairs[:, 0] - pairs[:, 1]
        loss_matrix = (diff.T * y).dot(diff)
        emp_cov = prior_inv + self.balance_param * loss_matrix

        # our initialization will be the matrix with emp_cov's eigenvalues,
        # with a constant added so that they are all positive (plus an epsilon
        # to ensure definiteness). This is empirical.
        w, V = np.linalg.eigh(emp_cov)
        min_eigval = np.min(w)
        if min_eigval < 0.:
            warnings.warn(
                "Warning, the input matrix of graphical lasso is not "
                "positive semi-definite (PSD). The algorithm may diverge, "
                "and lead to degenerate solutions. "
                "To prevent that, try to decrease the balance parameter "
                "`balance_param` and/or to set prior='identity'.",
                ConvergenceWarning)
            w -= min_eigval  # we translate the eigenvalues to make them all positive
        w += 1e-10  # we add a small offset to avoid definiteness problems
        sigma0 = (V * w).dot(V.T)
        try:
            if HAS_SKGGM:
                theta0 = pinvh(sigma0)
                M, _, _, _, _, _ = quic(emp_cov,
                                        lam=self.sparsity_param,
                                        msg=self.verbose,
                                        Theta0=theta0,
                                        Sigma0=sigma0)
            else:
                _, M = graphical_lasso(emp_cov,
                                       alpha=self.sparsity_param,
                                       verbose=self.verbose,
                                       cov_init=sigma0)
            raised_error = None
            w_mahalanobis, _ = np.linalg.eigh(M)
            not_spd = any(w_mahalanobis < 0.)
            not_finite = not np.isfinite(M).all()
        except Exception as e:
            raised_error = e
            not_spd = False  # not_spd not applicable here so we set to False
            not_finite = False  # not_finite not applicable here so we set to False
        if raised_error is not None or not_spd or not_finite:
            msg = ("There was a problem in SDML when using {}'s graphical "
                   "lasso solver."
                   ).format("skggm" if HAS_SKGGM else "scikit-learn")
            if not HAS_SKGGM:
                skggm_advice = (
                    " skggm's graphical lasso can sometimes converge "
                    "on non SPD cases where scikit-learn's graphical "
                    "lasso fails to converge. Try to install skggm and "
                    "rerun the algorithm (see the README.md for the "
                    "right version of skggm).")
                msg += skggm_advice
            if raised_error is not None:
                msg += " The following error message was thrown: {}.".format(
                    raised_error)
            raise RuntimeError(msg)

        self.components_ = components_from_metric(np.atleast_2d(M))
        return self
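The initialization trick above (shifting the eigenvalues of emp_cov so graphical lasso starts from a strictly positive-definite matrix) can be isolated as a small helper; make_strictly_pd is a hypothetical name used only for illustration:

import numpy as np

def make_strictly_pd(M, eps=1e-10):
    """Translate eigenvalues so that a symmetric matrix becomes positive definite."""
    w, V = np.linalg.eigh(M)
    if w.min() < 0.:
        w = w - w.min()      # shift all eigenvalues to be non-negative
    w = w + eps              # small offset to avoid exact singularity
    return (V * w).dot(V.T)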
Code Example #34
    def build(self, wfn, integrals):
        """Builds the Hamiltonian matrix from a list of trajectories."""
        n_alive = wfn.nalive

        if integrals.hermitian:
            n_elem = int(n_alive * (n_alive + 1) / 2)
        else:
            n_elem = n_alive * n_alive

        if self.mat_dict['heff'].shape != (n_alive, n_alive):
            self.mat_dict['t'] = np.zeros((n_alive, n_alive), dtype=complex)
            self.mat_dict['v'] = np.zeros((n_alive, n_alive), dtype=complex)
            self.mat_dict['h'] = np.zeros((n_alive, n_alive), dtype=complex)
            self.mat_dict['s_traj'] = np.zeros((n_alive, n_alive),
                                               dtype=complex)
            self.mat_dict['s_nuc'] = np.zeros((n_alive, n_alive),
                                              dtype=complex)
            self.mat_dict['s_elec'] = np.zeros((n_alive, n_alive),
                                               dtype=complex)
            self.mat_dict['s'] = np.zeros((n_alive, n_alive), dtype=complex)
            self.mat_dict['sinv'] = np.zeros((n_alive, n_alive), dtype=complex)
            self.mat_dict['sdot'] = np.zeros((n_alive, n_alive), dtype=complex)
            self.mat_dict['heff'] = np.zeros((n_alive, n_alive), dtype=complex)

        # now evaluate the hamiltonian matrix
        for ij in range(n_elem):
            if integrals.hermitian:
                i, j = self.ut_ind(ij)
            else:
                i, j = self.sq_ind(ij, n_alive)

            ii = wfn.alive[i]
            jj = wfn.alive[j]

            s_nuc = integrals.nuc_overlap(wfn.traj[ii], wfn.traj[jj])
            s_elec = integrals.elec_overlap(wfn.traj[ii], wfn.traj[jj])

            # nuclear overlap matrix (excluding electronic component)
            self.mat_dict['s_nuc'][i, j] = s_nuc

            # electronic overlap matrix (excluding nuclear component)
            self.mat_dict['s_elec'][i, j] = s_elec

            # compute overlap of trajectories (different from S, which may or may
            # not involve integration in a gaussian basis)
            self.mat_dict['s_traj'][i, j] = integrals.traj_overlap(
                wfn.traj[ii], wfn.traj[jj])

            # overlap matrix (including electronic component)
            self.mat_dict['s'][i, j] = integrals.s_integral(wfn.traj[ii],
                                                            wfn.traj[jj],
                                                            nuc_ovrlp=s_nuc,
                                                            elec_ovrlp=s_elec)

            # time-derivative of the overlap matrix (not hermitian in general)
            self.mat_dict['sdot'][i, j] = integrals.sdot_integral(
                wfn.traj[ii], wfn.traj[jj], nuc_ovrlp=s_nuc, elec_ovrlp=s_elec)

            # kinetic energy matrix
            self.mat_dict['t'][i, j] = integrals.t_integral(wfn.traj[ii],
                                                            wfn.traj[jj],
                                                            nuc_ovrlp=s_nuc,
                                                            elec_ovrlp=s_elec)

            # potential energy matrix
            self.mat_dict['v'][i, j] = integrals.v_integral(wfn.traj[ii],
                                                            wfn.traj[jj],
                                                            nuc_ovrlp=s_nuc,
                                                            elec_ovrlp=s_elec)

            # Hamiltonian matrix in non-orthogonal basis
            self.mat_dict['h'][
                i, j] = self.mat_dict['t'][i, j] + self.mat_dict['v'][i, j]

            # if hermitian matrix, set (j,i) indices
            if integrals.hermitian and i != j:
                self.mat_dict['s_nuc'][j, i] = self.mat_dict['s_nuc'][
                    i, j].conjugate()
                self.mat_dict['s_elec'][j, i] = self.mat_dict['s_elec'][
                    i, j].conjugate()
                self.mat_dict['s_traj'][j, i] = self.mat_dict['s_traj'][
                    i, j].conjugate()
                self.mat_dict['s'][j, i] = self.mat_dict['s'][i, j].conjugate()
                self.mat_dict['sdot'][j, i] = integrals.sdot_integral(
                    wfn.traj[jj],
                    wfn.traj[ii],
                    nuc_ovrlp=self.mat_dict['s_nuc'][j, i],
                    elec_ovrlp=self.mat_dict['s_elec'][j, i])
                self.mat_dict['t'][j, i] = self.mat_dict['t'][i, j].conjugate()
                self.mat_dict['v'][j, i] = self.mat_dict['v'][i, j].conjugate()
                self.mat_dict['h'][j, i] = self.mat_dict['h'][i, j].conjugate()

        if integrals.hermitian:
            # compute the S^-1, needed to compute Heff
            timings.start('linalg.pinvh')
            self.mat_dict['sinv'] = sp_linalg.pinvh(self.mat_dict['s'])
            #Sinv, cond = linalg.pseudo_inverse2(S)
            timings.stop('linalg.pinvh')
        else:
            # compute the S^-1, needed to compute Heff
            timings.start('hamiltonian.pseudo_inverse')
            self.mat_dict['sinv'], cond = linalg.pseudo_inverse(
                self.mat_dict['s'])
            timings.stop('hamiltonian.pseudo_inverse')

        self.mat_dict['heff'] = np.dot(
            self.mat_dict['sinv'],
            self.mat_dict['h'] - 1j * self.mat_dict['sdot'])
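The Hermitian branch uses scipy's pinvh, which exploits the symmetry of S through an eigendecomposition rather than a generic SVD. A tiny illustration on a toy Hermitian matrix (not the trajectory objects used above):

import numpy as np
from scipy import linalg as sp_linalg

S = np.array([[1.0, 0.2 + 0.1j],
              [0.2 - 0.1j, 1.0]])        # toy Hermitian overlap matrix
Sinv = sp_linalg.pinvh(S)
print(np.allclose(Sinv @ S, np.eye(2)))  # True for a well-conditioned S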
Code Example #35
def mutual_incoherence(X_relevant, X_irelevant):
    """Mutual incoherence, as defined by formula (26a) of [Wainwright2006].
    """
    projector = np.dot(np.dot(X_irelevant.T, X_relevant),
                       linalg.pinvh(np.dot(X_relevant.T, X_relevant)))
    return np.max(np.abs(projector).sum(axis=1))
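A quick usage sketch, assuming the function above and its scipy.linalg/numpy imports are in scope; values below 1 correspond to the incoherence condition of [Wainwright2006]:

import numpy as np

X = np.random.RandomState(0).normal(size=(100, 10))
mi = mutual_incoherence(X[:, :3], X[:, 3:])  # first 3 columns treated as relevant
print(mi)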
Code Example #36
    def __init__(self, nu, S):
        self.nu = nu
        self.S = S
        self.D = S.shape[0]
        self.inv_S = linalg.pinvh(S)
Code Example #37
# write the estimated values to an SBtab file (out1.tsv)
reaction_df = pd.DataFrame(zip(pp.reaction_ids, pp.reaction_formulas,
                               standard_dg_prime_in_kJ_per_mol,
                               dg_sigma_in_kJ_per_mol),
                           columns=[
                               "reaction_id", "reaction_formula",
                               "standard_dg_prime_in_kJ_per_mol",
                               "dg_sigma_in_kJ_per_mol"
                           ])
sbtabdoc = sbtab.SBtab.SBtabDocument()
sbtabdoc.add_sbtab(
    sbtab.SBtab.SBtabTable.from_data_frame(reaction_df.applymap(str),
                                           table_id="Thermodynamics",
                                           table_type="Quantity"))
sbtabdoc.write("out1.tsv")

# Save the Precision matrix to dg_precision.mat in the Matlab binary file format
try:
    _, dg_precision = cc.standard_dg_prime_multi(
        pp.reactions, uncertainty_representation="precision")
    dg_precision = dg_precision.m_as("mol**2/kJ**2")
except ValueError:
    sys.stderr.write(
        "uncertainty_representation = 'precision' is not implemented "
        "in this version of equilibrator-api.\ninverting the covariance matrix "
        "using the pseudoinverse function of scipy.")
    dg_precision = pinvh(dg_cov.m_as("kJ**2/mol**2"))

mdic = {"dg_precision": dg_precision, "rxn_id": list(pp.reaction_ids)}
scipy.io.savemat("dg_precision.mat", mdic)
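To read the saved matrix back later, standard scipy.io usage suffices (not part of the original script):

import scipy.io

mdic = scipy.io.loadmat("dg_precision.mat")
dg_precision = mdic["dg_precision"]
rxn_id = mdic["rxn_id"]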
Code Example #38
def fit(x, y_onehot, niter_max, l2):
    #print(niter_max)
    l, n = x.shape
    m = y_onehot.shape[1]  # number of categories

    x_av = np.mean(x, axis=0)
    dx = x - x_av
    c = np.cov(dx, rowvar=False, bias=True)

    # 2019.07.16:  l2 = lamda/(2L)
    c += l2 * np.identity(n) / (2 * l)
    c_inv = linalg.pinvh(c)

    H0 = np.zeros(m)
    W = np.zeros((n, m))

    for i in range(m):
        y = y_onehot[:, i]  # y = {0,1}
        y1 = 2 * y - 1  # y1 = {-1,1}
        # initial values
        h0 = 0.
        w = np.random.normal(0.0, 1. / np.sqrt(n), size=(n))

        cost = np.full(niter_max, 100.)
        for iloop in range(niter_max):
            h = h0 + x.dot(w)
            y1_model = np.tanh(h / 2.)

            # stopping criterion
            #p = 1/(1+np.exp(-h))

            #cost[iloop] = ((p-y)**2).mean()

            # 2019.07.12: loss function
            cost[iloop] = ((y1[:] - y1_model[:])**2).mean()
            #cost[iloop] = (-y[:]*np.log(p) - (1-y)*np.log(1-p)).mean()

            #h_test = h0 + x_test.dot(w)
            #p_test = 1/(1+np.exp(-h_test))
            #cost[iloop] = ((p_test-y_test)**2).mean()

            if iloop > 0 and cost[iloop] >= cost[iloop - 1]: break

            # update local field
            t = h != 0
            h[t] *= y1[t] / y1_model[t]
            h[~t] = 2 * y1[~t]

            # find w from h
            h_av = h.mean()
            dh = h - h_av
            dhdx = dh[:, np.newaxis] * dx[:, :]

            dhdx_av = dhdx.mean(axis=0)
            w = c_inv.dot(dhdx_av)
            h0 = h_av - x_av.dot(w)

        H0[i] = h0
        W[:, i] = w

    return H0, W
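A toy usage sketch for the routine above, with synthetic one-hot labels; it assumes the snippet's module-level imports (numpy as np, scipy.linalg as linalg) are in place:

import numpy as np

rng = np.random.default_rng(0)
x = rng.normal(size=(500, 20))
labels = rng.integers(0, 3, size=500)
y_onehot = np.eye(3)[labels]           # shape (500, 3)

H0, W = fit(x, y_onehot, niter_max=100, l2=0.1)
pred = np.argmax(H0 + x @ W, axis=1)   # predicted category per sample
print((pred == labels).mean())         # training accuracy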
Code Example #39
    def compute_density_gCorr(self, use_variance=True, comp_err=True):
        # TODO: matrix A should be in sparse format!

        # compute changes in free energy
        if self.Fij_array is None:
            self.compute_deltaFs_grads_semisum()

        if self.verb:
            print("gCorr density estimation started")
            sec = time.time()

        # compute adjacency matrix and cumulative changes
        A = sparse.lil_matrix((self.N, self.N), dtype=np.float_)

        supp_deltaF = sparse.lil_matrix((self.N, self.N), dtype=np.float_)

        # define redundancy factor for each A matrix entry as the geometric mean of the 2 corresponding k*
        k1 = self.kstar[self.nind_list[:, 0]]
        k2 = self.kstar[self.nind_list[:, 1]]
        redundancy = np.sqrt(k1 * k2)

        if use_variance:
            for nspar, indices in enumerate(self.nind_list):
                i = indices[0]
                j = indices[1]
                # tmp = 1.0 / self.Fij_var_array[nspar]
                tmp = 1.0 / self.Fij_var_array[nspar] / redundancy[nspar]
                A[i, j] = -tmp
                supp_deltaF[i, j] = self.Fij_array[nspar] * tmp
        else:
            for nspar, indices in enumerate(self.nind_list):
                i = indices[0]
                j = indices[1]
                # A[i, j] = -1.0
                A[i, j] = -1.0 / redundancy[nspar]
                supp_deltaF[i, j] = self.Fij_array[nspar]

        A = sparse.lil_matrix(A + A.transpose())

        diag = np.array(-A.sum(axis=1)).reshape((self.N,))

        A.setdiag(diag)

        # print("Diag = {}".format(diag))

        deltaFcum = np.array(supp_deltaF.sum(axis=0)).reshape((self.N,)) - np.array(
            supp_deltaF.sum(axis=1)
        ).reshape((self.N,))

        sec2 = time.time()
        if self.verb:
            print("{0:0.2f} seconds to fill sparse matrix".format(sec2 - sec))

        log_den = sparse.linalg.spsolve(A.tocsr(), deltaFcum)

        if self.verb:
            print("{0:0.2f} seconds to solve linear system".format(time.time() - sec2))
        sec2 = time.time()

        self.log_den = log_den
        # self.log_den_err = np.sqrt((sparse.linalg.inv(A.tocsc())).diagonal())

        if comp_err is True:
            self.A = A.todense()
            self.B = slin.pinvh(self.A)
            # self.B = slin.inv(self.A)
            self.log_den_err = np.sqrt(np.diag(self.B))

        if self.verb:
            print("{0:0.2f} seconds inverting A matrix".format(time.time() - sec2))
        sec2 = time.time()

        # self.log_den_err = np.sqrt(np.diag(slin.pinvh(A.todense())))
        # self.log_den_err = np.sqrt(diag/np.array(np.sum(np.square(A.todense()),axis=1)).reshape(self.N,))

        sec2 = time.time()
        if self.verb:
            print("{0:0.2f} seconds for gCorr density estimation".format(sec2 - sec))
Code Example #40
def _c_step(X,
            n_support,
            random_state,
            remaining_iterations=30,
            initial_estimates=None,
            verbose=False,
            cov_computation_method=empirical_covariance):
    n_samples, n_features = X.shape
    dist = np.inf

    # Initialisation
    support = np.zeros(n_samples, dtype=bool)
    if initial_estimates is None:
        # compute initial robust estimates from a random subset
        support[random_state.permutation(n_samples)[:n_support]] = True
    else:
        # get initial robust estimates from the function parameters
        location = initial_estimates[0]
        covariance = initial_estimates[1]
        # run a special iteration for that case (to get an initial support)
        precision = linalg.pinvh(covariance)
        X_centered = X - location
        dist = (np.dot(X_centered, precision) * X_centered).sum(1)
        # compute new estimates
        support[np.argsort(dist)[:n_support]] = True

    X_support = X[support]
    location = X_support.mean(0)
    covariance = cov_computation_method(X_support)

    # Iterative procedure for Minimum Covariance Determinant computation
    det = fast_logdet(covariance)
    # If the data already has singular covariance, calculate the precision,
    # as the loop below will not be entered.
    if np.isinf(det):
        precision = linalg.pinvh(covariance)

    previous_det = np.inf
    while (det < previous_det and remaining_iterations > 0
           and not np.isinf(det)):
        # save old estimates values
        previous_location = location
        previous_covariance = covariance
        previous_det = det
        previous_support = support
        # compute a new support from the full data set mahalanobis distances
        precision = linalg.pinvh(covariance)
        X_centered = X - location
        dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1)
        # compute new estimates
        support = np.zeros(n_samples, dtype=bool)
        support[np.argsort(dist)[:n_support]] = True
        X_support = X[support]
        location = X_support.mean(axis=0)
        covariance = cov_computation_method(X_support)
        det = fast_logdet(covariance)
        # update remaining iterations for early stopping
        remaining_iterations -= 1

    previous_dist = dist
    dist = (np.dot(X - location, precision) * (X - location)).sum(axis=1)
    # Check if best fit already found (det => 0, logdet => -inf)
    if np.isinf(det):
        results = location, covariance, det, support, dist
    # Check convergence
    if np.allclose(det, previous_det):
        # c_step procedure converged
        if verbose:
            print("Optimal couple (location, covariance) found before"
                  " ending iterations (%d left)" % (remaining_iterations))
        results = location, covariance, det, support, dist
    elif det > previous_det:
        # determinant has increased (should not happen)
        warnings.warn(
            "Warning! det > previous_det (%.15f > %.15f)" %
            (det, previous_det), RuntimeWarning)
        results = previous_location, previous_covariance, \
            previous_det, previous_support, previous_dist

    # Check early stopping
    if remaining_iterations == 0:
        if verbose:
            print('Maximum number of iterations reached')
        results = location, covariance, det, support, dist

    return results
Code Example #41
def make_data(
    n=150,
    min_v=None,
    max_v=None,
    ns=None,
    p=3,
    q=3,
    tau_range=(0.25, 1.5),
    max_neighbors=8,
    random_state=None,
):
    """Generate random data to use for modelling species occupancy.

    Parameters
    ----------
    n : int, optional
        Number of sites. Defaults to 150.
    min_v : int, optional
        Minimum number of visits per site. If None, the minimum number is set
        to 2. Defaults to None.
    max_v : int, optional
        Maximum number of visits per site. If None, the maximum number is set
        to 10% of `n`. Defaults to None.
    ns : int, optional
        Number of surveyed sites out of `n`. If None, then this parameter is
        set to 50% of `n`. Defaults to None.
    p : int, optional
        Number of covariates to use for species occupancy. Defaults to 3.
    q : int, optional
        Number of covariates to use for conditional detection. Defaults to 3.
    tau_range : tuple, optional
        The range to randomly sample the precision parameter value from.
        Defaults to (0.25, 1.5).
    max_neighbors : int, optional
        Maximum number of neighbors per site. Should be one of {4, 8}. Default
        is 8.
    random_state : int, optional
        The seed to use for random number generation. Useful for reproducing
        generated data. If None then a random seed is chosen. Defaults to None.

    Returns
    -------
    Q : scipy.sparse.coo_matrix
        Spatial precision matrix
    W : Dict[int, np.ndarray]
        Dictionary of detection covariates where the keys are the site numbers
        of the surveyed sites and the values are arrays containing
        the design matrix of each corresponding site.
    X : np.ndarray
        Design matrix of species occupancy covariates.
    y : Dict[int, np.ndarray]
        Dictionary of survey data where the keys are the site numbers of the
        surveyed sites and the values are arrays of 1's and 0's
        where 0's indicate "no detection" and 1's indicate "detection". The
        length of each array equals the number of visits in the corresponding
        site.
    alpha : np.ndarray
        True values of coefficients of detection covariates.
    beta : np.ndarray
        True values of coefficients of occupancy covariates.
    tau : np.ndarray
        True value of the precision parameter
    z : np.ndarray
        True occupancy state for all `n` sites.

    Raises
    ------
    ValueError
        When `n` is less than the default 150 sites.
        When `min_v` is less than 1.
        When `max_v` is less than 2 or greater than `n`.
        When `ns` is not a positive integer or greater than `n`.

    Examples
    --------
    >>> from occuspytial.utils import make_data
    >>> Q, W, X, y, alpha, beta, tau, z = make_data()
    >>> Q
    <150x150 sparse matrix of type '<class 'numpy.float64'>'
            with 1144 stored elements in COOrdinate format>
    >>> Q.toarray()
    array([[ 3., -1.,  0., ...,  0.,  0.,  0.],  # random
           [-1.,  5., -1., ...,  0.,  0.,  0.],
           [ 0., -1.,  5., ...,  0.,  0.,  0.],
           ...,
           [ 0.,  0.,  0., ...,  5., -1.,  0.],
           [ 0.,  0.,  0., ..., -1.,  5., -1.],
           [ 0.,  0.,  0., ...,  0., -1.,  3.]])
    >>> W
    {81: array([[ 1.        ,  1.01334565,  0.93150242],  # random
            [ 1.        ,  0.19276808, -1.71939657],
            [ 1.        ,  0.23866531,  0.0559545 ],
            [ 1.        ,  1.36102304,  1.73611887],
            [ 1.        ,  0.47247886,  0.73410589],
            [ 1.        , -1.9018879 ,  0.0097963 ]]),
     131: array([[ 1.        ,  1.67846707, -1.12476746],
            [ 1.        , -1.63131532, -1.32216705],
            [ 1.        , -1.37431173, -0.79734213],
            ...,
     21: array([[ 1.        ,  1.6416734 , -1.91642502],
            [ 1.        ,  0.2256312 , -1.68929118],
            [ 1.        ,  1.36953093,  1.08758129],
            [ 1.        , -1.08029212,  0.40219588]])}
    >>> X
    array([[ 1.        ,  0.71582433,  1.76344395],
           [ 1.        ,  0.8561976 ,  1.0520401 ],
           [ 1.        , -0.28051247,  0.16809809],
           ...,
           [ 1.        ,  0.86702262, -1.18225448],
           [ 1.        , -0.41346399, -0.9633078 ],
           [ 1.        , -0.23182363,  1.69930761]])
    >>> y
    {15: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]),  # random
     81: array([0, 0, 0, 1, 1, 0]),
     ...,
     21: array([0, 1, 0, 0])}
    >>> alpha
    array([-1.43291816, -0.87932413, -1.84927642])  # random
    >>> beta
    array([-0.62084322, -1.09645564, -0.93371374])  # random
    >>> tau
    1.415532667780688  # random
    >>> z
    array([0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1,
           1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1,
           1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0,
           0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0,
           0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1,
           0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0,
           0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0])
    """
    rng = get_generator(random_state)

    if n < 150:
        raise ValueError('n cannot be lower than 150')

    if min_v is None:
        min_v = 2
    elif min_v < 1:
        raise ValueError('min_v needs to be at least 1')

    if max_v is None:
        max_v = n // 10
    elif max_v < 2:
        raise ValueError('max_v is too small')
    elif max_v > n:
        raise ValueError('max_v cannot be more than n')

    if ns is None:
        ns = n // 2
    elif ns == 0:
        raise ValueError('ns should be positive')
    elif ns > n:
        raise ValueError('ns cannot be more than n')

    surveyed_sites = rng.choice(range(n), size=ns, replace=False)
    visits_per_site = rng.integers(min_v, max_v, size=ns, endpoint=True)

    alpha = rng.standard_normal(q)
    beta = rng.standard_normal(p)
    tau = rng.uniform(*tau_range)

    factors = []
    for i in range(3, n):
        if (n % i) == 0:
            factors.append(i)

    row = rng.choice(factors)
    col = n // row

    Q = rand_precision_mat(row, col, max_neighbors=max_neighbors).astype(float)
    Q_pinv = pinvh(Q.toarray(), cond=1e-5)
    eta = rng.multivariate_normal(np.zeros(n), Q_pinv / tau, method='eigh')

    X = rng.uniform(-2, 2, n * p).reshape(n, -1)
    X[:, 0] = 1

    psi = np.exp(-np.logaddexp(0, -X @ beta + eta))
    z = rng.binomial(1, p=psi, size=n)

    W, y = {}, {}
    for i, j in zip(surveyed_sites, visits_per_site):
        _W = rng.uniform(-2, 2, size=j * q).reshape(j, -1)
        _W[:, 0] = 1
        d = np.exp(-np.logaddexp(0, -_W @ alpha))
        W[i] = _W
        y[i] = rng.binomial(1, z[i] * d)

    return Q, W, X, y, alpha, beta, tau, z
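
A note on the inverse link used in `make_data` above: `np.exp(-np.logaddexp(0, -x))` is a numerically stable way of evaluating the logistic sigmoid 1 / (1 + exp(-x)), because `logaddexp` never overflows for large negative arguments. A minimal illustration (not part of the original module):

import numpy as np

def stable_sigmoid(x):
    # identical to 1 / (1 + np.exp(-x)), but safe for large |x|
    return np.exp(-np.logaddexp(0, -x))

x = np.array([-750.0, -5.0, 0.0, 5.0, 750.0])
print(stable_sigmoid(x))   # ~[0., 0.0067, 0.5, 0.9933, 1.]
# the naive 1 / (1 + np.exp(-x)) raises an overflow warning at x = -750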
Code Example #42
0
    def compute_density_dF_PAk_gCorr(self, use_variance=True, alpha=1.0, comp_err=True):

        # check for deltaFij
        if self.Fij_array is None:
            self.compute_deltaFs_grads_semisum()

        if self.verb:
            print("dF_PAk_gCorr density estimation started")
            sec = time.time()

        dc = np.zeros(self.N, dtype=float)
        corrected_vols = np.zeros(self.N, dtype=float)
        log_den = np.zeros(self.N, dtype=float)
        log_den_err = np.zeros(self.N, dtype=float)
        prefactor = np.exp(
            self.intrinsic_dim / 2.0 * np.log(np.pi)
            - gammaln((self.intrinsic_dim + 2) / 2)
        )

        log_den_min = 9.9e300

        for i in range(self.N):
            k = int(self.kstar[i])
            dc[i] = self.distances[i, k]
            Fijs = self.Fij_array[self.nind_iptr[i] : self.nind_iptr[i + 1]]

            for j in range(1, k):
                Fij = Fijs[j - 1]
                rjjm1 = (
                    self.distances[i, j] ** self.intrinsic_dim
                    - self.distances[i, j - 1] ** self.intrinsic_dim
                )

                corrected_vols[i] += rjjm1 * np.exp(Fij)  # * (1+Fij)

        corrected_vols *= prefactor * self.N

        self.dc = dc

        # compute adjacency matrix and cumulative changes
        A = sparse.lil_matrix((self.N, self.N), dtype=np.float64)

        supp_deltaF = sparse.lil_matrix((self.N, self.N), dtype=np.float64)

        # define redundancy factor for each A matrix entry as the geometric mean of the 2 corresponding k*
        k1 = self.kstar[self.nind_list[:, 0]]
        k2 = self.kstar[self.nind_list[:, 1]]
        redundancy = np.sqrt(k1 * k2)

        if use_variance:
            for nspar, indices in enumerate(self.nind_list):
                i = indices[0]
                j = indices[1]
                tmp = 1.0 / self.Fij_var_array[nspar] / redundancy[nspar]
                A[i, j] = -tmp
                supp_deltaF[i, j] = self.Fij_array[nspar] * tmp
        else:
            for nspar, indices in enumerate(self.nind_list):
                i = indices[0]
                j = indices[1]
                A[i, j] = -1.0 / redundancy[nspar]
                supp_deltaF[i, j] = self.Fij_array[nspar] / redundancy[nspar]

        A = alpha * sparse.lil_matrix(A + A.transpose())

        diag = np.array(-A.sum(axis=1)).reshape((self.N,)) + (1.0 - alpha) * self.kstar

        #        print("Diag = {}".format(diag))

        A.setdiag(diag)

        deltaFcum = alpha * (
            np.array(supp_deltaF.sum(axis=0)).reshape((self.N,))
            - np.array(supp_deltaF.sum(axis=1)).reshape((self.N,))
        ) + (1.0 - alpha) * (self.kstar * (np.log(self.kstar / corrected_vols)))

        log_den = sparse.linalg.spsolve(A.tocsr(), deltaFcum)

        self.log_den = log_den

        if comp_err is True:
            self.A = A.todense()
            self.B = slin.pinvh(self.A)
            self.log_den_err = np.sqrt(np.diag(self.B))

        sec2 = time.time()
        if self.verb:
            print(
                "{0:0.2f} seconds for dF_PAk_gCorr density estimation".format(
                    sec2 - sec
                )
            )
Code Example #43
0
def latent_time_graphical_lasso(emp_cov,
                                alpha=0.01,
                                tau=1.,
                                rho=1.,
                                beta=1.,
                                eta=1.,
                                max_iter=100,
                                n_samples=None,
                                verbose=False,
                                psi='laplacian',
                                phi='laplacian',
                                mode='admm',
                                tol=1e-4,
                                rtol=1e-4,
                                return_history=False,
                                return_n_iter=True,
                                update_rho_options=None,
                                compute_objective=True,
                                init='empirical'):
    r"""Latent variable time-varying graphical lasso solver.

    Solves the following problem via ADMM:
      min sum_{i=1}^T -n_i log_likelihood(S_i, K_i-L_i) + alpha ||K_i||_{od,1}
          + tau ||L_i||_*
          + beta sum_{i=2}^T Psi(K_i - K_{i-1})
          + eta sum_{i=2}^T Phi(L_i - L_{i-1})

    where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data, one matrix per time point.
    alpha, tau, beta, eta : float, optional
        Regularisation parameters.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. Default is to take
        the empirical covariance and invert it.

    Returns
    -------
    K, L : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    phi, prox_phi, phi_node_penalty = check_norm_prox(phi)

    Z_0 = init_precision(emp_cov, mode=init)
    Z_1 = Z_0.copy()[:-1]
    Z_2 = Z_0.copy()[1:]
    W_0 = np.zeros_like(Z_0)
    W_1 = np.zeros_like(Z_1)
    W_2 = np.zeros_like(Z_2)

    X_0 = np.zeros_like(Z_0)
    X_1 = np.zeros_like(Z_1)
    X_2 = np.zeros_like(Z_2)
    U_1 = np.zeros_like(W_1)
    U_2 = np.zeros_like(W_2)

    R_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)
    W_1_old = np.zeros_like(W_1)
    W_2_old = np.zeros_like(W_2)

    # divisor for consensus variables, accounting for two fewer matrices
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    if n_samples is None:
        n_samples = np.ones(emp_cov.shape[0])

    checks = []
    for iteration_ in range(max_iter):
        # update R
        A = Z_0 - W_0 - X_0
        A += A.transpose(0, 2, 1)
        A /= 2.
        A *= -rho / n_samples[:, None, None]
        A += emp_cov
        # A = emp_cov / rho - A

        R = np.array(
            [prox_logdet(a, lamda=ni / rho) for a, ni in zip(A, n_samples)])

        # update Z_0
        A = R + W_0 + X_0
        A[:-1] += Z_1 - X_1
        A[1:] += Z_2 - X_2
        A /= divisor[:, None, None]
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # Z_0 = np.array(map(soft_thresholding_, A))
        Z_0 = soft_thresholding(A,
                                lamda=alpha / (rho * divisor[:, None, None]))

        # update Z_1, Z_2
        A_1 = Z_0[:-1] + X_1
        A_2 = Z_0[1:] + X_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * beta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update W_0
        A = Z_0 - R - X_0
        A[:-1] += W_1 - U_1
        A[1:] += W_2 - U_2
        A /= divisor[:, None, None]
        A += A.transpose(0, 2, 1)
        A /= 2.

        W_0 = np.array([
            prox_trace_indicator(a, lamda=tau / (rho * div))
            for a, div in zip(A, divisor)
        ])

        # update W_1, W_2
        A_1 = W_0[:-1] + U_1
        A_2 = W_0[1:] + U_2
        if not phi_node_penalty:
            prox_e = prox_phi(A_2 - A_1, lamda=2. * eta / rho)
            W_1 = .5 * (A_1 + A_2 - prox_e)
            W_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            W_1, W_2 = prox_phi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * eta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        X_0 += R - Z_0 + W_0
        X_1 += Z_0[:-1] - Z_1
        X_2 += Z_0[1:] - Z_2
        U_1 += W_0[:-1] - W_1
        U_2 += W_0[1:] - W_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(R - Z_0 + W_0) + squared_norm(Z_0[:-1] - Z_1) +
            squared_norm(Z_0[1:] - Z_2) + squared_norm(W_0[:-1] - W_1) +
            squared_norm(W_0[1:] - W_2))

        snorm = rho * np.sqrt(
            squared_norm(R - R_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old) + squared_norm(W_1 - W_1_old) +
            squared_norm(W_2 - W_2_old))

        obj = objective(emp_cov, n_samples, R, Z_0, Z_1, Z_2, W_0, W_1, W_2,
                        alpha, tau, beta, eta, psi, phi) \
            if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * max(
                np.sqrt(
                    squared_norm(R) + squared_norm(Z_1) + squared_norm(Z_2) +
                    squared_norm(W_1) + squared_norm(W_2)),
                np.sqrt(
                    squared_norm(Z_0 - W_0) + squared_norm(Z_0[:-1]) +
                    squared_norm(Z_0[1:]) + squared_norm(W_0[:-1]) +
                    squared_norm(W_0[1:]))),
            e_dual=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * rho *
            (np.sqrt(
                squared_norm(X_0) + squared_norm(X_1) + squared_norm(X_2) +
                squared_norm(U_1) + squared_norm(U_2))))

        R_old = R.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()
        W_1_old = W_1.copy()
        W_2_old = W_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        X_0 *= rho / rho_new
        X_1 *= rho / rho_new
        X_2 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, W_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_)
    return return_list
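
A hedged usage sketch for the solver above: it expects a stack of empirical covariance matrices, one per time point, and returns the sparse precision stack K, the low-rank stack L and the corresponding covariances. Only numpy is certain here; the call assumes `latent_time_graphical_lasso` and its regain helpers are importable in the current module.

import numpy as np

rng = np.random.default_rng(0)
T, n, d = 4, 200, 6
data = [rng.standard_normal((n, d)) for _ in range(T)]

# one empirical covariance per time point -> shape (T, d, d)
emp_cov = np.array([np.cov(x, rowvar=False) for x in data])

# hypothetical call, assuming the solver defined above is in scope
K, L, covariance, n_iter = latent_time_graphical_lasso(
    emp_cov, alpha=0.1, tau=0.1, n_samples=np.full(T, n),
    return_n_iter=True)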
Code Example #44
0
def time_graphical_lasso(emp_cov,
                         alpha=0.01,
                         rho=1,
                         beta=1,
                         theta=0.5,
                         max_iter=100,
                         n_samples=None,
                         verbose=False,
                         psi='laplacian',
                         tol=1e-4,
                         rtol=1e-4,
                         return_history=False,
                         return_n_iter=True,
                         mode='admm',
                         compute_objective=True,
                         stop_at=None,
                         stop_when=1e-4,
                         update_rho_options=None,
                         init='empirical'):
    """Time-varying graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha*||K_i||_{od,1}
            + beta sum_{i=2}^T Psi(K_i - K_{i-1})

    where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data, one matrix per time point.
    alpha, beta : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    n_samples : ndarray
        Number of samples available for each time point.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    return_n_iter : bool, optional
        Return the number of iteration before convergence.
    verbose : bool, default False
        Print info at each iteration.
    update_rho_options : dict, optional
        Arguments for the rho update.
        See regain.update_rules.update_rho function for more information.
    compute_objective : bool, default True
        Choose to compute the objective value.
    init : {'empirical', 'zero', ndarray}
        Choose how to initialize the precision matrix: with the inverse of the
        empirical covariance, the zero matrix, or a precomputed array.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T .
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)

    Z_0 = init_precision(emp_cov, mode=init)
    Z_1 = Z_0.copy()[:-1]  # np.zeros_like(emp_cov)[:-1]
    Z_2 = Z_0.copy()[1:]  # np.zeros_like(emp_cov)[1:]

    U_0 = np.zeros_like(Z_0)
    U_1 = np.zeros_like(Z_1)
    U_2 = np.zeros_like(Z_2)

    Z_0_old = np.zeros_like(Z_0)
    Z_1_old = np.zeros_like(Z_1)
    Z_2_old = np.zeros_like(Z_2)

    # divisor for consensus variables, accounting for two fewer matrices
    divisor = np.full(emp_cov.shape[0], 3, dtype=float)
    divisor[0] -= 1
    divisor[-1] -= 1

    if n_samples is None:
        n_samples = np.ones(emp_cov.shape[0])

    checks = [
        convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_1, Z_2,
                                  alpha, beta, psi))
    ]
    for iteration_ in range(max_iter):
        # update K
        A = Z_0 - U_0
        A[:-1] += Z_1 - U_1
        A[1:] += Z_2 - U_2
        A += A.transpose(0, 2, 1)
        A /= 2.

        A *= -rho / n_samples[:, None, None]
        A += emp_cov

        K = np.array([
            prox_logdet_alt(a, lamda=rho * div) for a, div in zip(A, divisor)
        ])

        # update Z_0
        A = K + U_0
        A += A.transpose(0, 2, 1)
        A /= 2.
        Z_0 = soft_thresholding_od(A, lamda=alpha / rho)

        # other Zs
        A_1 = K[:-1] + U_1
        A_2 = K[1:] + U_2
        if not psi_node_penalty:
            prox_e = prox_psi(A_2 - A_1, lamda=2. * beta / rho)
            Z_1 = .5 * (A_1 + A_2 - prox_e)
            Z_2 = .5 * (A_1 + A_2 + prox_e)
        else:
            Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1),
                                lamda=.5 * beta / rho,
                                rho=rho,
                                tol=tol,
                                rtol=rtol,
                                max_iter=max_iter)

        # update residuals
        U_0 += K - Z_0
        U_1 += K[:-1] - Z_1
        U_2 += K[1:] - Z_2

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(K - Z_0) + squared_norm(K[:-1] - Z_1) +
            squared_norm(K[1:] - Z_2))

        snorm = rho * np.sqrt(
            squared_norm(Z_0 - Z_0_old) + squared_norm(Z_1 - Z_1_old) +
            squared_norm(Z_2 - Z_2_old))

        obj = objective(
            n_samples, emp_cov, Z_0, K, Z_1, Z_2, alpha, beta, psi) \
            if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=np.sqrt(K.size + 2 * Z_1.size) * tol + rtol * max(
                np.sqrt(
                    squared_norm(Z_0) + squared_norm(Z_1) + squared_norm(Z_2)),
                np.sqrt(
                    squared_norm(K) + squared_norm(K[:-1]) +
                    squared_norm(K[1:]))),
            e_dual=np.sqrt(K.size + 2 * Z_1.size) * tol + rtol * rho *
            np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)),
            # precision=Z_0.copy()
        )
        Z_0_old = Z_0.copy()
        Z_1_old = Z_1.copy()
        Z_2_old = Z_2.copy()

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U_0 *= rho / rho_new
        U_1 *= rho / rho_new
        U_2 *= rho / rho_new
        rho = rho_new

        #assert is_pos_def(Z_0)
    else:
        warnings.warn("Objective did not converge.")

    print(iteration_, penalty_objective(Z_0, Z_0[:-1], Z_0[1:], psi, theta))

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
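
The l1 proximal step in these ADMM solvers is an element-wise soft-thresholding. The `soft_thresholding` / `soft_thresholding_od` helpers come from regain and are only assumed here; a minimal stand-in for the element-wise version (the `_od` variant presumably leaves the diagonal unpenalised) looks like this:

import numpy as np

def soft_thresholding(a, lamda):
    # prox of lamda * ||.||_1: shrink every entry towards zero by lamda
    return np.sign(a) * np.maximum(np.abs(a) - lamda, 0.0)

A = np.array([[1.5, -0.3],
              [0.2, -2.0]])
print(soft_thresholding(A, lamda=0.5))
# [[ 1.  -0. ]
#  [ 0.  -1.5]]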
Code Example #45
0
File: extmath.py Project: BasilBeirouti/scikit-learn
def pinvh(a, cond=None, rcond=None, lower=True):
    return linalg.pinvh(a, cond, rcond, lower)
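
For reference, `scipy.linalg.pinvh` computes the pseudo-inverse of a symmetric (Hermitian) matrix through its eigendecomposition, so it also handles rank-deficient inputs. A small illustration:

import numpy as np
from scipy.linalg import pinvh

v = np.array([[1.0], [2.0], [3.0]])
a = v @ v.T                # symmetric, rank 1, hence singular
a_pinv = pinvh(a)

# the Moore-Penrose conditions hold even though a is not invertible
print(np.allclose(a @ a_pinv @ a, a))            # True
print(np.allclose(a_pinv @ a @ a_pinv, a_pinv))  # True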
Code Example #46
0
def _initialize_metric_mahalanobis(input,
                                   init='identity',
                                   random_state=None,
                                   return_inverse=False,
                                   strict_pd=False,
                                   matrix_name='matrix'):
    """Returns a PSD matrix that can be used as a prior or an initialization
  for the Mahalanobis distance

  Parameters
  ----------
  input : array-like
    The input samples (can be tuples or regular samples).

  init : string or numpy array, optional (default='identity')
    Specification for the matrix to initialize. Possible options are
    'identity', 'covariance', 'random', and a numpy array of shape
    (n_features, n_features).

    'identity'
      An identity matrix of shape (n_features, n_features).

    'covariance'
      The (pseudo-)inverse covariance matrix (raises an error if the
      covariance matrix is not definite and `strict_pd == True`)

    'random'
      A random positive definite (PD) matrix of shape
      `(n_features, n_features)`, generated using
      `sklearn.datasets.make_spd_matrix`.

    numpy array
      A PSD matrix (or strictly PD if strict_pd==True) of
      shape (n_features, n_features), that will be used as such to
      initialize the metric, or set the prior.

  random_state : int or `numpy.RandomState` or None, optional (default=None)
    A pseudo random number generator object or a seed for it if int. If
    ``init='random'``, ``random_state`` is used to set the random Mahalanobis
    matrix. If ``init='pca'``, ``random_state`` is passed as an
    argument to PCA when initializing the matrix.

  return_inverse : bool, optional (default=False)
    Whether to return the inverse of the specified matrix. This
    can be sometimes useful. It will return the pseudo-inverse (which is the
    same as the inverse if the matrix is definite (i.e. invertible)). If
    `strict_pd == True` and the matrix is not definite, it will return an
    error.

  strict_pd : bool, optional (default=False)
    Whether to enforce that the provided matrix is definite (in addition to
    being PSD).

  matrix_name : str, optional (default='matrix')
    The name of the matrix used (example: 'init', 'prior'). Will be used in
    error messages.

  Returns
  -------
  M, or (M, M_inv) : `numpy.ndarray`
    The initial matrix to use M, and its inverse if `return_inverse=True`.
  """
    n_features = input.shape[-1]
    if isinstance(init, np.ndarray):
        # copy the array so that updating the metric later does not
        # modify the user-supplied init
        init = check_array(init, copy=True)

        # Assert that init.shape[1] = n_features
        if init.shape != (n_features, ) * 2:
            raise ValueError('The input dimensionality {} of the given '
                             'mahalanobis matrix `{}` must match the '
                             'dimensionality of the given inputs ({}).'.format(
                                 init.shape, matrix_name, n_features))

        # Assert that the matrix is symmetric
        if not np.allclose(init, init.T):
            raise ValueError("`{}` is not symmetric.".format(matrix_name))

    elif init not in ['identity', 'covariance', 'random']:
        raise ValueError(
            "`{}` must be 'identity', 'covariance', 'random' "
            "or a numpy array of shape (n_features, n_features).".format(
                matrix_name))

    random_state = check_random_state(random_state)
    M = init
    if isinstance(M, np.ndarray):
        w, V = eigh(M, check_finite=False)
        init_is_definite = _check_sdp_from_eigen(w)
        if strict_pd and not init_is_definite:
            raise LinAlgError(
                "You should provide a strictly positive definite "
                "matrix as `{}`. This one is not definite. Try another"
                " {}, or an algorithm that does not "
                "require the {} to be strictly positive definite.".format(
                    *((matrix_name, ) * 3)))
        elif return_inverse and not init_is_definite:
            warnings.warn('The initialization matrix is not invertible: '
                          'using the pseudo-inverse instead.')
        if return_inverse:
            M_inv = _pseudo_inverse_from_eig(w, V)
            return M, M_inv
        else:
            return M
    elif init == 'identity':
        M = np.eye(n_features, n_features)
        if return_inverse:
            M_inv = M.copy()
            return M, M_inv
        else:
            return M
    elif init == 'covariance':
        if input.ndim == 3:
            # if the input are tuples, we need to form an X by deduplication
            X = np.vstack(
                {tuple(row)
                 for row in input.reshape(-1, n_features)})
        else:
            X = input
        # atleast2d is necessary to deal with scalar covariance matrices
        M_inv = np.atleast_2d(np.cov(X, rowvar=False))
        w, V = eigh(M_inv, check_finite=False)
        cov_is_definite = _check_sdp_from_eigen(w)
        if strict_pd and not cov_is_definite:
            raise LinAlgError(
                "Unable to get a true inverse of the covariance "
                "matrix since it is not definite. Try another "
                "`{}`, or an algorithm that does not "
                "require the `{}` to be strictly positive definite.".format(
                    *((matrix_name, ) * 2)))
        elif not cov_is_definite:
            warnings.warn(
                'The covariance matrix is not invertible: '
                'using the pseudo-inverse instead.'
                'To make the covariance matrix invertible'
                ' you can remove any linearly dependent features and/or '
                'reduce the dimensionality of your input, '
                'for instance using `sklearn.decomposition.PCA` as a '
                'preprocessing step.')
        M = _pseudo_inverse_from_eig(w, V)
        if return_inverse:
            return M, M_inv
        else:
            return M
    elif init == 'random':
        # we need to create a random symmetric matrix
        M = make_spd_matrix(n_features, random_state=random_state)
        if return_inverse:
            # we use pinvh even if we know the matrix is definite, just because
            # we need the returned matrix to be symmetric (and sometimes
            # np.linalg.inv returns not symmetric inverses of symmetric matrices)
            # TODO: there might be a more efficient method to do so
            M_inv = pinvh(M)
            return M, M_inv
        else:
            return M
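
The `_pseudo_inverse_from_eig` helper used above is assumed to rebuild the pseudo-inverse from the eigendecomposition already computed by `eigh`, which for a symmetric PSD matrix agrees with `scipy.linalg.pinvh`. A small sketch of that equivalence (the cutoff choice below is illustrative, not the library's):

import numpy as np
from scipy.linalg import eigh, pinvh

def pseudo_inverse_from_eig(w, V, rtol=1e-10):
    # invert only the eigenvalues that are numerically non-zero
    w_inv = np.zeros_like(w)
    nonzero = np.abs(w) > rtol * np.max(np.abs(w))
    w_inv[nonzero] = 1.0 / w[nonzero]
    return (V * w_inv) @ V.T

rng = np.random.default_rng(0)
A = rng.standard_normal((5, 3))
M = A @ A.T                        # symmetric PSD, rank 3

w, V = eigh(M)
M_pinv = pseudo_inverse_from_eig(w, V)
print(np.allclose(M @ M_pinv @ M, M))      # True
print(np.allclose(M_pinv, pinvh(M)))       # True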
Code Example #47
0
File: my_gp_module.py Project: marcocaccin/MarcoGP
    def fit(self, X, y):
        """
        The Gaussian Process model fitting method.

        Parameters
        ----------
        X : double array_like
            An array with shape (n_samples, n_features) with the input at which
            observations were made.

        y : double array_like
            An array with shape (n_samples, ) or shape (n_samples, n_targets)
            with the observations of the output to be predicted.

        Returns
        -------
        gp : self
            A fitted Gaussian Process model object awaiting data to perform
            predictions.
        """

        K = self.calc_kernel_matrix(X)
        # # Force data to 2D numpy.array
        X = array2d(X)
        n_samples, n_features = X.shape
        y = sp.asarray(y)
        self.y_ndim_ = y.ndim
        if y.ndim == 1:
            y = y[:, sp.newaxis]
        _, n_targets = y.shape

        # # Normalise output data or not
        if self.normalise == 1:
            y_mean = sp.mean(y, axis=0)
            y_std = sp.std(y, axis=0)
            y_std[y_std == 0.] = 1.
            y = (y - y_mean) / y_std
        else:
            y_mean = 0.0
            y_std  = 1.0

        err = 'Dummy error message'
        inverse = K + self.nugget * sp.ones(n_samples)
        try:
            # print "is symmetric", Cholesky.isSymmetric(inverse)
            # upper_triang = Cholesky.Cholesky(inverse)
            # inverse = Cholesky.CholeskyInverse(upper_triang)
            inverse = LA.inv(inverse)
        except LA.LinAlgError as err:
            print "inv failed: %s. Switching to pinvh" % err
            try:
                inverse = LA.pinvh(inverse)
            except LA.LinAlgError as err:
                print "pinvh failed: %s. Switching to pinv2" % err
                try:
                    inverse = LA.pinv2(inverse)
                except LA.LinAlgError as err:
                    print "pinv2 failed: %s. Failed to invert matrix." % err
                    inverse = None

        # alpha is the vector of regression coefficients of GaussianProcess
        alpha = sp.dot(inverse, y)

        self.y = y
        self.y_mean, self.y_std = y_mean, y_std
        if not self.low_memory:
            self.inverse = inverse
        self.alpha = sp.array(alpha)
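
The try/except chain above falls back from an exact inverse to increasingly tolerant pseudo-inverses (`pinv2` has since been removed from SciPy). A compact Python 3 sketch of the same idea, with the nugget placed on the diagonal as is the usual jitter convention (the code above adds it to every entry):

import numpy as np
from scipy import linalg

def robust_symmetric_inverse(K, nugget=1e-10):
    """Invert a symmetric matrix, falling back to a pseudo-inverse."""
    A = K + nugget * np.eye(K.shape[0])   # jitter the diagonal for stability
    try:
        return linalg.inv(A)
    except np.linalg.LinAlgError:
        # exactly singular: use the symmetric pseudo-inverse instead
        return linalg.pinvh(A)

K = np.array([[1.0, 1.0],
              [1.0, 1.0]])                # singular kernel matrix
print(robust_symmetric_inverse(K, nugget=0.0))   # [[0.25 0.25] [0.25 0.25]]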
Code Example #48
0
    def fit(self, x, y):
        niter_max = self.niter_max
        l2 = self.l2

        n_unique_y = len(np.unique(y))
        if n_unique_y == 1:
            print(
                'The training data set is USELESS because it contains only 1 class'
            )

        elif n_unique_y == 2:  # binary
            # convert 0,1 to -1, 1
            y = 2 * y - 1.

            #print(niter_max)
            n = x.shape[1]
            y1 = (y + 1) / 2

            x_av = np.mean(x, axis=0)
            dx = x - x_av
            c = np.cov(dx, rowvar=False, bias=True)

            # 2019.07.16:
            c += l2 * np.identity(n) / (2 * len(y))
            c_inv = linalg.pinvh(c)

            # initial values
            h0 = 0.
            w = np.random.normal(0.0, 1. / np.sqrt(n), size=(n))

            cost = np.full(niter_max, 100.)
            for iloop in range(niter_max):
                h = h0 + x.dot(w)
                y_model = np.tanh(h)

                # stopping criterion
                cost[iloop] = ((y[:] - y_model[:])**2).mean()

                # 2019.07.12: loss function
                #p = 1/(1+np.exp(-2*h))
                #cost[iloop] = (-y1[:]*np.log(p) - (1-y1)*np.log(1-p)).mean()

                if iloop > 0 and cost[iloop] >= cost[iloop - 1]: break

                # update local field
                t = h != 0
                h[t] *= y[t] / y_model[t]
                h[~t] = y[~t]

                # find w from h
                h_av = h.mean()
                dh = h - h_av
                dhdx = dh[:, np.newaxis] * dx[:, :]

                dhdx_av = dhdx.mean(axis=0)
                w = c_inv.dot(dhdx_av)
                h0 = h_av - x_av.dot(w)

            self.h0 = h0
            self.w = w
            self.classtype = 'binary'

        else:  # multiple classes
            """ -----------------------------------------------------------------------
            2019.06.14: fit h0 and w based on Expectation Reflection
            input: features x[l,n], target: y[l,m] (y = +/-1)
             output: h0[m], w[n,m]
            """
            #def fit_multi(self,x,y,niter_max=500,l2=0.001):
            onehot_encoder = OneHotEncoder(sparse=False, categories='auto')
            y_onehot = onehot_encoder.fit_transform(y.reshape(-1, 1))

            y_onehot = 2 * y_onehot - 1  # convert to -1, +1

            y1 = (y + 1) / 2  # convert to 0, 1

            #print(niter_max)
            n = x.shape[1]
            m = y_onehot.shape[1]  # number of categories

            x_av = np.mean(x, axis=0)
            dx = x - x_av
            c = np.cov(dx, rowvar=False, bias=True)

            # 2019.07.16:  l2 = lamda/(2L)
            c += l2 * np.identity(n) / (2 * len(y))
            c_inv = linalg.pinvh(c)

            H0 = np.zeros(m)
            W = np.zeros((n, m))

            for i in range(m):
                y = y_onehot[:, i]
                # initial values
                h0 = 0.
                w = np.random.normal(0.0, 1. / np.sqrt(n), size=(n))

                cost = np.full(niter_max, 100.)
                for iloop in range(niter_max):
                    h = h0 + x.dot(w)
                    y_model = np.tanh(h)

                    # stopping criterion
                    cost[iloop] = ((y[:] - y_model[:])**2).mean()

                    # 2019.07.12: loss function
                    #p = 1/(1+np.exp(-2*h))
                    #cost[iloop] = (-y1[:]*np.log(p) - (1-y1)*np.log(1-p)).mean()

                    if iloop > 0 and cost[iloop] >= cost[iloop - 1]: break

                    # update local field
                    t = h != 0
                    h[t] *= y[t] / y_model[t]
                    h[~t] = y[~t]

                    # find w from h
                    h_av = h.mean()
                    dh = h - h_av
                    dhdx = dh[:, np.newaxis] * dx[:, :]

                    dhdx_av = dhdx.mean(axis=0)
                    w = c_inv.dot(dhdx_av)
                    h0 = h_av - x_av.dot(w)

                H0[i] = h0
                W[:, i] = w

            self.h0 = H0
            self.w = W
            self.classtype = 'multi'
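
The fitted `h0` and `w` define a local field h = h0 + x·w, and the commented-out loss above implies the probability model p(y=1|x) = 1 / (1 + exp(-2h)). The class's actual predict method is not shown, so the following is only a hypothetical companion sketch of what prediction could look like:

import numpy as np

def predict_proba_binary(x, h0, w):
    # local field, then the logistic link with the factor 2 of the tanh model
    h = h0 + x.dot(w)
    p1 = 1.0 / (1.0 + np.exp(-2.0 * h))
    return np.column_stack([1.0 - p1, p1])

def predict_binary(x, h0, w):
    # 0/1 labels; map back to -1/+1 if the caller expects that encoding
    return (predict_proba_binary(x, h0, w)[:, 1] > 0.5).astype(int)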
Code Example #49
0
def first_derivative_h(D, K, delta=5):
    return - 0.5 * (D - (delta - 2) * linalg.pinvh(K))
Code Example #50
0
def pinvh(a, cond=None, rcond=None, lower=True):
    return linalg.pinvh(a, cond, rcond, lower)
Code Example #51
0
File: test_basic.py Project: 7924102/scipy
 def test_simple_real(self):
     a = array([[1, 2, 3], [4, 5, 6], [7, 8, 10]], dtype=float)
     a = np.dot(a, a.T)
     a_pinv = pinvh(a)
     assert_array_almost_equal(np.dot(a, a_pinv), np.eye(3))
Code Example #52
0
    def fit(self, X, y, evidence_approx_method="fp", max_iter=100):
        '''
        Fits Bayesian linear regression, returns posterior mean and precision
        of parameters
        
        Parameters
        ----------
        X: array-like of size [n_samples,n_features]
           Matrix of explanatory variables (should not include bias term)
       
        Y: array-like of size [n_samples]
           Vector of dependent variables.
           
        Returns
        -------
        object: self
          self
    
        '''
        # preprocess data
        X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)
        n_samples, n_features = X.shape
        X, y, X_mean, y_mean, X_std = self._center_data(
            X, y, self.fit_intercept, self.copy_X)
        self._x_mean_ = X_mean
        self._y_mean_ = y_mean
        self._x_std_ = X_std
        self.scores_ = [-np.inf]

        # precision of noise and coefficients
        alpha = self.alpha
        var_y = np.var(y)
        # check that variance is non zero !!!
        if var_y == 0:
            beta = 1e-2
        else:
            beta = 1. / np.var(y)

        # to speed all further computations save svd decomposition and reuse it later
        u, d, v = svd(X, full_matrices=False)
        Uy = np.dot(u.T, y)
        dsq = d**2

        for i in range(self.n_iter):

            # find mean for posterior of w ( for EM this is E-step)
            p1_mu = v.T * (d / (dsq + alpha / beta))
            mu = np.dot(p1_mu, Uy)

            # precompute errors, since both methods use it in estimation
            error = y - np.dot(X, mu)
            sqdErr = np.dot(error, error)

            if sqdErr / n_samples < self.lambda_0:
                self.perfect_fit = True
                warnings.warn(
                    ('Almost perfect fit!!! Estimated values of variance '
                     'for predictive distribution are computed using only '
                     'Residual Sum of Squares, therefore they do not increase '
                     'in case of extrapolation'))
                break

            if self.optimizer == "fp":
                gamma = np.sum(dsq / (dsq + alpha / beta))
                # use updated mu and gamma parameters to update alpha and beta
                alpha = gamma / np.dot(mu, mu)
                beta = (n_samples - gamma) / sqdErr
            else:
                # M-step, update parameters alpha and beta to maximize ML TYPE II
                alpha = n_features / (np.dot(mu, mu) +
                                      np.sum(1 / (beta * dsq + alpha)))
                beta = n_samples / (sqdErr + np.sum(dsq /
                                                    (beta * dsq + alpha)))

            # calculate log likelihood p(Y | X, alpha, beta) (constants are not included)
            normaliser = 0.5 * (n_features * np.log(alpha) +
                                n_samples * np.log(beta))
            normaliser -= 0.5 * np.sum(np.log(beta * dsq + alpha))
            log_like = normaliser - 0.5 * alpha * np.sum(mu**2)
            log_like -= 0.5 * beta * sqdErr - 0.5 * n_samples * np.log(
                2 * np.pi)
            self.scores_.append(log_like)

            if self.verbose:
                print(("Iteration {0} completed, value of log "
                       "likelihood is {1}".format(i, log_like)))

            # if change in log-likelihood is smaller than threshold terminate
            converged = (self.scores_[-1] - self.scores_[-2] < self.tol)
            if converged or i == self.n_iter - 1:
                break

        # pinvh is used for numerical stability (inverse has closed-form solution)
        self.sigma_ = pinvh(np.dot(v.T * (beta * dsq + alpha), v))
        self.coef_ = beta * np.dot(self.sigma_, np.dot(X.T, y))
        self._set_intercept(X_mean, y_mean, X_std)
        self.beta_ = beta
        self.alpha_ = alpha
        return self
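
The closing step relies on the SVD identity used throughout this fit: with X = U diag(d) V^T and n_features <= n_samples, beta * X^T X + alpha * I = V diag(beta * d^2 + alpha) V^T, so the posterior covariance can be read off the singular values. A small numerical check of that identity (illustration only):

import numpy as np
from scipy.linalg import svd, pinvh

rng = np.random.default_rng(0)
n_samples, n_features = 50, 4
X = rng.standard_normal((n_samples, n_features))
alpha, beta = 0.5, 2.0

u, d, vt = svd(X, full_matrices=False)

# direct inversion of the posterior precision ...
sigma_direct = pinvh(beta * X.T @ X + alpha * np.eye(n_features))
# ... against the SVD form used in the fit above
sigma_svd = (vt.T * (1.0 / (beta * d ** 2 + alpha))) @ vt
print(np.allclose(sigma_direct, sigma_svd))   # True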
Code Example #53
0
File: graph_lasso_.py Project: abecadel/scikit-learn
def graphical_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4,
                    enet_tol=1e-4, max_iter=100, verbose=False,
                    return_costs=False, eps=np.finfo(np.float64).eps,
                    return_n_iter=False):
    """l1-penalized covariance estimator

    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.

    Parameters
    ----------
    emp_cov : 2D ndarray, shape (n_features, n_features)
        Empirical covariance from which to compute the covariance estimate.

    alpha : positive float
        The regularization parameter: the higher alpha, the more
        regularization, the sparser the inverse covariance.

    cov_init : 2D array (n_features, n_features), optional
        The initial guess for the covariance.

    mode : {'cd', 'lars'}
        The Lasso solver to use: coordinate descent or LARS. Use LARS for
        very sparse underlying graphs, where p > n. Elsewhere prefer cd
        which is more numerically stable.

    tol : positive float, optional
        The tolerance to declare convergence: if the dual gap goes below
        this value, iterations are stopped.

    enet_tol : positive float, optional
        The tolerance for the elastic net solver used to calculate the descent
        direction. This parameter controls the accuracy of the search direction
        for a given column update, not of the overall parameter estimate. Only
        used for mode='cd'.

    max_iter : integer, optional
        The maximum number of iterations.

    verbose : boolean, optional
        If verbose is True, the objective function and dual gap are
        printed at each iteration.

    return_costs : boolean, optional
        If return_costs is True, the objective function and dual gap
        at each iteration are returned.

    eps : float, optional
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    return_n_iter : bool, optional
        Whether or not to return the number of iterations.

    Returns
    -------
    covariance : 2D ndarray, shape (n_features, n_features)
        The estimated covariance matrix.

    precision : 2D ndarray, shape (n_features, n_features)
        The estimated (sparse) precision matrix.

    costs : list of (objective, dual_gap) pairs
        The list of values of the objective function and the dual gap at
        each iteration. Returned only if return_costs is True.

    n_iter : int
        Number of iterations. Returned only if `return_n_iter` is set to True.

    See Also
    --------
    GraphicalLasso, GraphicalLassoCV

    Notes
    -----
    The algorithm employed to solve this problem is the GLasso algorithm,
    from the Friedman 2008 Biostatistics paper. It is the same algorithm
    as in the R `glasso` package.

    One possible difference with the `glasso` R package is that the
    diagonal coefficients are not penalized.

    """
    _, n_features = emp_cov.shape
    if alpha == 0:
        if return_costs:
            precision_ = linalg.inv(emp_cov)
            cost = - 2. * log_likelihood(emp_cov, precision_)
            cost += n_features * np.log(2 * np.pi)
            d_gap = np.sum(emp_cov * precision_) - n_features
            if return_n_iter:
                return emp_cov, precision_, (cost, d_gap), 0
            else:
                return emp_cov, precision_, (cost, d_gap)
        else:
            if return_n_iter:
                return emp_cov, linalg.inv(emp_cov), 0
            else:
                return emp_cov, linalg.inv(emp_cov)
    if cov_init is None:
        covariance_ = emp_cov.copy()
    else:
        covariance_ = cov_init.copy()
    # As a trivial regularization (Tikhonov like), we scale down the
    # off-diagonal coefficients of our starting point: This is needed, as
    # in the cross-validation the cov_init can easily be
    # ill-conditioned, and the CV loop blows up. Besides, this takes a
    # conservative standpoint on the initial conditions, and it tends to
    # make the convergence go faster.
    covariance_ *= 0.95
    diagonal = emp_cov.flat[::n_features + 1]
    covariance_.flat[::n_features + 1] = diagonal
    precision_ = linalg.pinvh(covariance_)

    indices = np.arange(n_features)
    costs = list()
    # The different l1 regression solvers have different numerical errors
    if mode == 'cd':
        errors = dict(over='raise', invalid='ignore')
    else:
        errors = dict(invalid='raise')
    try:
        # be robust to the max_iter=0 edge case, see:
        # https://github.com/scikit-learn/scikit-learn/issues/4134
        d_gap = np.inf
        # set a sub_covariance buffer
        sub_covariance = np.ascontiguousarray(covariance_[1:, 1:])
        for i in range(max_iter):
            for idx in range(n_features):
                # To keep the contiguous matrix `sub_covariance` equal to
                # covariance_[indices != idx].T[indices != idx]
                # we only need to update 1 column and 1 line when idx changes
                if idx > 0:
                    di = idx - 1
                    sub_covariance[di] = covariance_[di][indices != idx]
                    sub_covariance[:, di] = covariance_[:, di][indices != idx]
                else:
                    sub_covariance[:] = covariance_[1:, 1:]
                row = emp_cov[idx, indices != idx]
                with np.errstate(**errors):
                    if mode == 'cd':
                        # Use coordinate descent
                        coefs = -(precision_[indices != idx, idx]
                                  / (precision_[idx, idx] + 1000 * eps))
                        coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram(
                            coefs, alpha, 0, sub_covariance,
                            row, row, max_iter, enet_tol,
                            check_random_state(None), False)
                    else:
                        # Use LARS
                        _, _, coefs = lars_path(
                            sub_covariance, row, Xy=row, Gram=sub_covariance,
                            alpha_min=alpha / (n_features - 1), copy_Gram=True,
                            eps=eps, method='lars', return_path=False)
                # Update the precision matrix
                precision_[idx, idx] = (
                    1. / (covariance_[idx, idx]
                          - np.dot(covariance_[indices != idx, idx], coefs)))
                precision_[indices != idx, idx] = (- precision_[idx, idx]
                                                   * coefs)
                precision_[idx, indices != idx] = (- precision_[idx, idx]
                                                   * coefs)
                coefs = np.dot(sub_covariance, coefs)
                covariance_[idx, indices != idx] = coefs
                covariance_[indices != idx, idx] = coefs
            d_gap = _dual_gap(emp_cov, precision_, alpha)
            cost = _objective(emp_cov, precision_, alpha)
            if verbose:
                print('[graphical_lasso] Iteration '
                      '% 3i, cost % 3.2e, dual gap %.3e'
                      % (i, cost, d_gap))
            if return_costs:
                costs.append((cost, d_gap))
            if np.abs(d_gap) < tol:
                break
            if not np.isfinite(cost) and i > 0:
                raise FloatingPointError('Non SPD result: the system is '
                                         'too ill-conditioned for this solver')
        else:
            warnings.warn('graphical_lasso: did not converge after '
                          '%i iteration: dual gap: %.3e'
                          % (max_iter, d_gap), ConvergenceWarning)
    except FloatingPointError as e:
        e.args = (e.args[0]
                  + '. The system is too ill-conditioned for this solver',)
        raise e

    if return_costs:
        if return_n_iter:
            return covariance_, precision_, costs, i + 1
        else:
            return covariance_, precision_, costs
    else:
        if return_n_iter:
            return covariance_, precision_, i + 1
        else:
            return covariance_, precision_
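
A hedged usage sketch for the estimator above, assuming a scikit-learn version that exposes `graphical_lasso` and `empirical_covariance` under `sklearn.covariance` with this calling convention:

import numpy as np
from sklearn.covariance import empirical_covariance, graphical_lasso

rng = np.random.default_rng(0)
X = rng.standard_normal((200, 5))
X[:, 1] += 0.8 * X[:, 0]            # introduce some correlation

emp_cov = empirical_covariance(X)
covariance, precision = graphical_lasso(emp_cov, alpha=0.2)

# larger alpha -> sparser precision (more zero off-diagonal entries)
print(np.round(precision, 2))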
Code Example #54
0
 def test_simple_real(self):
     a = array([[1, 2, 3], [4, 5, 6], [7, 8, 10]], dtype=float)
     a = np.dot(a, a.T)
     a_pinv = pinvh(a)
     assert_array_almost_equal(np.dot(a, a_pinv), np.eye(3))
Code Example #55
0
 def test_simple_complex(self):
     a = (array([[1, 2, 3], [4, 5, 6], [7, 8, 10]], dtype=float) +
          1j * array([[10, 8, 7], [6, 5, 4], [3, 2, 1]], dtype=float))
     a = np.dot(a, a.conj().T)
     a_pinv = pinvh(a)
     assert_array_almost_equal(np.dot(a, a_pinv), np.eye(3))
Code Example #56
0
def kernel_time_graphical_lasso(
    emp_cov,
    alpha=0.01,
    rho=1,
    kernel=None,
    max_iter=100,
    n_samples=None,
    verbose=False,
    psi="laplacian",
    tol=1e-4,
    rtol=1e-4,
    return_history=False,
    return_n_iter=True,
    mode="admm",
    update_rho_options=None,
    compute_objective=True,
    stop_at=None,
    stop_when=1e-4,
    init="empirical",
):
    """Time-varying graphical lasso solver.

    Solves the following problem via ADMM:
        min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha ||K_i||_{od,1}
            + sum_{s>t}^T k_psi(s,t) Psi(K_s - K_t)

    where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data
    matrix X (training observations by features).

    Parameters
    ----------
    emp_cov : ndarray, shape (n_times, n_features, n_features)
        Empirical covariance of data, one matrix per time point.
    alpha : float, optional
        Regularisation parameter.
    rho : float, optional
        Augmented Lagrangian parameter.
    max_iter : int, optional
        Maximum number of iterations.
    tol : float, optional
        Absolute tolerance for convergence.
    rtol : float, optional
        Relative tolerance for convergence.
    return_history : bool, optional
        Return the history of computed values.
    init : {'empirical', 'zeros', ndarray}, default 'empirical'
        How to initialise the inverse covariance matrix. Default is to take
        the empirical covariance and invert it.

    Returns
    -------
    K : numpy.array, 3-dimensional (T x d x d)
        Solution to the problem for each time t=1...T.
    history : list
        If return_history, then also a structure that contains the
        objective value, the primal and dual residual norms, and tolerances
        for the primal and dual residual norms at each iteration.

    """
    psi, prox_psi, psi_node_penalty = check_norm_prox(psi)
    n_times, _, n_features = emp_cov.shape

    if kernel is None:
        kernel = np.eye(n_times)

    Z_0 = init_precision(emp_cov, mode=init)
    U_0 = np.zeros_like(Z_0)
    Z_0_old = np.zeros_like(Z_0)

    Z_M, Z_M_old = {}, {}
    U_M = {}
    for m in range(1, n_times):
        # all possible Markovian jumps
        Z_L = Z_0.copy()[:-m]
        Z_R = Z_0.copy()[m:]
        Z_M[m] = (Z_L, Z_R)

        U_L = np.zeros_like(Z_L)
        U_R = np.zeros_like(Z_R)
        U_M[m] = (U_L, U_R)

        Z_L_old = np.zeros_like(Z_L)
        Z_R_old = np.zeros_like(Z_R)
        Z_M_old[m] = (Z_L_old, Z_R_old)

    if n_samples is None:
        n_samples = np.ones(n_times)

    checks = [
        convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_M, alpha,
                                  kernel, psi))
    ]
    for iteration_ in range(max_iter):
        # update K
        A = Z_0 - U_0
        for m in range(1, n_times):
            A[:-m] += Z_M[m][0] - U_M[m][0]
            A[m:] += Z_M[m][1] - U_M[m][1]

        A /= n_times
        # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho)
        # K = np.array(map(soft_thresholding_, A))
        A += A.transpose(0, 2, 1)
        A /= 2.0

        A *= -rho * n_times / n_samples[:, None, None]
        A += emp_cov

        K = np.array([
            prox_logdet(a, lamda=ni / (rho * n_times))
            for a, ni in zip(A, n_samples)
        ])

        # update Z_0
        A = K + U_0
        A += A.transpose(0, 2, 1)
        A /= 2.0
        Z_0 = soft_thresholding(A, lamda=alpha / rho)

        # update residuals
        U_0 += K - Z_0

        # other Zs
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            A_L = K[:-m] + U_L
            A_R = K[m:] + U_R
            if not psi_node_penalty:
                prox_e = prox_psi(A_R - A_L,
                                  lamda=2.0 *
                                  np.diag(kernel, m)[:, None, None] / rho)
                Z_L = 0.5 * (A_L + A_R - prox_e)
                Z_R = 0.5 * (A_L + A_R + prox_e)
            else:
                Z_L, Z_R = prox_psi(
                    np.concatenate((A_L, A_R), axis=1),
                    lamda=0.5 * np.diag(kernel, m)[:, None, None] / rho,
                    rho=rho,
                    tol=tol,
                    rtol=rtol,
                    max_iter=max_iter,
                )
            Z_M[m] = (Z_L, Z_R)

            # update other residuals
            U_L += K[:-m] - Z_L
            U_R += K[m:] - Z_R

        # diagnostics, reporting, termination checks
        rnorm = np.sqrt(
            squared_norm(K - Z_0) + sum(
                squared_norm(K[:-m] - Z_M[m][0]) +
                squared_norm(K[m:] - Z_M[m][1]) for m in range(1, n_times)))

        snorm = rho * np.sqrt(
            squared_norm(Z_0 - Z_0_old) + sum(
                squared_norm(Z_M[m][0] - Z_M_old[m][0]) +
                squared_norm(Z_M[m][1] - Z_M_old[m][1])
                for m in range(1, n_times)))

        obj = objective(n_samples, emp_cov, Z_0, K, Z_M, alpha, kernel,
                        psi) if compute_objective else np.nan

        check = convergence(
            obj=obj,
            rnorm=rnorm,
            snorm=snorm,
            e_pri=n_features * n_times * tol + rtol * max(
                np.sqrt(
                    squared_norm(Z_0) + sum(
                        squared_norm(Z_M[m][0]) + squared_norm(Z_M[m][1])
                        for m in range(1, n_times))),
                np.sqrt(
                    squared_norm(K) + sum(
                        squared_norm(K[:-m]) + squared_norm(K[m:])
                        for m in range(1, n_times))),
            ),
            e_dual=n_features * n_times * tol + rtol * rho * np.sqrt(
                squared_norm(U_0) + sum(
                    squared_norm(U_M[m][0]) + squared_norm(U_M[m][1])
                    for m in range(1, n_times))),
        )
        Z_0_old = Z_0.copy()
        for m in range(1, n_times):
            Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy())

        if verbose:
            print("obj: %.4f, rnorm: %.4f, snorm: %.4f,"
                  "eps_pri: %.4f, eps_dual: %.4f" % check[:5])

        checks.append(check)
        if stop_at is not None:
            if abs(check.obj - stop_at) / abs(stop_at) < stop_when:
                break

        if check.rnorm <= check.e_pri and check.snorm <= check.e_dual:
            break

        rho_new = update_rho(rho,
                             rnorm,
                             snorm,
                             iteration=iteration_,
                             **(update_rho_options or {}))
        # scaled dual variables should be also rescaled
        U_0 *= rho / rho_new
        for m in range(1, n_times):
            U_L, U_R = U_M[m]
            U_L *= rho / rho_new
            U_R *= rho / rho_new
        rho = rho_new
    else:
        warnings.warn("Objective did not converge.")

    covariance_ = np.array([linalg.pinvh(x) for x in Z_0])
    return_list = [Z_0, covariance_]
    if return_history:
        return_list.append(checks)
    if return_n_iter:
        return_list.append(iteration_ + 1)
    return return_list
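For reference, the Z_0 update above relies on a soft_thresholding helper defined earlier in this solver. Below is a minimal, self-contained sketch of the elementwise soft-thresholding (l1 proximal) operator it is assumed to implement; the exact broadcasting conventions of the original helper may differ.

import numpy as np

def soft_thresholding_sketch(a, lamda):
    """Elementwise soft-thresholding: sign(a) * max(|a| - lamda, 0).

    This is the proximal operator of the l1 norm scaled by `lamda`
    (assumed here to broadcast against `a`, as in the ADMM update above).
    """
    return np.sign(a) * np.maximum(np.abs(a) - lamda, 0.0)

# Example: shrinking a small symmetric block towards zero
A = np.array([[1.5, -0.2], [-0.2, 1.5]])
print(soft_thresholding_sketch(A, lamda=0.3))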
Code example #57
0
File: bayes.py Project: raghavrv/scikit-learn
    def fit(self, X, y):
        """Fit the ARDRegression model according to the given training data
        and parameters.

        Iterative procedure to maximize the evidence

        Parameters
        ----------
        X : array-like, shape = [n_samples, n_features]
            Training vector, where n_samples is the number of samples and
            n_features is the number of features.
        y : array, shape = [n_samples]
            Target values (real numbers). Will be cast to X's dtype if necessary.

        Returns
        -------
        self : returns an instance of self.
        """
        X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)

        n_samples, n_features = X.shape
        coef_ = np.zeros(n_features)

        X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(
            X, y, self.fit_intercept, self.normalize, self.copy_X)

        # Launch the convergence loop
        keep_lambda = np.ones(n_features, dtype=bool)

        lambda_1 = self.lambda_1
        lambda_2 = self.lambda_2
        alpha_1 = self.alpha_1
        alpha_2 = self.alpha_2
        verbose = self.verbose

        # Initialization of the values of the parameters
        alpha_ = 1. / np.var(y)
        lambda_ = np.ones(n_features)

        self.scores_ = list()
        coef_old_ = None

        # Iterative procedure of ARDRegression
        for iter_ in range(self.n_iter):
            # Compute mu and sigma (using Woodbury matrix identity)
            sigma_ = pinvh(np.eye(n_samples) / alpha_ +
                           np.dot(X[:, keep_lambda] *
                           np.reshape(1. / lambda_[keep_lambda], [1, -1]),
                           X[:, keep_lambda].T))
            sigma_ = np.dot(sigma_, X[:, keep_lambda] *
                            np.reshape(1. / lambda_[keep_lambda], [1, -1]))
            sigma_ = - np.dot(np.reshape(1. / lambda_[keep_lambda], [-1, 1]) *
                              X[:, keep_lambda].T, sigma_)
            sigma_.flat[::(sigma_.shape[1] + 1)] += 1. / lambda_[keep_lambda]
            coef_[keep_lambda] = alpha_ * np.dot(
                sigma_, np.dot(X[:, keep_lambda].T, y))

            # Update alpha and lambda
            rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
            gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_)
            lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) /
                                    ((coef_[keep_lambda]) ** 2 +
                                     2. * lambda_2))
            alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) /
                      (rmse_ + 2. * alpha_2))

            # Prune the weights with a precision over a threshold
            keep_lambda = lambda_ < self.threshold_lambda
            coef_[~keep_lambda] = 0

            # Compute the objective function
            if self.compute_score:
                s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()
                s += alpha_1 * log(alpha_) - alpha_2 * alpha_
                s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) +
                            np.sum(np.log(lambda_)))
                s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum())
                self.scores_.append(s)

            # Check for convergence
            if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
                if verbose:
                    print("Converged after %s iterations" % iter_)
                break
            coef_old_ = np.copy(coef_)

        self.coef_ = coef_
        self.alpha_ = alpha_
        self.sigma_ = sigma_
        self.lambda_ = lambda_
        self._set_intercept(X_offset_, y_offset_, X_scale_)
        return self
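For context, here is a minimal usage sketch of the estimator whose fit method is shown above, assuming the standard scikit-learn ARDRegression interface; the data and settings are made up.

import numpy as np
from sklearn.linear_model import ARDRegression

rng = np.random.RandomState(0)
X = rng.randn(50, 10)
# only the first three features carry signal
y = 2.0 * X[:, 0] - X[:, 1] + 0.5 * X[:, 2] + 0.01 * rng.randn(50)

# compute_score=True stores the objective value at each iteration in scores_
reg = ARDRegression(compute_score=True)
reg.fit(X, y)

print(reg.coef_)   # coefficients of irrelevant features are pruned towards zero
print(reg.alpha_)  # estimated noise precision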
Code example #58
0
def graphical_lasso(
    emp_cov,
    alpha,
    *,
    cov_init=None,
    mode="cd",
    tol=1e-4,
    enet_tol=1e-4,
    max_iter=100,
    verbose=False,
    return_costs=False,
    eps=np.finfo(np.float64).eps,
    return_n_iter=False,
):
    """l1-penalized covariance estimator

    Read more in the :ref:`User Guide <sparse_inverse_covariance>`.

    .. versionchanged:: v0.20
        graph_lasso has been renamed to graphical_lasso

    Parameters
    ----------
    emp_cov : ndarray of shape (n_features, n_features)
        Empirical covariance from which to compute the covariance estimate.

    alpha : float
        The regularization parameter: the higher alpha, the more
        regularization, the sparser the inverse covariance.
        Range is (0, inf].

    cov_init : array of shape (n_features, n_features), default=None
        The initial guess for the covariance. If None, then the empirical
        covariance is used.

    mode : {'cd', 'lars'}, default='cd'
        The Lasso solver to use: coordinate descent or LARS. Use LARS for
        very sparse underlying graphs, where p > n. Elsewhere prefer cd
        which is more numerically stable.

    tol : float, default=1e-4
        The tolerance to declare convergence: if the dual gap goes below
        this value, iterations are stopped. Range is (0, inf].

    enet_tol : float, default=1e-4
        The tolerance for the elastic net solver used to calculate the descent
        direction. This parameter controls the accuracy of the search direction
        for a given column update, not of the overall parameter estimate. Only
        used for mode='cd'. Range is (0, inf].

    max_iter : int, default=100
        The maximum number of iterations.

    verbose : bool, default=False
        If verbose is True, the objective function and dual gap are
        printed at each iteration.

    return_costs : bool, default=False
        If return_costs is True, the objective function and dual gap
        at each iteration are returned.

    eps : float, default=np.finfo(np.float64).eps
        The machine-precision regularization in the computation of the
        Cholesky diagonal factors. Increase this for very ill-conditioned
        systems.

    return_n_iter : bool, default=False
        Whether or not to return the number of iterations.

    Returns
    -------
    covariance : ndarray of shape (n_features, n_features)
        The estimated covariance matrix.

    precision : ndarray of shape (n_features, n_features)
        The estimated (sparse) precision matrix.

    costs : list of (objective, dual_gap) pairs
        The list of values of the objective function and the dual gap at
        each iteration. Returned only if return_costs is True.

    n_iter : int
        Number of iterations. Returned only if `return_n_iter` is set to True.

    See Also
    --------
    GraphicalLasso, GraphicalLassoCV

    Notes
    -----
    The algorithm employed to solve this problem is the GLasso algorithm,
    from the Friedman 2008 Biostatistics paper. It is the same algorithm
    as in the R `glasso` package.

    One possible difference with the `glasso` R package is that the
    diagonal coefficients are not penalized.
    """
    _, n_features = emp_cov.shape
    if alpha == 0:
        if return_costs:
            precision_ = linalg.inv(emp_cov)
            cost = -2.0 * log_likelihood(emp_cov, precision_)
            cost += n_features * np.log(2 * np.pi)
            d_gap = np.sum(emp_cov * precision_) - n_features
            if return_n_iter:
                return emp_cov, precision_, (cost, d_gap), 0
            else:
                return emp_cov, precision_, (cost, d_gap)
        else:
            if return_n_iter:
                return emp_cov, linalg.inv(emp_cov), 0
            else:
                return emp_cov, linalg.inv(emp_cov)
    if cov_init is None:
        covariance_ = emp_cov.copy()
    else:
        covariance_ = cov_init.copy()
    # As a trivial regularization (Tikhonov like), we scale down the
    # off-diagonal coefficients of our starting point: this is needed, as
    # in the cross-validation the cov_init can easily be
    # ill-conditioned, and the CV loop blows up. Besides, this takes a
    # conservative standpoint on the initial conditions, and it tends to
    # make the convergence go faster.
    covariance_ *= 0.95
    diagonal = emp_cov.flat[:: n_features + 1]
    covariance_.flat[:: n_features + 1] = diagonal
    precision_ = linalg.pinvh(covariance_)

    indices = np.arange(n_features)
    costs = list()
    # The different l1 regression solvers have different numerical errors
    if mode == "cd":
        errors = dict(over="raise", invalid="ignore")
    else:
        errors = dict(invalid="raise")
    try:
        # be robust to the max_iter=0 edge case, see:
        # https://github.com/scikit-learn/scikit-learn/issues/4134
        d_gap = np.inf
        # set a sub_covariance buffer
        sub_covariance = np.copy(covariance_[1:, 1:], order="C")
        for i in range(max_iter):
            for idx in range(n_features):
                # To keep the contiguous matrix `sub_covariance` equal to
                # covariance_[indices != idx].T[indices != idx]
                # we only need to update 1 column and 1 row when idx changes
                if idx > 0:
                    di = idx - 1
                    sub_covariance[di] = covariance_[di][indices != idx]
                    sub_covariance[:, di] = covariance_[:, di][indices != idx]
                else:
                    sub_covariance[:] = covariance_[1:, 1:]
                row = emp_cov[idx, indices != idx]
                with np.errstate(**errors):
                    if mode == "cd":
                        # Use coordinate descent
                        coefs = -(
                            precision_[indices != idx, idx]
                            / (precision_[idx, idx] + 1000 * eps)
                        )
                        coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram(
                            coefs,
                            alpha,
                            0,
                            sub_covariance,
                            row,
                            row,
                            max_iter,
                            enet_tol,
                            check_random_state(None),
                            False,
                        )
                    else:
                        # Use LARS
                        _, _, coefs = lars_path_gram(
                            Xy=row,
                            Gram=sub_covariance,
                            n_samples=row.size,
                            alpha_min=alpha / (n_features - 1),
                            copy_Gram=True,
                            eps=eps,
                            method="lars",
                            return_path=False,
                        )
                # Update the precision matrix
                precision_[idx, idx] = 1.0 / (
                    covariance_[idx, idx]
                    - np.dot(covariance_[indices != idx, idx], coefs)
                )
                precision_[indices != idx, idx] = -precision_[idx, idx] * coefs
                precision_[idx, indices != idx] = -precision_[idx, idx] * coefs
                coefs = np.dot(sub_covariance, coefs)
                covariance_[idx, indices != idx] = coefs
                covariance_[indices != idx, idx] = coefs
            if not np.isfinite(precision_.sum()):
                raise FloatingPointError(
                    "The system is too ill-conditioned " "for this solver"
                )
            d_gap = _dual_gap(emp_cov, precision_, alpha)
            cost = _objective(emp_cov, precision_, alpha)
            if verbose:
                print(
                    "[graphical_lasso] Iteration "
                    "% 3i, cost % 3.2e, dual gap %.3e" % (i, cost, d_gap)
                )
            if return_costs:
                costs.append((cost, d_gap))
            if np.abs(d_gap) < tol:
                break
            if not np.isfinite(cost) and i > 0:
                raise FloatingPointError(
                    "Non SPD result: the system is "
                    "too ill-conditioned for this solver"
                )
        else:
            warnings.warn(
                "graphical_lasso: did not converge after "
                "%i iteration: dual gap: %.3e" % (max_iter, d_gap),
                ConvergenceWarning,
            )
    except FloatingPointError as e:
        e.args = (e.args[0] + ". The system is too ill-conditioned for this solver",)
        raise e

    if return_costs:
        if return_n_iter:
            return covariance_, precision_, costs, i + 1
        else:
            return covariance_, precision_, costs
    else:
        if return_n_iter:
            return covariance_, precision_, i + 1
        else:
            return covariance_, precision_
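A minimal usage sketch of the function above, assuming it is exposed as sklearn.covariance.graphical_lasso; the data and the regularisation value are arbitrary.

import numpy as np
from sklearn.covariance import empirical_covariance, graphical_lasso

rng = np.random.RandomState(42)
X = rng.randn(200, 5)
X[:, 1] += 0.7 * X[:, 0]          # introduce one strong dependency

emp_cov = empirical_covariance(X)
# higher alpha -> sparser estimated precision matrix
covariance, precision = graphical_lasso(emp_cov, alpha=0.2)

print(np.round(precision, 2))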
Code example #59
0
    def compute_density_PAk_gCorr(
        self,
        gauss_approx=True,
        alpha=1.0,
        log_den_PAk=None,
        log_den_PAk_err=None,
        comp_err=True,
    ):
        """
        Finds the maximum-likelihood solution of the PAk likelihood plus the
        gCorr likelihood, with the deltaFijs computed using the gradients.
        """
        # TODO: we need to implement the deltaFijs to be computed as a*l (as in PAk)

        # compute changes in free energy
        if self.Fij_array is None:
            self.compute_deltaFs_grads_semisum()

        if self.verb:
            print("PAk_gCorr density estimation started")
            sec = time.time()

        dc = np.empty(self.N, dtype=float)
        log_den = np.empty(self.N, dtype=float)
        log_den_err = np.zeros(self.N, dtype=float)
        prefactor = np.exp(
            self.intrinsic_dim / 2.0 * np.log(np.pi)
            - gammaln((self.intrinsic_dim + 2) / 2)
        )
        log_den_min = 9.9e300
        vij_list = []
        Fij_list = []
        Fij_var_list = []

        if gauss_approx is True:
            if self.verb:
                print("Maximising likelihood in Gaussian approximation")

            if log_den_PAk is not None and log_den_PAk_err is not None:
                self.log_den = log_den_PAk
                self.log_den_err = log_den_PAk_err

            else:
                self.compute_density_PAk()

            # compute adjacency matrix and cumulative changes
            A = sparse.lil_matrix((self.N, self.N), dtype=np.float_)

            supp_deltaF = sparse.lil_matrix((self.N, self.N), dtype=np.float_)

            # define redundancy factor for each A matrix entry as the geometric mean of the 2 corresponding k*
            k1 = self.kstar[self.nind_list[:, 0]]
            k2 = self.kstar[self.nind_list[:, 1]]
            redundancy = np.sqrt(k1 * k2)

            for nspar, indices in enumerate(self.nind_list):
                i = indices[0]
                j = indices[1]
                # tmp = 1.0 / self.Fij_var_array[nspar]
                tmp = 1.0 / self.Fij_var_array[nspar] / redundancy[nspar]
                A[i, j] = -tmp
                supp_deltaF[i, j] = self.Fij_array[nspar] * tmp

            A = alpha * sparse.lil_matrix(A + A.transpose())

            diag = (
                np.array(-A.sum(axis=1)).reshape((self.N,))
                + (1.0 - alpha) / self.log_den_err**2
            )

            A.setdiag(diag)

            deltaFcum = (
                alpha
                * (
                    np.array(supp_deltaF.sum(axis=0)).reshape((self.N,))
                    - np.array(supp_deltaF.sum(axis=1)).reshape((self.N,))
                )
                + (1.0 - alpha) * self.log_den / self.log_den_err**2
            )

            sec2 = time.time()
            if self.verb:
                print("{0:0.2f} seconds to fill sparse matrix".format(sec2 - sec))

            log_den = sparse.linalg.spsolve(A.tocsr(), deltaFcum)

            if self.verb:
                print(
                    "{0:0.2f} seconds to solve linear system".format(time.time() - sec2)
                )
            sec2 = time.time()

            self.log_den = log_den

            if comp_err is True:
                self.A = A.todense()
                self.B = slin.pinvh(self.A)
                # self.B = slin.inv(self.A)
                self.log_den_err = np.sqrt(np.diag(self.B))

            if self.verb:
                print("{0:0.2f} seconds inverting A matrix".format(time.time() - sec2))
            sec2 = time.time()

            # self.log_den_err = np.sqrt(diag/(np.array(np.sum(np.square(A.todense()),axis=1)).reshape(self.N,)))

        else:
            if self.verb:
                print("Solving via SGD")
            from dadapy.utils_.mlmax_pytorch import maximise_wPAk

            for i in range(self.N):

                Fij_list.append(
                    self.Fij_array[self.nind_iptr[i] : self.nind_iptr[i + 1]]
                )
                Fij_var_list.append(
                    self.Fij_var_array[self.nind_iptr[i] : self.nind_iptr[i + 1]]
                )

                dc[i] = self.distances[i, self.kstar[i]]
                rr = np.log(self.kstar[i]) - (
                    np.log(prefactor)
                    + self.intrinsic_dim * np.log(self.distances[i, self.kstar[i]])
                )
                log_den[i] = rr
                vj = np.zeros(self.kstar[i])
                for j in range(self.kstar[i]):
                    vj[j] = prefactor * (
                        pow(self.distances[i, j + 1], self.intrinsic_dim)
                        - pow(self.distances[i, j], self.intrinsic_dim)
                    )

                vij_list.append(vj)

            l_, log_den = maximise_wPAk(
                log_den,
                self.kstar,
                vij_list,
                self.dist_indices,
                Fij_list,
                Fij_var_list,
                alpha,
            )
            log_den -= np.log(self.N)

        self.log_den = log_den

        sec2 = time.time()
        if self.verb:
            print(
                "{0:0.2f} seconds for PAk_gCorr density estimation".format(sec2 - sec)
            )
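The Gaussian-approximation branch above reduces the estimation to a single sparse linear solve (spsolve on the CSR form of A), with a dense pinvh performed only when error bars are requested. Below is a minimal standalone sketch of that solve-then-invert pattern, using a small made-up symmetric system in place of the real A and deltaFcum.

import numpy as np
from scipy import sparse
from scipy.sparse import linalg as splinalg
from scipy.linalg import pinvh

# small symmetric, diagonally dominant system standing in for A
A = sparse.lil_matrix((4, 4))
A.setdiag([4.0, 4.0, 4.0, 4.0])
A[0, 1] = A[1, 0] = -1.0
A[2, 3] = A[3, 2] = -1.0
b = np.array([1.0, 0.0, 0.5, -0.5])   # stand-in for deltaFcum

x = splinalg.spsolve(A.tocsr(), b)    # point estimate (log densities)
B = pinvh(A.toarray())                # dense inverse, only needed for errors
err = np.sqrt(np.diag(B))             # per-point error bars

print(x, err)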
Code example #60
0
File: hamiltonian.py Project: millskyle/nomad
def hamiltonian(traj_list, traj_alive, cent_list=None):
    """Builds the Hamiltonian matrix from a list of trajectories."""

    n_alive = len(traj_alive)
    if glbl.integrals.hermitian:
        n_elem = int(n_alive * (n_alive + 1) / 2)
    else:
        n_elem = n_alive * n_alive

    T = np.zeros((n_alive, n_alive), dtype=complex)
    V = np.zeros((n_alive, n_alive), dtype=complex)
    H = np.zeros((n_alive, n_alive), dtype=complex)
    S = np.zeros((n_alive, n_alive), dtype=complex)
    Snuc = np.zeros((n_alive, n_alive), dtype=complex)
    Sinv = np.zeros((n_alive, n_alive), dtype=complex)
    Sdot = np.zeros((n_alive, n_alive), dtype=complex)
    Heff = np.zeros((n_alive, n_alive), dtype=complex)
    t_ovrlp = np.zeros((n_alive, n_alive), dtype=complex)
    Sdnuc = np.zeros((n_alive, n_alive), dtype=complex)
    Sdele = np.zeros((n_alive, n_alive), dtype=complex)

    # now evaluate the hamiltonian matrix
    for ij in range(n_elem):
        if glbl.integrals.hermitian:
            i, j = ut_ind(ij)
        else:
            i, j = sq_ind(ij, n_alive)

        ii = traj_alive[i]
        jj = traj_alive[j]

        # nuclear overlap matrix (excluding electronic component)
        Snuc[i, j] = glbl.integrals.s_integral(traj_list[ii],
                                               traj_list[jj],
                                               nuc_only=True)

        # compute overlap of trajectories (different from S, which may or may
        # not involve integration in a gaussian basis)
        t_ovrlp[i, j] = glbl.integrals.traj_overlap(traj_list[ii],
                                                    traj_list[jj],
                                                    Snuc=Snuc[i, j])

        # overlap matrix (including electronic component)
        S[i, j] = glbl.integrals.s_integral(traj_list[ii],
                                            traj_list[jj],
                                            Snuc=Snuc[i, j])

        # time-derivative of the overlap matrix (not hermitian in general)
        Sdot[i, j] = glbl.integrals.sdot_integral(traj_list[ii],
                                                  traj_list[jj],
                                                  Snuc=Snuc[i, j])
        Sdnuc[i, j] = glbl.integrals.sdot_integral(traj_list[ii],
                                                   traj_list[jj],
                                                   Snuc=Snuc[i, j],
                                                   nuc_only=True)
        Sdele[i, j] = glbl.integrals.sdot_integral(traj_list[ii],
                                                   traj_list[jj],
                                                   Snuc=Snuc[i, j],
                                                   e_only=True)

        # kinetic energy matrix
        T[i, j] = glbl.integrals.ke_integral(traj_list[ii],
                                             traj_list[jj],
                                             Snuc=Snuc[i, j])

        # potential energy matrix
        if glbl.integrals.require_centroids:
            V[i, j] = glbl.integrals.v_integral(traj_list[ii],
                                                traj_list[jj],
                                                centroid=cent_list[ii][jj],
                                                Snuc=Snuc[i, j])
        else:
            V[i, j] = glbl.integrals.v_integral(traj_list[ii],
                                                traj_list[jj],
                                                Snuc=Snuc[i, j])

        # Hamiltonian matrix in non-orthogonal basis
        H[i, j] = T[i, j] + V[i, j]

        # if hermitian matrix, set (j,i) indices
        if glbl.integrals.hermitian and i != j:
            Snuc[j, i] = Snuc[i, j].conjugate()
            S[j, i] = S[i, j].conjugate()
            t_ovrlp[j, i] = t_ovrlp[i, j].conjugate()
            Sdot[j, i] = glbl.integrals.sdot_integral(traj_list[jj],
                                                      traj_list[ii],
                                                      Snuc=Snuc[j, i])
            Sdnuc[j, i] = glbl.integrals.sdot_integral(traj_list[jj],
                                                       traj_list[ii],
                                                       Snuc=Snuc[j, i],
                                                       nuc_only=True)
            Sdele[j, i] = glbl.integrals.sdot_integral(traj_list[jj],
                                                       traj_list[ii],
                                                       Snuc=Snuc[j, i],
                                                       e_only=True)

            T[j, i] = T[i, j].conjugate()
            V[j, i] = V[i, j].conjugate()
            H[j, i] = H[i, j].conjugate()

    if glbl.integrals.hermitian:
        # compute the S^-1, needed to compute Heff
        timings.start('linalg.pinvh')
        Sinv = sp_linalg.pinvh(S)
        #        Sinv, cond = fms_linalg.pseudo_inverse2(S)
        timings.stop('linalg.pinvh')
    else:
        # compute the S^-1, needed to compute Heff
        timings.start('hamiltonian.pseudo_inverse')
        Sinv, cond = fms_linalg.pseudo_inverse(S)
        timings.stop('hamiltonian.pseudo_inverse')

    Heff = np.dot(Sinv, H - 1j * Sdot)

    fileio.print_bund_mat(0., 'sdot_nuc', Sdnuc)
    fileio.print_bund_mat(0., 'sdot_ele', Sdele)

    return t_ovrlp, T, V, S, Snuc, Sdot, Heff
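As a small self-contained check of the Hermitian branch above: scipy.linalg.pinvh accepts a complex Hermitian overlap-like matrix and returns its (pseudo-)inverse, which is then contracted with H - 1j * Sdot to form Heff. The matrices below are made up.

import numpy as np
from scipy import linalg as sp_linalg

rng = np.random.default_rng(0)

# made-up, well-conditioned complex Hermitian "overlap" matrix
M = rng.normal(size=(3, 3)) + 1j * rng.normal(size=(3, 3))
S = M @ M.conj().T + 3.0 * np.eye(3)

H = rng.normal(size=(3, 3)) + 1j * rng.normal(size=(3, 3))
Sdot = rng.normal(size=(3, 3)) + 1j * rng.normal(size=(3, 3))

Sinv = sp_linalg.pinvh(S)                # Hermitian (pseudo-)inverse
Heff = Sinv @ (H - 1j * Sdot)            # same contraction as in the function above

print(np.allclose(Sinv @ S, np.eye(3)))  # True for a well-conditioned S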