def _update_precisions(self, X, z):
    """Update the variational distributions for the precisions"""
    n_features = X.shape[1]
    if self.covariance_type == 'spherical':
        self.dof_ = 0.5 * n_features * np.sum(z, axis=0)
        for k in range(self.n_components):
            # could be more memory efficient ?
            sq_diff = np.sum((X - self.means_[k]) ** 2, axis=1)
            self.scale_[k] = 1.
            self.scale_[k] += 0.5 * np.sum(z.T[k] * (sq_diff + n_features))
            self.bound_prec_[k] = (
                0.5 * n_features * (
                    digamma(self.dof_[k]) - np.log(self.scale_[k])))
        self.precs_ = np.tile(self.dof_ / self.scale_, [n_features, 1]).T

    elif self.covariance_type == 'diag':
        for k in range(self.n_components):
            self.dof_[k].fill(1. + 0.5 * np.sum(z.T[k], axis=0))
            sq_diff = (X - self.means_[k]) ** 2  # see comment above
            self.scale_[k] = np.ones(n_features) + 0.5 * np.dot(
                z.T[k], (sq_diff + 1))
            self.precs_[k] = self.dof_[k] / self.scale_[k]
            self.bound_prec_[k] = 0.5 * np.sum(digamma(self.dof_[k])
                                               - np.log(self.scale_[k]))
            self.bound_prec_[k] -= 0.5 * np.sum(self.precs_[k])

    elif self.covariance_type == 'tied':
        self.dof_ = 2 + X.shape[0] + n_features
        self.scale_ = (X.shape[0] + 1) * np.identity(n_features)
        for k in range(self.n_components):
            diff = X - self.means_[k]
            self.scale_ += np.dot(diff.T, z[:, k:k + 1] * diff)
        self.scale_ = pinvh(self.scale_)
        self.precs_ = self.dof_ * self.scale_
        self.det_scale_ = linalg.det(self.scale_)
        self.bound_prec_ = 0.5 * wishart_log_det(
            self.dof_, self.scale_, self.det_scale_, n_features)
        self.bound_prec_ -= 0.5 * self.dof_ * np.trace(self.scale_)

    elif self.covariance_type == 'full':
        for k in range(self.n_components):
            sum_resp = np.sum(z.T[k])
            self.dof_[k] = 2 + sum_resp + n_features
            self.scale_[k] = (sum_resp + 1) * np.identity(n_features)
            diff = X - self.means_[k]
            self.scale_[k] += np.dot(diff.T, z[:, k:k + 1] * diff)
            self.scale_[k] = pinvh(self.scale_[k])
            self.precs_[k] = self.dof_[k] * self.scale_[k]
            self.det_scale_[k] = linalg.det(self.scale_[k])
            self.bound_prec_[k] = 0.5 * wishart_log_det(
                self.dof_[k], self.scale_[k], self.det_scale_[k], n_features)
            self.bound_prec_[k] -= 0.5 * self.dof_[k] * np.trace(
                self.scale_[k])
def fit(self, evidence_approx_method="fixed-point", max_iter=100):
    '''
    Fits Bayesian linear regression, returns posterior mean and precision
    of parameters

    Parameters:
    -----------
    max_iter: int
        Number of maximum iterations

    evidence_approx_method: str (DEFAULT = 'fixed-point')
        Method for approximating evidence, either 'fixed-point' or 'EM'

    Theory Note:
    ------------
    This code implements two methods to fit type II ML Bayesian Linear
    Regression: Expectation Maximization and Fixed Point Iterations.
    Expectation Maximization is generally slower, so by default we use
    fixed-point.
    '''
    # use type II maximum likelihood to find hyperparameters alpha and beta
    self._evidence_approx(max_iter=max_iter, method=evidence_approx_method)

    # find parameters of posterior distribution after last update of alpha & beta
    self.w_mu, self.w_precision = self._posterior_params(self.alpha, self.beta)
    self.D = pinvh(self.w_precision)
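# A minimal, self-contained sketch of the fixed-point evidence approximation the
# docstring above refers to (the standard type II ML updates for Bayesian linear
# regression). The function and variable names below are illustrative, not taken
# from the class.
import numpy as np
from scipy.linalg import pinvh

def fixed_point_evidence(X, y, max_iter=100, tol=1e-4):
    n, m = X.shape
    alpha, beta = 1.0, 1.0 / (np.var(y) + 1e-12)
    eigvals = np.linalg.eigvalsh(X.T @ X)
    for _ in range(max_iter):
        # posterior precision and mean for the current (alpha, beta)
        S_inv = alpha * np.eye(m) + beta * X.T @ X
        w = beta * pinvh(S_inv) @ X.T @ y
        # effective number of well-determined parameters
        gamma = np.sum(beta * eigvals / (alpha + beta * eigvals))
        alpha_new = gamma / (w @ w)
        beta_new = (n - gamma) / np.sum((y - X @ w) ** 2)
        if abs(alpha_new - alpha) < tol and abs(beta_new - beta) < tol:
            alpha, beta = alpha_new, beta_new
            break
        alpha, beta = alpha_new, beta_new
    return alpha, beta, w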
def __init__(self, xs, ys, noise=0.001, l=1, K=K_SE):
    self.xs = xs
    self.l = l
    self.K = K
    Kxx = self.K(xs, l=self.l)
    self.KxxI = pinvh(Kxx + (noise**2) * eye_like(Kxx))
    self.KxxI_ys = self.KxxI.dot(ys)
def test_simple_complex(self):
    a = (array([[1, 2, 3], [4, 5, 6], [7, 8, 10]], dtype=float)
         + 1j * array([[10, 8, 7], [6, 5, 4], [3, 2, 1]], dtype=float))
    a = np.dot(a, a.conj().T)
    a_pinv = pinvh(a)
    assert_array_almost_equal(np.dot(a, a_pinv), np.eye(3))
def _init_params(self, X):
    '''
    Initialise parameters
    '''
    d = X.shape[1]

    # initialise prior on means & precision matrices
    if 'means' in self.init_params:
        means0 = self.init_params['means']
    else:
        kms = KMeans(n_init=self.n_init, n_clusters=self.n_components)
        means0 = kms.fit(X).cluster_centers_

    if 'covar' in self.init_params:
        scale_inv0 = self.init_params['covar']
        scale0 = pinvh(scale_inv0)
    else:
        # heuristics to define broad prior over precision matrix
        diag_els = np.abs(np.max(X, 0) - np.min(X, 0)) / 2
        scale_inv0 = np.diag(diag_els)
        scale0 = np.diag(1. / diag_els)

    if 'weights' in self.init_params:
        weights0 = self.init_params['weights']
    else:
        weights0 = np.ones(self.n_components) / self.n_components

    if 'dof' in self.init_params:
        dof0 = self.init_params['dof']
    else:
        dof0 = d

    if 'beta' in self.init_params:
        beta0 = self.init_params['beta']
    else:
        beta0 = 1e-3

    # clusters that are not pruned
    self.active = np.ones(self.n_components, dtype=bool)

    # checks initialisation errors in case parameters are user defined
    assert dof0 >= d, ('Degrees of freedom should be larger than '
                       'dimensionality of data')
    assert means0.shape[0] == self.n_components, ('Number of centroids defined '
                                                  'should be equal to number of '
                                                  'components')
    assert means0.shape[1] == d, ('Dimensionality of means and data '
                                  'should be the same')
    assert weights0.shape[0] == self.n_components, ('Number of weights should be '
                                                    'equal to number of components')

    # At first iteration these parameters are equal to priors, but they change
    # at each iteration of mean field approximation
    scale = np.array([np.copy(scale0) for _ in range(self.n_components)])
    means = np.copy(means0)
    weights = np.copy(weights0)
    dof = dof0 * np.ones(self.n_components)
    beta = beta0 * np.ones(self.n_components)
    init_ = [means0, scale0, scale_inv0, beta0, dof0, weights0]
    iter_ = [means, scale, scale_inv0, beta, dof, weights]
    return init_, iter_
def fit(self, X=None, y=None):
    """
    The Gaussian Process model fitting method.

    Parameters
    ----------
    X : double array_like
        An array with shape (n_samples, n_features) with the input at which
        observations were made.

    y : array_like, shape (n_samples, 3)
        An array with shape (n_samples, 3) with the observations of the
        output to be predicted.

    Returns
    -------
    gp : self
        A fitted Gaussian Process model object awaiting data to perform
        predictions.
    """
    if X is not None:
        K_list = self.calc_scalar_kernel_matrices(X)
    else:
        K_list = self.calc_scalar_kernel_matrices()

    # add diagonal noise to each scalar kernel matrix
    K_list = [K + self.nugget * sp.ones(K.shape[0]) for K in K_list]

    Kglob = None
    # outer_iv = [sp.outer(iv, iv.T) for iv in self.ivs]  # NO, wrong
    for K, ivs, iv_corr in zip(K_list, self.ivs, self.iv_corr):
        # make the outer product tensor of shape (N_ls, N_ls, 3, 3) and
        # multiply it with the scalar kernel
        K3D = iv_corr * K[:, :, None, None] * rotmat_multi(ivs, ivs)
        # reshape tensor onto a 2D array tiled with 3x3 matrix blocks
        if Kglob is None:
            Kglob = K3D
        else:
            Kglob += K3D
    Kglob = my_tensor_reshape(Kglob)
    # all channels merged into one covariance matrix
    # K^{glob}_{ij} = \sum_{k = 1}^{N_{IVs}} w_k D_{k, ij} |v_k^i\rangle \langle v_k^j |

    try:
        inv = LA.pinv2(Kglob)
    except LA.LinAlgError as err:
        print("pinv2 failed: %s. Switching to pinvh" % err)
        try:
            inv = LA.pinvh(Kglob)
        except LA.LinAlgError as err:
            print("pinvh failed as well: %s. Giving up." % err)
            inv = None

    # alpha is the vector of regression coefficients of GaussianProcess
    alpha = sp.dot(inv, self.y.ravel())

    if not self.low_memory:
        self.inverse = inv
        self.Kglob = Kglob
    self.alpha = sp.array(alpha)
def nll(l):
    # negative log likelihood
    # if l < 0.001: return 1e10
    Kxx = K(xs, l=l)
    Kxx += (noise**2) * eye_like(Kxx)
    res = (ys.T).dot(pinvh(Kxx)).dot(ys) + slogdet(Kxx)[1]
    res = squeeze(res)
    # print(l, res)
    return res
def test_nonpositive(self):
    a = array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=float)
    a = np.dot(a, a.T)
    u, s, vt = np.linalg.svd(a)
    s[0] *= -1
    a = np.dot(u * s, vt)  # a is now symmetric non-positive and singular
    a_pinv = pinv2(a)
    a_pinvh = pinvh(a)
    assert_array_almost_equal(a_pinv, a_pinvh)
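# A quick standalone illustration of what the test above relies on: for a
# symmetric (possibly singular) matrix, pinvh returns a pseudo-inverse that
# satisfies the Moore-Penrose condition A A+ A = A. This is a sketch, not part
# of the original test suite.
import numpy as np
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
B = rng.standard_normal((4, 2))
A = B @ B.T                              # symmetric PSD, rank 2 (singular)
A_plus = pinvh(A)
print(np.allclose(A @ A_plus @ A, A))    # expected: True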
def laplacian_sc_pinv(G, observed_nodelist, unobserved_nodelist,
                      weight='weight'):
    """
    Pseudo-inverse of the Laplacian Schur complement.
    """
    sc = laplacian_schur_complement(G, observed_nodelist, unobserved_nodelist,
                                    weight=weight)
    return pinvh(sc)
def error_matrix(self):
    """
    Covariance Matrix.
    """
    try:
        mask = self.flat_hess_.mask
    except AttributeError:
        mask = None
    return self._reshape_matrix(
        -np.ma.array(pinvh(self.flat_hess_.data), mask=mask))
def update_sigma(X, alpha_, lambda_, keep_lambda, n_samples):
    sigma_ = pinvh(np.eye(n_samples) / alpha_ +
                   np.dot(X[:, keep_lambda] *
                          np.reshape(1. / lambda_[keep_lambda], [1, -1]),
                          X[:, keep_lambda].T))
    sigma_ = np.dot(sigma_, X[:, keep_lambda] *
                    np.reshape(1. / lambda_[keep_lambda], [1, -1]))
    sigma_ = - np.dot(np.reshape(1. / lambda_[keep_lambda], [-1, 1]) *
                      X[:, keep_lambda].T, sigma_)
    sigma_.flat[::(sigma_.shape[1] + 1)] += 1. / lambda_[keep_lambda]
    return sigma_
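# The helper above applies the Woodbury identity so that only an
# (n_samples x n_samples) matrix has to be inverted. A small sanity check,
# assuming the update_sigma defined above is in scope, against the direct
# inverse of the posterior precision diag(lambda) + alpha * X^T X:
import numpy as np
from scipy.linalg import pinvh

rng = np.random.default_rng(0)
n_samples, n_features = 20, 50
X = rng.standard_normal((n_samples, n_features))
alpha_ = 2.0
lambda_ = rng.uniform(0.5, 3.0, size=n_features)
keep_lambda = np.ones(n_features, dtype=bool)

sigma_woodbury = update_sigma(X, alpha_, lambda_, keep_lambda, n_samples)
sigma_direct = pinvh(np.diag(lambda_) + alpha_ * X.T @ X)
print(np.allclose(sigma_woodbury, sigma_direct))   # expected: True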
def nll_prime(l):
    Kxx, Kps = K(xs, l=l, deriv=True)
    Kxx += (noise**2) * eye_like(Kxx)
    KxxI = pinvh(Kxx)
    a = KxxI.dot(ys)
    aaT = outer(a, a)        # a . a.T
    KI_aaT = KxxI - aaT      # K^-1 - a.a^T
    res = []
    for Kp in Kps:
        grad = trace_prod(KI_aaT, Kp)
        res.append(grad)
    return asarray(res)
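# How the two closures above would typically be consumed: hand the objective and
# its gradient to a gradient-based optimiser to choose the kernel length-scale.
# This is a sketch only; it assumes nll / nll_prime (and the xs, ys, noise, K
# they close over) are defined as in the surrounding snippets.
from scipy.optimize import minimize

res = minimize(lambda v: nll(float(v[0])),
               x0=[1.0],
               jac=lambda v: nll_prime(float(v[0])),
               method='L-BFGS-B',
               bounds=[(1e-3, None)])   # keep the length-scale positive
print("optimised length-scale:", float(res.x[0]))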
def _init_params(self, *args):
    '''
    Initialise parameters of Bayesian Gaussian HMM
    '''
    d, X = args
    pr_start, pr_trans = super(VBGaussianHMM, self)._init_params()

    # initialise prior on means & precision matrices
    if 'means' in self.init_params:
        means0 = check_array(self.init_params['means'])
    else:
        kms = KMeans(n_init=2, n_clusters=self.n_hidden)
        means0 = kms.fit(X).cluster_centers_

    if 'covar' in self.init_params:
        scale_inv0 = self.init_params['covar']
        scale0 = pinvh(scale_inv0)
    else:
        # heuristics to define broad prior over precision matrix
        diag_els = np.abs(np.max(X, 0) - np.min(X, 0))
        scale_inv0 = np.diag(diag_els)
        scale0 = np.diag(1. / diag_els)

    if 'dof' in self.init_params:
        dof0 = self.init_params['dof']
    else:
        dof0 = d

    if 'beta' in self.init_params:
        beta0 = self.init_params['beta']
    else:
        beta0 = 1e-3

    # checks initialisation errors in case parameters are user defined
    if dof0 < d:
        raise ValueError('Degrees of freedom should be larger than '
                         'dimensionality of data')
    if means0.shape[0] != self.n_hidden:
        raise ValueError('Number of centroids defined should '
                         'be equal to number of components')
    if means0.shape[1] != d:
        raise ValueError('Dimensionality of means and data '
                         'should be the same')

    scale = np.array([np.copy(scale0) for _ in range(self.n_hidden)])
    dof = dof0 * np.ones(self.n_hidden)
    beta = beta0 * np.ones(self.n_hidden)

    # if user did not define initialisation parameters use KMeans
    return pr_start, pr_trans, {'means': means0, 'scale': scale, 'beta': beta,
                                'dof': dof, 'scale_inv0': scale_inv0}
def get_precision(self):
    """Getter for the precision matrix.

    Returns
    -------
    precision_ : array-like
        The precision matrix associated to the current covariance object.
    """
    if self.store_precision:
        precision = self.precision_
    else:
        precision = linalg.pinvh(self.covariance_)
    return precision
def _update_params(self, Nk, Xk, Sk, beta0, means0, dof0, scale_inv0,
                   beta, means, dof, scale):
    ''' Updates distribution of means and precisions '''
    for k in range(self.n_active):
        # update mean and precision for each cluster
        beta[k] = beta0 + Nk[k]
        means[k] = (beta0 * means0[k, :] + Xk[k]) / beta[k]
        dof[k] = dof0 + Nk[k] + 1
        # precision calculation is ugly but prevents overflow & underflow
        scale[k, :, :] = pinvh(
            scale_inv0 +
            (beta0 * Sk[k] + Nk[k] * Sk[k]
             - np.outer(Xk[k], Xk[k])
             - beta0 * np.outer(means0[k, :] - Xk[k], means0[k, :]))
            / (beta0 + Nk[k]))
    return beta, means, dof, scale
def fit(self, X, y=None):
    """Fits a Minimum Covariance Determinant with the FastMCD algorithm.

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training data, where n_samples is the number of samples
        and n_features is the number of features.

    y : not used, present for API consistency purposes.

    Returns
    -------
    self : object
    """
    X = check_array(X, ensure_min_samples=2, estimator='MinCovDet')
    random_state = check_random_state(self.random_state)
    n_samples, n_features = X.shape
    # check that the empirical covariance is full rank
    if (linalg.svdvals(np.dot(X.T, X)) > 1e-8).sum() != n_features:
        warnings.warn("The covariance matrix associated to your dataset "
                      "is not full rank")
    # compute and store raw estimates
    raw_location, raw_covariance, raw_support, raw_dist = fast_mcd(
        X, support_fraction=self.support_fraction,
        cov_computation_method=self._nonrobust_covariance,
        random_state=random_state)
    if self.assume_centered:
        raw_location = np.zeros(n_features)
        raw_covariance = self._nonrobust_covariance(X[raw_support],
                                                    assume_centered=True)
        # get precision matrix in an optimized way
        precision = linalg.pinvh(raw_covariance)
        raw_dist = np.sum(np.dot(X, precision) * X, 1)
    self.raw_location_ = raw_location
    self.raw_covariance_ = raw_covariance
    self.raw_support_ = raw_support
    self.location_ = raw_location
    self.support_ = raw_support
    self.dist_ = raw_dist
    # obtain consistency at normal models
    self.correct_covariance(X)
    # re-weight estimator
    self.reweight_covariance(X)
    return self
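# Illustrative use of the estimator this method belongs to (scikit-learn's
# MinCovDet). A minimal sketch, assuming scikit-learn is installed; the data
# below is synthetic and only meant to show the robust fit in action.
import numpy as np
from sklearn.covariance import MinCovDet

rng = np.random.default_rng(42)
X = rng.standard_normal((100, 3))
X[:5] += 10                      # a few gross outliers

mcd = MinCovDet(random_state=0).fit(X)
print(mcd.location_)             # robust location estimate
print(mcd.support_.sum())        # number of observations used in the fit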
def _vbm_emission_params(self, emission_params_prior, emission_params,
                         sf_stats):
    ''' Performs vbm step for parameters of emission probabilities '''
    Nk, Xk, Sk = sf_stats
    beta0, means0 = emission_params_prior['beta'], emission_params_prior['means']
    emission_params['beta'] = beta0 + Nk
    emission_params['means'] = ((beta0 * means0.T + Xk.T) / emission_params['beta']).T
    emission_params['dof'] = emission_params_prior['dof'] + Nk + 1
    scale_inv0 = emission_params_prior['scale_inv0']
    for k in range(self.n_hidden):
        emission_params['scale'][k] = pinvh(
            scale_inv0 +
            (beta0 * Sk[k] + Nk[k] * Sk[k]
             - np.outer(Xk[k], Xk[k])
             - beta0 * np.outer(means0[k] - Xk[k], means0[k]))
            / (beta0 + Nk[k]))
    return emission_params
def inversion_checker(X, alpha, beta):
    ''' Checks accuracy of inversion '''
    n, m = X.shape
    u, d, vh = np.linalg.svd(X, full_matrices=False)
    dsq = d**2
    # precision matrix
    S = beta * np.dot(X.T, X) + alpha * np.eye(m)
    # inverting precision : PREVIOUS VERSION
    a1 = np.dot(np.dot(vh.T, np.diag(1. / (beta * dsq + alpha))), vh)
    # inverting precision : CURRENT VERSION
    a2 = pinvh(S)
    return [a1, a2]
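# Quick use of the checker above: the SVD-based inverse and pinvh should agree
# to numerical precision. A sketch assuming inversion_checker as defined above.
import numpy as np

rng = np.random.default_rng(1)
X = rng.standard_normal((50, 10))
a1, a2 = inversion_checker(X, alpha=1.0, beta=2.0)
print(np.allclose(a1, a2))   # expected: True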
def test_bayesian_ridge_score_values():
    """Check value of score on toy example.

    Compute log marginal likelihood with equation (36) in Sparse Bayesian
    Learning and the Relevance Vector Machine (Tipping, 2001):

    - 0.5 * (log |Id/alpha + X.X^T/lambda|
             + y^T.(Id/alpha + X.X^T/lambda)^-1.y + n * log(2 * pi))
    + lambda_1 * log(lambda) - lambda_2 * lambda
    + alpha_1 * log(alpha) - alpha_2 * alpha

    and check equality with the score computed during training.
    """
    X, y = diabetes.data, diabetes.target
    n_samples = X.shape[0]
    # check with initial values of alpha and lambda (see code for the values)
    eps = np.finfo(np.float64).eps
    alpha_ = 1. / (np.var(y) + eps)
    lambda_ = 1.

    # value of the parameters of the Gamma hyperpriors
    alpha_1 = 0.1
    alpha_2 = 0.1
    lambda_1 = 0.1
    lambda_2 = 0.1

    # compute score using formula of docstring
    score = lambda_1 * log(lambda_) - lambda_2 * lambda_
    score += alpha_1 * log(alpha_) - alpha_2 * alpha_
    M = 1. / alpha_ * np.eye(n_samples) + 1. / lambda_ * np.dot(X, X.T)
    M_inv = pinvh(M)
    score += - 0.5 * (fast_logdet(M) + np.dot(y.T, np.dot(M_inv, y)) +
                      n_samples * log(2 * np.pi))

    # compute score with BayesianRidge
    clf = BayesianRidge(alpha_1=alpha_1, alpha_2=alpha_2,
                        lambda_1=lambda_1, lambda_2=lambda_2,
                        n_iter=1, fit_intercept=False, compute_score=True)
    clf.fit(X, y)

    assert_almost_equal(clf.scores_[0], score, decimal=9)
def _set_covariance(self, covariance):
    """Saves the covariance and precision estimates

    Storage is done accordingly to `self.store_precision`.
    Precision stored only if invertible.

    Parameters
    ----------
    covariance : 2D ndarray, shape (n_features, n_features)
        Estimated covariance matrix to be stored, and from which precision
        is computed.
    """
    # covariance = check_array(covariance)
    # set covariance
    self.covariance_ = covariance
    # set precision
    if self.store_precision:
        self.precision_ = linalg.pinvh(covariance)
    else:
        self.precision_ = None
def fit(self, X, chain_index=[]):
    '''
    Fits Bayesian Hidden Markov Model with Gaussian emission probabilities

    Parameters
    ----------
    X: array-like or csr_matrix of size (n_samples, n_features)
       Data Matrix

    Returns
    -------
    object: self
       self
    '''
    # preprocess data
    X = self._check_X_train(X)
    super(VBGaussianHMM, self)._fit(X, chain_index)
    self.means_ = self._emission_params_['means']
    scale, dof = self._emission_params_['scale'], self._emission_params_['dof']
    self.covars_ = np.asarray([1. / df * pinvh(sc)
                               for sc, df in zip(scale, dof)])
    return self
def compute_kernel_row(self):
    K = np.zeros((self.nrow, self.nrow))
    drow = {}
    for i in range(self.nrow):
        iA = np.where(self.xdata[i] > 0)[0]
        nsub = len(iA)
        if nsub > 0:
            A = self.base_vectors[iA]
            AAT = self.kernel_col[iA.reshape((nsub, 1)), iA]
            AATI = sp_lin.pinvh(AAT)
            drow[i] = (np.dot(AATI, A), iA)
        else:
            drow[i] = (None, np.array([]))

    for i in range(self.nrow):
        (BBTI, iB) = drow[i]
        if len(iB) > 0:
            for j in range(i + 1):
                (AATI, iA) = drow[j]
                ## = trace(Vhi.T, Vhj) = \braket{Vhi, Vhj}_{Frobenius}
                ## (BB^T)^{-1} B A^T (AA^T)^{-1}
                if len(iA) > 0:
                    BAT = self.kernel_col[iB.reshape((len(iB), 1)), iA]
                    ## xP = np.diag(np.dot(BBTI.T, np.dot(BAT, AATI)))
                    xP = np.dot(BBTI, AATI.T) * BAT
                    K[i, j] = np.sum(xP)
                    K[j, i] = K[i, j]
        ## if i % 1000 == 0:
        ##     print(i)

    d1 = np.diag(K)
    d2 = d1
    self.kernel_row = kernel_eval_nl(K, d1, d2, self.param_row)
    print('Modular kernel done')
    return
def _posterior(self, X, Y, alpha0, w0, full_covar=False):
    '''
    Iteratively refitted least squares method using l_bfgs_b.
    Finds MAP estimates for weights and Hessian at convergence point
    '''
    if self.solver == 'lbfgs_b':
        f = lambda w: _logistic_loss_and_grad(w, X, Y, alpha0)
        w = fmin_l_bfgs_b(f, x0=w0, pgtol=self.tol_solver,
                          maxiter=self.n_iter_solver)[0]
    elif self.solver == 'newton_cg':
        f = _logistic_loss
        grad = lambda w, *args: _logistic_loss_and_grad(w, *args)[1]
        hess = _logistic_grad_hess
        args = (X, Y, alpha0)
        w = newton_cg(hess, f, grad, w0, args=args,
                      maxiter=self.n_iter, tol=self.tol)[0]
    else:
        raise NotImplementedError('Liblinear solver is not yet implemented')

    # calculate negative of Hessian at w
    if self.fit_intercept:
        XW = np.dot(X, w[:-1]) + w[-1]
    else:
        XW = np.dot(X, w)
    s = expit(XW)
    R = s * (1 - s)
    negHessian = np.dot(X.T * R, X)

    # do not regularise constant
    alpha_vec = np.zeros(negHessian.shape[0])
    alpha_vec = alpha0
    np.fill_diagonal(negHessian, np.diag(negHessian) + alpha_vec)

    if full_covar is False:
        eigs = 1. / eigvalsh(negHessian)
        return [w, eigs]
    else:
        inv = pinvh(negHessian)
        return [w, inv]
def fit(self, X, y):
    """Fit the ARDRegression model according to the given training data
    and parameters.

    Iterative procedure to maximize the evidence

    Parameters
    ----------
    X : array-like, shape = [n_samples, n_features]
        Training vector, where n_samples is the number of samples and
        n_features is the number of features.
    y : array, shape = [n_samples]
        Target values (integers). Will be cast to X's dtype if necessary

    Returns
    -------
    self : returns an instance of self.
    """
    X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True)

    n_samples, n_features = X.shape
    coef_ = np.zeros(n_features)

    X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data(
        X, y, self.fit_intercept, self.normalize, self.copy_X)

    # Launch the convergence loop
    keep_lambda = np.ones(n_features, dtype=bool)

    lambda_1 = self.lambda_1
    lambda_2 = self.lambda_2
    alpha_1 = self.alpha_1
    alpha_2 = self.alpha_2
    verbose = self.verbose

    # Initialization of the values of the parameters
    alpha_ = 1. / np.var(y)
    lambda_ = np.ones(n_features)

    self.scores_ = list()
    coef_old_ = None

    # Iterative procedure of ARDRegression
    for iter_ in range(self.n_iter):
        # Compute mu and sigma (using Woodbury matrix identity)
        sigma_ = pinvh(
            np.eye(n_samples) / alpha_ +
            np.dot(X[:, keep_lambda] *
                   np.reshape(1. / lambda_[keep_lambda], [1, -1]),
                   X[:, keep_lambda].T))
        sigma_ = np.dot(
            sigma_, X[:, keep_lambda] *
            np.reshape(1. / lambda_[keep_lambda], [1, -1]))
        sigma_ = -np.dot(
            np.reshape(1. / lambda_[keep_lambda], [-1, 1]) *
            X[:, keep_lambda].T, sigma_)
        sigma_.flat[::(sigma_.shape[1] + 1)] += 1. / lambda_[keep_lambda]
        coef_[keep_lambda] = alpha_ * np.dot(
            sigma_, np.dot(X[:, keep_lambda].T, y))

        # Update alpha and lambda
        rmse_ = np.sum((y - np.dot(X, coef_)) ** 2)
        gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_)
        lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) /
                                ((coef_[keep_lambda]) ** 2 + 2. * lambda_2))
        alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) /
                  (rmse_ + 2. * alpha_2))

        # Prune the weights with a precision over a threshold
        keep_lambda = lambda_ < self.threshold_lambda
        coef_[~keep_lambda] = 0

        # Compute the objective function
        if self.compute_score:
            s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum()
            s += alpha_1 * log(alpha_) - alpha_2 * alpha_
            s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) +
                        np.sum(np.log(lambda_)))
            s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum())
            self.scores_.append(s)

        # Check for convergence
        if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol:
            if verbose:
                print("Converged after %s iterations" % iter_)
            break
        coef_old_ = np.copy(coef_)

    self.coef_ = coef_
    self.alpha_ = alpha_
    self.sigma_ = sigma_
    self.lambda_ = lambda_
    self._set_intercept(X_offset_, y_offset_, X_scale_)
    return self
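# Minimal usage sketch of the estimator the method above belongs to
# (scikit-learn's ARDRegression), assuming scikit-learn is installed; the data
# and parameter values below are illustrative only.
import numpy as np
from sklearn.linear_model import ARDRegression

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 20))
w_true = np.zeros(20)
w_true[:3] = [1.5, -2.0, 0.7]            # only 3 informative features
y = X @ w_true + 0.1 * rng.standard_normal(100)

ard = ARDRegression(compute_score=True).fit(X, y)
print(np.round(ard.coef_, 2))            # irrelevant weights shrink toward 0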
def graph_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4, enet_tol=1e-4, max_iter=100, verbose=False, return_costs=False, eps=np.finfo(np.float64).eps, return_n_iter=False): """l1-penalized covariance estimator Read more in the :ref:`User Guide <sparse_inverse_covariance>`. Parameters ---------- emp_cov : 2D ndarray, shape (n_features, n_features) Empirical covariance from which to compute the covariance estimate. alpha : positive float The regularization parameter: the higher alpha, the more regularization, the sparser the inverse covariance. cov_init : 2D array (n_features, n_features), optional The initial guess for the covariance. mode : {'cd', 'lars'} The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable. tol : positive float, optional The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. enet_tol : positive float, optional The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. max_iter : integer, optional The maximum number of iterations. verbose : boolean, optional If verbose is True, the objective function and dual gap are printed at each iteration. return_costs : boolean, optional If return_costs is True, the objective function and dual gap at each iteration are returned. eps : float, optional The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. return_n_iter : bool, optional Whether or not to return the number of iterations. Returns ------- covariance : 2D ndarray, shape (n_features, n_features) The estimated covariance matrix. precision : 2D ndarray, shape (n_features, n_features) The estimated (sparse) precision matrix. costs : list of (objective, dual_gap) pairs The list of values of the objective function and the dual gap at each iteration. Returned only if return_costs is True. n_iter : int Number of iterations. Returned only if `return_n_iter` is set to True. See Also -------- GraphLasso, GraphLassoCV Notes ----- The algorithm employed to solve this problem is the GLasso algorithm, from the Friedman 2008 Biostatistics paper. It is the same algorithm as in the R `glasso` package. One possible difference with the `glasso` R package is that the diagonal coefficients are not penalized. """ _, n_features = emp_cov.shape if alpha == 0: if return_costs: precision_ = linalg.inv(emp_cov) cost = - 2. * log_likelihood(emp_cov, precision_) cost += n_features * np.log(2 * np.pi) d_gap = np.sum(emp_cov * precision_) - n_features if return_n_iter: return emp_cov, precision_, (cost, d_gap), 0 else: return emp_cov, precision_, (cost, d_gap) else: if return_n_iter: return emp_cov, linalg.inv(emp_cov), 0 else: return emp_cov, linalg.inv(emp_cov) if cov_init is None: covariance_ = emp_cov.copy() else: covariance_ = cov_init.copy() # As a trivial regularization (Tikhonov like), we scale down the # off-diagonal coefficients of our starting point: This is needed, as # in the cross-validation the cov_init can easily be # ill-conditioned, and the CV loop blows. Beside, this takes # conservative stand-point on the initial conditions, and it tends to # make the convergence go faster. 
covariance_ *= 0.95 diagonal = emp_cov.flat[::n_features + 1] covariance_.flat[::n_features + 1] = diagonal precision_ = linalg.pinvh(covariance_) indices = np.arange(n_features) costs = list() # The different l1 regression solver have different numerical errors if mode == 'cd': errors = dict(over='raise', invalid='ignore') else: errors = dict(invalid='raise') try: # be robust to the max_iter=0 edge case, see: # https://github.com/scikit-learn/scikit-learn/issues/4134 d_gap = np.inf for i in range(max_iter): for idx in range(n_features): sub_covariance = np.ascontiguousarray( covariance_[indices != idx].T[indices != idx]) row = emp_cov[idx, indices != idx] with np.errstate(**errors): if mode == 'cd': # Use coordinate descent coefs = -(precision_[indices != idx, idx] / (precision_[idx, idx] + 1000 * eps)) coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram( coefs, alpha, 0, sub_covariance, row, row, max_iter, enet_tol, check_random_state(None), False) else: # Use LARS _, _, coefs = lars_path( sub_covariance, row, Xy=row, Gram=sub_covariance, alpha_min=alpha / (n_features - 1), copy_Gram=True, method='lars', return_path=False) # Update the precision matrix precision_[idx, idx] = ( 1. / (covariance_[idx, idx] - np.dot(covariance_[indices != idx, idx], coefs))) precision_[indices != idx, idx] = (- precision_[idx, idx] * coefs) precision_[idx, indices != idx] = (- precision_[idx, idx] * coefs) coefs = np.dot(sub_covariance, coefs) covariance_[idx, indices != idx] = coefs covariance_[indices != idx, idx] = coefs d_gap = _dual_gap(emp_cov, precision_, alpha) cost = _objective(emp_cov, precision_, alpha) if verbose: print( '[graph_lasso] Iteration % 3i, cost % 3.2e, dual gap %.3e' % (i, cost, d_gap)) if return_costs: costs.append((cost, d_gap)) if np.abs(d_gap) < tol: break if not np.isfinite(cost) and i > 0: raise FloatingPointError('Non SPD result: the system is ' 'too ill-conditioned for this solver') else: warnings.warn('graph_lasso: did not converge after %i iteration:' ' dual gap: %.3e' % (max_iter, d_gap), ConvergenceWarning) except FloatingPointError as e: e.args = (e.args[0] + '. The system is too ill-conditioned for this solver',) raise e if return_costs: if return_n_iter: return covariance_, precision_, costs, i + 1 else: return covariance_, precision_, costs else: if return_n_iter: return covariance_, precision_, i + 1 else: return covariance_, precision_
def _c_step(X, n_support, random_state, remaining_iterations=30, initial_estimates=None, verbose=False, cov_computation_method=empirical_covariance): n_samples, n_features = X.shape dist = np.inf # Initialisation support = np.zeros(n_samples, dtype=bool) if initial_estimates is None: # compute initial robust estimates from a random subset support[random_state.permutation(n_samples)[:n_support]] = True else: # get initial robust estimates from the function parameters location = initial_estimates[0] covariance = initial_estimates[1] # run a special iteration for that case (to get an initial support) precision = linalg.pinvh(covariance) X_centered = X - location dist = (np.dot(X_centered, precision) * X_centered).sum(1) # compute new estimates support[np.argsort(dist)[:n_support]] = True X_support = X[support] location = X_support.mean(0) covariance = cov_computation_method(X_support) # Iterative procedure for Minimum Covariance Determinant computation det = fast_logdet(covariance) # If the data already has singular covariance, calculate the precision, # as the loop below will not be entered. if np.isinf(det): precision = linalg.pinvh(covariance) previous_det = np.inf while (det < previous_det and remaining_iterations > 0 and not np.isinf(det)): # save old estimates values previous_location = location previous_covariance = covariance previous_det = det previous_support = support # compute a new support from the full data set mahalanobis distances precision = linalg.pinvh(covariance) X_centered = X - location dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1) # compute new estimates support = np.zeros(n_samples, dtype=bool) support[np.argsort(dist)[:n_support]] = True X_support = X[support] location = X_support.mean(axis=0) covariance = cov_computation_method(X_support) det = fast_logdet(covariance) # update remaining iterations for early stopping remaining_iterations -= 1 previous_dist = dist dist = (np.dot(X - location, precision) * (X - location)).sum(axis=1) # Check if best fit already found (det => 0, logdet => -inf) if np.isinf(det): results = location, covariance, det, support, dist # Check convergence if np.allclose(det, previous_det): # c_step procedure converged if verbose: print("Optimal couple (location, covariance) found before" " ending iterations (%d left)" % (remaining_iterations)) results = location, covariance, det, support, dist elif det > previous_det: # determinant has increased (should not happen) warnings.warn("Determinant has increased; this should not happen: " "log(det) > log(previous_det) (%.15f > %.15f). " "You may want to try with a higher value of " "support_fraction (current value: %.3f)." % (det, previous_det, n_support / n_samples), RuntimeWarning) results = previous_location, previous_covariance, \ previous_det, previous_support, previous_dist # Check early stopping if remaining_iterations == 0: if verbose: print('Maximum number of iterations reached') results = location, covariance, det, support, dist return results
def fast_mcd(X, support_fraction=None, cov_computation_method=empirical_covariance, random_state=None): """Estimates the Minimum Covariance Determinant matrix. Read more in the :ref:`User Guide <robust_covariance>`. Parameters ---------- X : array-like, shape (n_samples, n_features) The data matrix, with p features and n samples. support_fraction : float, 0 < support_fraction < 1 The proportion of points to be included in the support of the raw MCD estimate. Default is None, which implies that the minimum value of support_fraction will be used within the algorithm: `[n_sample + n_features + 1] / 2`. cov_computation_method : callable, default empirical_covariance The function which will be used to compute the covariance. Must return shape (n_features, n_features) random_state : int, RandomState instance or None, optional (default=None) If int, random_state is the seed used by the random number generator; If RandomState instance, random_state is the random number generator; If None, the random number generator is the RandomState instance used by `np.random`. Notes ----- The FastMCD algorithm has been introduced by Rousseuw and Van Driessen in "A Fast Algorithm for the Minimum Covariance Determinant Estimator, 1999, American Statistical Association and the American Society for Quality, TECHNOMETRICS". The principle is to compute robust estimates and random subsets before pooling them into a larger subsets, and finally into the full data set. Depending on the size of the initial sample, we have one, two or three such computation levels. Note that only raw estimates are returned. If one is interested in the correction and reweighting steps described in [RouseeuwVan]_, see the MinCovDet object. References ---------- .. [RouseeuwVan] A Fast Algorithm for the Minimum Covariance Determinant Estimator, 1999, American Statistical Association and the American Society for Quality, TECHNOMETRICS .. [Butler1993] R. W. Butler, P. L. Davies and M. Jhun, Asymptotics For The Minimum Covariance Determinant Estimator, The Annals of Statistics, 1993, Vol. 21, No. 3, 1385-1400 Returns ------- location : array-like, shape (n_features,) Robust location of the data. covariance : array-like, shape (n_features, n_features) Robust covariance of the features. support : array-like, type boolean, shape (n_samples,) A mask of the observations that have been used to compute the robust location and covariance estimates of the data set. """ random_state = check_random_state(random_state) X = check_array(X, ensure_min_samples=2, estimator='fast_mcd') n_samples, n_features = X.shape # minimum breakdown value if support_fraction is None: n_support = int(np.ceil(0.5 * (n_samples + n_features + 1))) else: n_support = int(support_fraction * n_samples) # 1-dimensional case quick computation # (Rousseeuw, P. J. and Leroy, A. M. 
(2005) References, in Robust # Regression and Outlier Detection, John Wiley & Sons, chapter 4) if n_features == 1: if n_support < n_samples: # find the sample shortest halves X_sorted = np.sort(np.ravel(X)) diff = X_sorted[n_support:] - X_sorted[:(n_samples - n_support)] halves_start = np.where(diff == np.min(diff))[0] # take the middle points' mean to get the robust location estimate location = 0.5 * (X_sorted[n_support + halves_start] + X_sorted[halves_start]).mean() support = np.zeros(n_samples, dtype=bool) X_centered = X - location support[np.argsort(np.abs(X_centered), 0)[:n_support]] = True covariance = np.asarray([[np.var(X[support])]]) location = np.array([location]) # get precision matrix in an optimized way precision = linalg.pinvh(covariance) dist = (np.dot(X_centered, precision) * (X_centered)).sum(axis=1) else: support = np.ones(n_samples, dtype=bool) covariance = np.asarray([[np.var(X)]]) location = np.asarray([np.mean(X)]) X_centered = X - location # get precision matrix in an optimized way precision = linalg.pinvh(covariance) dist = (np.dot(X_centered, precision) * (X_centered)).sum(axis=1) # Starting FastMCD algorithm for p-dimensional case if (n_samples > 500) and (n_features > 1): # 1. Find candidate supports on subsets # a. split the set in subsets of size ~ 300 n_subsets = n_samples // 300 n_samples_subsets = n_samples // n_subsets samples_shuffle = random_state.permutation(n_samples) h_subset = int(np.ceil(n_samples_subsets * (n_support / float(n_samples)))) # b. perform a total of 500 trials n_trials_tot = 500 # c. select 10 best (location, covariance) for each subset n_best_sub = 10 n_trials = max(10, n_trials_tot // n_subsets) n_best_tot = n_subsets * n_best_sub all_best_locations = np.zeros((n_best_tot, n_features)) try: all_best_covariances = np.zeros((n_best_tot, n_features, n_features)) except MemoryError: # The above is too big. Let's try with something much small # (and less optimal) n_best_tot = 10 all_best_covariances = np.zeros((n_best_tot, n_features, n_features)) n_best_sub = 2 for i in range(n_subsets): low_bound = i * n_samples_subsets high_bound = low_bound + n_samples_subsets current_subset = X[samples_shuffle[low_bound:high_bound]] best_locations_sub, best_covariances_sub, _, _ = select_candidates( current_subset, h_subset, n_trials, select=n_best_sub, n_iter=2, cov_computation_method=cov_computation_method, random_state=random_state) subset_slice = np.arange(i * n_best_sub, (i + 1) * n_best_sub) all_best_locations[subset_slice] = best_locations_sub all_best_covariances[subset_slice] = best_covariances_sub # 2. Pool the candidate supports into a merged set # (possibly the full dataset) n_samples_merged = min(1500, n_samples) h_merged = int(np.ceil(n_samples_merged * (n_support / float(n_samples)))) if n_samples > 1500: n_best_merged = 10 else: n_best_merged = 1 # find the best couples (location, covariance) on the merged set selection = random_state.permutation(n_samples)[:n_samples_merged] locations_merged, covariances_merged, supports_merged, d = \ select_candidates( X[selection], h_merged, n_trials=(all_best_locations, all_best_covariances), select=n_best_merged, cov_computation_method=cov_computation_method, random_state=random_state) # 3. 
Finally get the overall best (locations, covariance) couple if n_samples < 1500: # directly get the best couple (location, covariance) location = locations_merged[0] covariance = covariances_merged[0] support = np.zeros(n_samples, dtype=bool) dist = np.zeros(n_samples) support[selection] = supports_merged[0] dist[selection] = d[0] else: # select the best couple on the full dataset locations_full, covariances_full, supports_full, d = \ select_candidates( X, n_support, n_trials=(locations_merged, covariances_merged), select=1, cov_computation_method=cov_computation_method, random_state=random_state) location = locations_full[0] covariance = covariances_full[0] support = supports_full[0] dist = d[0] elif n_features > 1: # 1. Find the 10 best couples (location, covariance) # considering two iterations n_trials = 30 n_best = 10 locations_best, covariances_best, _, _ = select_candidates( X, n_support, n_trials=n_trials, select=n_best, n_iter=2, cov_computation_method=cov_computation_method, random_state=random_state) # 2. Select the best couple on the full dataset amongst the 10 locations_full, covariances_full, supports_full, d = select_candidates( X, n_support, n_trials=(locations_best, covariances_best), select=1, cov_computation_method=cov_computation_method, random_state=random_state) location = locations_full[0] covariance = covariances_full[0] support = supports_full[0] dist = d[0] return location, covariance, support, dist
def equality_time_graphical_lasso( S, K_init, max_iter, loss, C, rho, # n_samples=None, psi, gamma, tol, rtol, verbose, return_history, return_n_iter, mode, compute_objective, stop_at, stop_when, update_rho_options, init): """Equality constrained time-varying graphical LASSO solver. Solves the following problem via ADMM: min sum_{i=1}^T ||K_i||_{od,1} + beta sum_{i=2}^T Psi(K_i - K_{i-1}) s.t. objective = c_i for i = 1, ..., T where S_i = (1/n_i) X_i^T X_i is the empirical covariance of data matrix X (training observations by features). Parameters ---------- emp_cov : ndarray, shape (n_features, n_features) Empirical covariance of data. rho : float, optional Augmented Lagrangian parameter. max_iter : int, optional Maximum number of iterations. n_samples : ndarray Number of samples available for each time point. gamma: float, optional Kernel parameter when psi is chosen to be 'kernel'. constrained_to: float or ndarray, shape (time steps) Log likelihood constraints for K_i tol : float, optional Absolute tolerance for convergence. rtol : float, optional Relative tolerance for convergence. return_history : bool, optional Return the history of computed values. return_n_iter : bool, optional Return the number of iteration before convergence. verbose : bool, default False Print info at each iteration. update_rho_options : dict, optional Arguments for the rho update. See regain.update_rules.update_rho function for more information. compute_objective : bool, default True Choose to compute the objective value. init : {'empirical', 'zero', ndarray} Choose how to initialize the precision matrix, with the inverse empirical covariance, zero matrix or precomputed. Returns ------- K : numpy.array, 3-dimensional (T x d x d) Solution to the problem for each time t=1...T . history : list If return_history, then also a structure that contains the objective value, the primal and dual residual norms, and tolerances for the primal and dual residual norms at each iteration. """ psi, prox_psi, psi_node_penalty = check_norm_prox(psi) psi_name = psi.__name__ if loss == 'LL': loss_function = neg_logl else: loss_function = dtrace K = K_init Z_0 = K.copy() Z_1 = K.copy()[:-1] Z_2 = K.copy()[1:] u = np.zeros((S.shape[0])) U_0 = np.zeros_like(Z_0) U_1 = np.zeros_like(Z_1) U_2 = np.zeros_like(Z_2) Z_0_old = np.zeros_like(Z_0) Z_1_old = np.zeros_like(Z_1) Z_2_old = np.zeros_like(Z_2) I = np.eye(S.shape[1]) checks = [ convergence( obj=equality_objective(loss_function, S, K, C, Z_0, Z_1, Z_2, psi)) ] for iteration_ in range(max_iter): # update K A_K = U_0 - Z_0 A_K[:-1] += Z_1 - U_1 A_K[1:] += Z_2 - U_2 A_K += A_K.transpose(0, 2, 1) A_K /= 2. K = soft_thresholding_od(A_K, lamda=1. / rho) # update Z_0 residual_loss_constraint_u = loss_gen(loss_function, S, Z_0) - C + u A_Z = K + U_0 A_Z += A_Z.transpose(0, 2, 1) A_Z /= 2. if loss_function == neg_logl: A_Z -= residual_loss_constraint_u[:, None, None] * S Z_0 = np.array([ prox_logdet_constrained(_A, _a, I) for _A, _a in zip(A_Z, residual_loss_constraint_u) ]) elif loss_function == dtrace: Z_0 = np.array([ prox_dtrace_constrained(_A, _S, _a, I) for _A, _S, _a in zip(A_Z, S, residual_loss_constraint_u) ]) # other Zs A_1 = K[:-1] + U_1 A_2 = K[1:] + U_2 if not psi_node_penalty: prox_e = prox_psi(A_2 - A_1, lamda=2. 
/ rho) Z_1 = .5 * (A_1 + A_2 - prox_e) Z_2 = .5 * (A_1 + A_2 + prox_e) else: Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1), lamda=.5 / rho, rho=rho, tol=tol, rtol=rtol, max_iter=max_iter) # update residuals residual_loss_constraint = loss_gen(loss_function, S, Z_0) - C u += residual_loss_constraint U_0 += K - Z_0 U_1 += K[:-1] - Z_1 U_2 += K[1:] - Z_2 print(residual_loss_constraint) # diagnostics, reporting, termination checks rnorm = np.sqrt( np.sum(residual_loss_constraint**2) + squared_norm(K - Z_0) + squared_norm(K[:-1] - Z_1) + squared_norm(K[1:] - Z_2)) snorm = rho * np.sqrt( squared_norm(Z_0 - Z_0_old) + squared_norm(Z_1 - Z_1_old) + squared_norm(Z_2 - Z_2_old)) obj = equality_objective(loss_function, S, K, C, Z_0, Z_1, Z_2, psi) if compute_objective else np.nan check = convergence( obj=obj, rnorm=rnorm, snorm=snorm, e_pri=np.sqrt(Z_0.size + 2 * Z_1.size + S.shape[0]) * tol + rtol * max( np.sqrt( np.sum(C**2) + squared_norm(Z_0) + squared_norm(Z_1) + squared_norm(Z_2)), np.sqrt( np.sum( (residual_loss_constraint + C)**2) + squared_norm(K) + squared_norm(K[:-1]) + squared_norm(K[1:]))), e_dual=np.sqrt(Z_0.size + 2 * Z_1.size) * tol + rtol * rho * np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)), ) Z_0_old = Z_0.copy() Z_1_old = Z_1.copy() Z_2_old = Z_2.copy() if verbose: print("obj: %.4f, rnorm: %.4f, snorm: %.4f," "eps_pri: %.4f, eps_dual: %.4f" % check[:5]) checks.append(check) if stop_at is not None: if abs(check.obj - stop_at) / abs(stop_at) < stop_when: break if check.rnorm <= check.e_pri and check.snorm <= check.e_dual: break rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {})) # scaled dual variables should be also rescaled u *= rho / rho_new U_0 *= rho / rho_new U_1 *= rho / rho_new U_2 *= rho / rho_new rho = rho_new #assert is_pos_def(Z_0) else: warnings.warn("Objective did not converge.") covariance_ = np.array([linalg.pinvh(x) for x in K]) return_list = [K, covariance_] if return_history: return_list.append(checks) if return_n_iter: return_list.append(iteration_ + 1) return return_list
def _impute(merged_snps, ref, annot, taus, gwas_n, obs, to_impute, obsZ, ridge, run_fizi): """ this is the internal logic for the imputation I refactored this into diff function to improve flexibility for any changes downstream (e.g., MI, sampling, sketching, etc) testing out multiple imputation (MI) for the functional part of fizi we could incorporate MI into the estimation of LD as well but it might come with a big computational hit one cool trick might be to use sketching to speed up LD estimation to maintain performance for MI :param merged_snps: pyfizi.MergedPanel object containing merged GWAS and LDRef data :param ref: pyfizi.RefPanel object for reference genotype data at the region :param annot: pyfizi.Annot object representing the functional annotations at the region (default: None) :param taus: pyfizi.Tau object representing the prior variance terms for functional categories (default: None) :param gwas_n: numpy.ndarray or int GWAS sample size. If int assumes sample size is uniform at each SNP. Not required if 'N' is column in GWAS data (default: None) :param obsZ: numpy.ndarray vector of observed Z-scores that have been flipped to match ref panel :param obs: numpy.ndarray boolean vector marking which rows in `merged_snps` have observed Z-scores :param to_impute: numpy.ndarray boolean vector marking which rows in `merged_snps` need to be imputed :param ridge: float Ridge term to regularize LD estimation (default=0.1) :param run_fizi: bool indicating if fizi or impg is run :return: (numpy.ndarray imputed_z, numpy.ndarray pvalues, numpy.ndarray r2blups) """ from numpy.linalg import multi_dot as mdot from scipy.linalg import pinvh from scipy.stats import chi2 log = logging.getLogger(pyfizi.LOG) nobs = np.sum(obs) nimp = np.sum(to_impute) # compute linkage-disequilibrium estimate log.debug("Estimating LD for {} SNPs".format(len(merged_snps))) LD = ref.estimate_ld(merged_snps, adjust=ridge) log.debug("Partitioning LD into quadrants") Voo_ld = LD[obs].T[obs].T Vuo_ld = LD[to_impute].T[obs].T Vou_ld = Vuo_ld.T Vuu_ld = LD[to_impute].T[to_impute].T if run_fizi: if taus is not None: A = annot.get_matrix(merged_snps, taus.names) estimates = taus.estimates D = np.diag(gwas_n * np.dot(A, estimates)) Do = D.T[obs].T[obs] Du = D.T[to_impute].T[to_impute] uoV = Vuo_ld + mdot([Vuu_ld, Du, Vuo_ld]) + mdot( [Vuo_ld, Do, Voo_ld]) ooV = Voo_ld + mdot([Voo_ld, Do, Voo_ld]) + mdot( [Vou_ld, Du, Vuo_ld]) uuV = Vuu_ld + mdot([Vuu_ld, Du, Vuu_ld]) + mdot( [Vuo_ld, Do, Vou_ld]) else: A = annot.get_matrix(merged_snps) names = annot.names Ao = A[obs] flag = np.mean(Ao != 0, axis=0) > 0 Ao = Ao.T[flag].T A = A.T[flag].T names = names[flag] log.debug("Starting inference for variance parameters") estimates = pyfizi.infer_taus(obsZ, Voo_ld, Ao) if estimates is not None: log.debug("Finished variance parameter inference") estimates, sigma2e = estimates # rescale estimates estimates = estimates * np.sum(Ao != 0, axis=0) / np.sum( A != 0, axis=0) # N gets inferred as part of the parameter D = np.diag(np.dot(A, estimates)) Do = D.T[obs].T[obs] Du = D.T[to_impute].T[to_impute] uoV = Vuo_ld + mdot([Vuu_ld, Du, Vuo_ld]) + mdot( [Vuo_ld, Do, Voo_ld]) ooV = Voo_ld + mdot([Voo_ld, Do, Voo_ld]) + mdot( [Vou_ld, Du, Vuo_ld]) uuV = Vuu_ld + mdot([Vuu_ld, Du, Vuu_ld]) + mdot( [Vuo_ld, Do, Vou_ld]) else: log.warning( "Variance parameter optimization failed. Defaulting to ImpG" ) # estimation failed... 
default to ImpG uoV = Vuo_ld ooV = Voo_ld uuV = Vuu_ld else: uoV = Vuo_ld ooV = Voo_ld uuV = Vuu_ld log.debug( "Computing inverse of variance-covariance matrix for {} observed SNPs". format(nobs)) ooVinv = pinvh(ooV, check_finite=False) log.debug("Imputing {} SNPs from {} observed scores".format(nimp, nobs)) impZs = mdot([uoV, ooVinv, obsZ]) # compute r2-pred scores r2blup = np.diag(mdot([uoV, ooVinv, uoV.T])) / np.diag(uuV) # compute two-sided z-test for p-value pvals = chi2.sf(impZs**2, 1) return impZs, pvals, r2blup
def write_exp_stds(grid, name): net = grid pp.rundcpp(net) ppc = net["_ppc"] ##### Setup Grid fluctuation parameters and constraints ######## ## thresold on shift significance in DC-PF Eqs ## pwr = pwr_shf + np.real(bbus)*va ## shf_eps = 1e-4 ## Std for fluctuating loads divided by their nominal values: ## for small grids values 0.5-1 are realistic ## larger grids have cov_std = 0.1 -- 0.3 or less ## cov_std = 0.25 if name == 'grid118i': cov_std = 0.1 ## Phase angle difference limit ## small grids: pi/8 -- pi/6 ## large grids: pi/3 -- pi/4 ## bnd = math.pi / 4 ### Cut small probabilities threshold ### discard all probabilities than thrs* prb(closest hyperplane) ### ### ### Crucially affects time performance ### thrs = 0.001 ### Number of samples used in experiments ### 500 is often enough ### 10000 is a default value supresses the variance nsmp = 1000 ### Step-sizes for KL and Var minimization ### works well with 0.1-0.01 eta_vm = 0.1 eta_kl = 0.1 ### Rounding threshold in optimization: ### if a (normalized on the simplex) hpl probability becomes lower then 0.001 ### we increase it to this level ### ### Crucially affects numerical stability ### eps = 0.001 ##### Setup power grid case in a convenient form for further sampling ######## ### find number of lines (m) and buses(n) m = net.line['to_bus'].size n = net.res_bus['p_mw'].size ### Construct adjacency matrix ### adj = np.zeros((2 * m, n)) for i in range(0, m): adj[i, net.line['to_bus'][i]] = 1 adj[i, net.line['from_bus'][i]] = -1 adj[i + m, net.line['to_bus'][i]] = -1 adj[i + m, net.line['from_bus'][i]] = 1 ### DC power flow equations have a form: ### ### pwr = pwr_shf + np.real(bbus)*va ### (compute all parameters) bbus = np.real(ppc['internal']['Bbus']) va = math.pi * net.res_bus['va_degree'] / 180 pwr = -net.res_bus['p_mw'] pwr_shf = pwr - bbus @ va ### pwr_shf is significant or not: ### ### if the shift is small: zero it out ### (simplifies testing and removes "math zeros") print("significant shift: ", np.max(pwr_shf) - np.min(pwr_shf) > shf_eps) if (np.max(pwr_shf) - np.min(pwr_shf) < shf_eps): pwr_shf[range(0, n)] = 0 ### Phase angle differences: ### ### va = pinv(bbus)*(pwr - pwr_shf) ### va_d = adj*va = adj*pinv(bbus)*(pwr - pwr_shf) ### va_d = pf_mat*pwr - va_shf bbus_pinv = pinvh(bbus.todense()) pf_mat = adj @ bbus_pinv va_shf = pf_mat @ pwr_shf ### Voltage angle differences: ### va_d = pf_mat @ pwr - va_shf ##### Distribution of fluctuations ###### ### assume the only one slack (a higher-level grid) in the grid ### supress all its fluctuations and balance the grid ### ### TODO: adjust to a general case ### slck = net.ext_grid['bus'] slck_mat = np.eye(n) slck_mat[slck] = -1 ## assign values to the whole array slck_mat[slck, slck] = 0 # and zero out for the slack itself ### set fluctuating components: either loads or gens or both ### loads = np.zeros(n) gens = np.zeros(n) ctrls = np.zeros(n) ## controllable loads + gens loads[net.load['bus']] = -net.res_load['p_mw'] gens[net.gen['bus']] = net.res_gen['p_mw'] ctrls = loads + gens ### assume only loads are fluctuating ### xi = loads ### Set covariance matrix and mean ### ### cov_sq = square of the covariance matrix ### Gaussian rv with covariance \Sigma is \Sigma^{1/2} * std_normal_rv ### ### TODO: change to LU/cholesky factorization ### cov_sq = cov_std * np.diag(np.abs(xi)) ### Final equations with fluctuations xi are then ### ### w/o fluctuations: ### va_d = pf_mat*pwr - va_shf ### with fluctuations: ### va_d = pf_mat@(pwr + slck_mat*cov_sq*xi) - va_shf ### va_d = pf_mat@pwr - 
va_shf + (pf_mat@(slck_mat@cov_sq))@xi_std ### va_d = mu + A@xi_std ### where xi_std is a standard normal with only fluctuating components ### A = (pf_mat @ slck_mat) @ cov_sq mu = pf_mat @ pwr - va_shf ### Feasibility Polytope Inequalities ### bnd \ge va_d = mu_f + A_f@xi_std ### incorporates both va_d \le b and va_d \ge -b as we have va_d's with 2 signs ### b = np.ones(2 * m) * bnd ### normalize the matrices to make it easier to compute a failure probability ### ### compute row norms of A nrms = np.maximum(la.norm(A, axis=1), 1e-20) ### normalize A and b so that b_n\ge A_n*xi_std b_n = (b - mu) / nrms A_n = [A[i] / nrms[i] for i in range(0, 2 * m)] ##### Assest equations feasibility ####### ### Power balance check ### print("Eqs balance check:", 0 == np.sum(np.sign(mu))) ### check positiveness of bnd - mu_f = RHS - LHS ### print("Inqs. feasibility check: ", np.min(b - mu) > 0) print("Min gap in phase angles = min(RHS - LHS)", np.min(b - mu)) ## positive value, otherwise the grid fails whp print("The RHS (phase angle diff max) = ", bnd) ### Compute probabilities: ### prb: probability of each hpl failure ### p_up, p_dwn: upper and lower bounds ### prb = norm.cdf(-b_n) p_up = np.sum(prb) p_dwn = np.max(prb) print("the union bound (upper):", p_up) print("the max bound (lower):", p_dwn) ### Keep only valuable probabilities: ### - use the union bound for all the rest ### - keep only the prbs higher than the thrs* p_dwn prbh_id = (prb > thrs * p_dwn) prb_rmd = np.sum(prb[~(prb > thrs * p_dwn)]) print("Remainder probability (omitted):", prb_rmd) ############ Preliminary steps for Sampling and Importance Sampling ############ ### normalize all active probabilities to one ### as we only play a hyperplane out of them ### ### NB: crucial steps in performance optimization ### x_id = np.where(prbh_id == True)[0] ### local normalized versions of A and b, ### reduced in size: number of rows now is equal to a number of constraints ### that have a high probability of violation ### x_bn = b_n[x_id] ### we do not care about the full matrix A and vector b ### only about important parts of them A_n = np.array(A_n) x_An = A_n[x_id] print("# hpls we care of: ", len(x_bn)) ############# Monte-Carlo ################## rv = norm() x_std = norm.rvs(size=[n, nsmp]) smp = x_An @ x_std ### fls_mc = failures in Monte-Carlo, e.g. 
### when MC discovers a failure ### fls_mc = sum((x_bn <= smp.T[:]).T) print("Max # of hlps a sample if out of: ", np.max(fls_mc)) ### MC failure expectation and std ### mc_exp = (1 - np.sum(fls_mc == 0) / nsmp) * (1 - prb_rmd) + prb_rmd mc_std = (1 - prb_rmd) / math.sqrt(nsmp) # violation_dict = {} for i in range(0, np.max(fls_mc) + 1): print(i, "hpls violated (exactly) vs # cases", np.sum(fls_mc == i)) # violation_dict[i] = int(np.sum(fls_mc == i)) print("\nMC(exp, std):", (mc_exp, mc_std)) ### write into file # path_to_viol_dirs = os.path.join("results", "hplns_violations") # with open(os.path.join(path_to_viol_dirs, "grid3120", "result.json"), 'w+') as fp: # json.dump(violation_dict, fp) ############# ALOE ################## ### ### Exactly follows to the Owen/Maximov/Chertkov paper, EJOS'19 ### ### sample z ~ N(0, I_n) ### sample u ~ U(0,1) ### compute y = F^{-1}(u F(-b_i)) ### compute x = - (a_i * y + (I - a_i.T * a_i) z) ### ### Ouput: union bound divided by the expected failure multiplicity ### ### Initialize samplers ### ### sample z ~ N(0, I_n) and u ~ U(0,1) ### nsmp_ = 10000 rv = norm() rv_u = uniform() z = norm.rvs(size=[nsmp_, n]) u = uniform.rvs(size=[nsmp_]) ### x_alph is a vector of ALOE probabilities ### normalized by a unit simplex ### x_alph = prb[prbh_id] / np.sum(prb[prbh_id]) print("ALOE prbs for major hpls: ", x_alph) ### _hpl: how many smpls beyond each of the hpls ### _hpl = multinomial.rvs(n=nsmp_, p=x_alph) ### print("# samples per hpl", _hpl) ### Get cummulative sums, which are easier to work with _hpl = list(itertools.accumulate(_hpl)) _hpl = np.array(_hpl) ### print("cusum of # hpls", _hpl) ### Generate samples ### x_aloe -- samples generated by ALOE ### ### TODO: seems optimizable, but I am not sure about memory mgmnt in python x_aloe = np.zeros([nsmp_, n]) # index of the active hyperplane hpl_id = 0 ### get samples x_aloe according to the algorithm #for i in tqdm(range(0,nsmp)): for i in range(0, nsmp_): ### get index of a hyperplane to sample beyond hpl_id = (hpl_id, hpl_id + 1)[i >= _hpl[hpl_id]] y = norm.ppf(u[i] * norm.cdf(-x_bn[hpl_id])) x_aloe[i] = -x_An[hpl_id] * y - z[i] + np.outer( x_An[hpl_id], x_An[hpl_id]) @ z[i] ### test how many constraints are violated smp = x_An @ x_aloe.T ### compute expectation and std final and history aloe_exp = p_up * np.sum( 1. / np.sum(x_bn <= smp.T[:], axis=1)) / nsmp_ + prb_rmd aloe_std = p_up * math.sqrt(2 * len(_hpl)) / math.sqrt(nsmp_) # indeed len(_hpl) instead of 2*m in the Thrm aloe_exp_history = [ p_up * np.sum(1. 
/ np.sum(x_bn <= (x_An @ x_aloe[:i, :].T).T, axis=1)) / (i + 1) + prb_rmd for i in range(0, nsmp_) ] #aloe_std_history = [p_up*math.sqrt(2*len(_hpl))/math.sqrt(i + 1) for i in range(0, nsmp_)] aloe_std_history = [np.std(aloe_exp_history[:i + 1]) for i in range(nsmp)] print("ALOE (exp, std)", (aloe_exp, aloe_std)) ####### Optimization approach ###### ####### ####### Variance Minimization ###### ####### ### setup the initial values eta = eta_vm md_var = 0 md_exp = 0 grad = np.zeros(len(x_bn)) #gradient on each iteration _hpl = np.zeros(nsmp) # hpls choosen by the method ### intentionally use a copy instead of a reference ### alph is a vector of weigths to be updated in algorithm ### alph = x_alph[:] ### history of probability estimate and std md_exp_history = [] md_std_history = [] # values for Phi (x_bn) x_phi = [norm.cdf(-x_bn[i]) for i in range(0, len(x_bn))] ### grad normalization by prbs[i] factor is introduced to make computations numerically stable ### prbs = prb[prbh_id] for i in tqdm(range(0, nsmp)): ### sample x according to current alph hpl_id = np.where( multinomial.rvs(n=1, p=alph, size=1, random_state=None)[0] == 1)[0] _hpl[i] = hpl_id ### generate a sample following to the ALOE procedure y = norm.ppf(u[i] * norm.cdf(-x_bn[hpl_id])) x_smp = -x_An[hpl_id] * y - z[i] + np.outer(x_An[hpl_id], x_An[hpl_id]) @ z[i] ### the RHS' to be compared with x_bn x_smp = x_An @ x_smp.T ### results of constraints violations for each generated object cns_vlt = (x_bn <= x_smp.T[:])[0] ### weight vector defined by the multiplicity of constraint violation for each sample wgt = 1. / np.sum(np.multiply(cns_vlt, np.multiply(alph, 1. / x_alph))) ### compute gradient of the variance, see the paper (our + OMC) for details grad = [ -p_up * p_up * wgt * wgt * norm.pdf(x_smp[k])[0] * cns_vlt[k] / prbs[k] for k in range(len(x_smp)) ] grad = np.array(grad) ### The gradient is high -- signal about emergency as it can zero out all weights if (la.norm(eta * grad) > 1e4): print( "\n############## Extremely high gradient ############\n" ) print("Iteration: ", i, "\nGradient:", grad) ### make a ``simplex MD'' update alph = [ math.exp(-eta * grad[k]) * alph[k] for k in range(0, len(x_smp)) ] ### enter if some coordinates are too small and may cause numerical instability ### increase the corresponding weigths if (np.min(alph) < eps): print("########### some coordinates are small #################") alph = [alph[k] + eps for k in range(0, len(x_bn))] ### make a projection to the unit simplex alph = alph / np.sum(alph) ### adjust contribution to the errors md_exp = md_exp + wgt md_exp_history.append(p_up * md_exp / (i + 1) + prb_rmd) md_var = md_var + p_up * np.dot(grad.T, grad) #md_std_history.append(p_up * math.sqrt(md_var) / (i + 1)) md_std_history = [np.std(md_exp_history[:i + 1]) for i in range(nsmp)] print("Optimal weigths of MD-Var minimization: ", alph) print("Optimal weigths of ALOE", x_alph) ### normalize errors, compute standard deviation md_exp = p_up * md_exp / nsmp + prb_rmd md_std = p_up * math.sqrt(md_var) / nsmp print("MD-Var (exp, std)", (md_exp, md_std)) #print("assert normalization:", np.sum(alph), np.sum(x_alph)) ####### Optimization approach ###### ####### ####### KL Minimization ###### ####### ### SMD step-size eta = eta_kl ### setup initial values kl_exp = 0 kl_var = 0 grad = np.zeros(len(x_bn)) _hpl = np.zeros(nsmp) ## _hpl[i] = beyond which hpl we sample on iteration i ### intentionally use a copy instead of a reference ### alph is an optimization variable alph = x_alph[:] ### history of 
probability estimate and std kl_exp_history = [] kl_std_history = [] ### this normalization factor is introduced to make computations numerically stable prbs = prb[prbh_id] for i in tqdm(range(0, nsmp)): #,miniters=500): ### sample x according to current alph hpl_id = np.where( multinomial.rvs(n=1, p=alph, size=1, random_state=None)[0] == 1)[0] _hpl[i] = hpl_id ### generate a sample accordint to ALOE y = norm.ppf(u[i] * norm.cdf(-x_bn[hpl_id])) x_smp = -x_An[hpl_id] * y - z[i] + np.outer(x_An[hpl_id], x_An[hpl_id]) @ z[i] ### RHS to compare with x_bn x_smp = x_An @ x_smp.T ### results of constraints violations for the generated object cns_vlt = (x_bn <= x_smp.T[:])[0] ### object weight which is set according to ALOE wgt = 1. / np.sum(np.multiply(cns_vlt, np.multiply(alph, 1. / x_alph))) # the KL divergence's gradient grad = [ -p_up * wgt * norm.pdf(x_smp[k])[0] * cns_vlt[k] / prbs[k] for k in range(len(x_smp)) ] grad = np.array(grad) ### The gradient is high -- signal about emergency as it can zero out all weights if (la.norm(eta * grad) > 1e4): print( "\n############## Extremely high gradient ############\n" ) print("Iteration: ", i, "\nGradient:", grad) ### make a ``simplex MD'' update alph = [ math.exp(-eta * grad[k]) * alph[k] for k in range(0, len(x_smp)) ] ### enter if some coordinates are too small and may cause numerical instability ### increase the corresponding weigths if (np.min(alph) < eps): print("########### some coordinates are small #################") alph = [alph[k] + eps for k in range(0, len(x_bn))] ### make a projection to the unit simplex alph = alph / np.sum(alph) ### adjust contribution to the errors kl_exp = kl_exp + wgt kl_exp_history.append(p_up * kl_exp / (i + 1) + prb_rmd) kl_var = kl_var + p_up * np.dot(grad.T, grad) * wgt #kl_std_history.append(p_up * math.sqrt(kl_var) / (i + 1)) kl_std_history = [np.std(kl_exp_history[:i + 1]) for i in range(nsmp)] print("Optimal weigths of MD-KL minimization: ", alph) print("Optimal weigths of ALOE", x_alph) ### normalize errors kl_exp = p_up * kl_exp / nsmp + prb_rmd kl_std = p_up * math.sqrt(kl_var) / nsmp print("MD-KL (exp, std)", (kl_exp, kl_std)) #print("assert normalization:", np.sum(alph), np.sum(x_alph)) ############## Output all probabilities ################## print("the union bound (up):", p_up) print("the max bound (lower):", p_dwn) print("MC(exp, std):", mc_exp, mc_std) print("ALOE(exp, std)", aloe_exp, aloe_std) print("MD-Var(exp, var)", md_exp, md_std) print("MD-KL(exp, var)", kl_exp, kl_std) output_dict = {} output_dict["MD-Var-exp"] = [float(a) for a in md_exp_history] output_dict["MD-KL-exp"] = [float(a) for a in kl_exp_history] output_dict["ALOE-Var-exp"] = [float(a) for a in aloe_exp_history][:nsmp] output_dict["MD-Var-std"] = [float(a) for a in md_std_history] output_dict["MD-KL-std"] = [float(a) for a in kl_std_history] output_dict["ALOE-Var-std"] = [float(a) for a in aloe_std_history][:nsmp] output_dict["ALOE-inf-exp"] = float(aloe_exp_history[-1]) with open(name + ".json", 'w+') as fp: json.dump(output_dict, fp)
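# The ALOE estimator driving the sampling above (Owen/Maximov/Chertkov) can be exercised in
# isolation. A minimal, self-contained sketch (illustrative names, not part of write_exp_stds):
# it estimates P(any i: a_i @ x >= b_i) for x ~ N(0, I), assuming unit-norm rows a_i,
# i.e. the same normalisation applied to A_n and b_n above.
import numpy as np
from scipy.stats import norm


def aloe_estimate(A, b, n_samples=2000, seed=0):
    """Union-bound importance sampling for P(exists i: A[i] @ x >= b[i]), x ~ N(0, I_n)."""
    rng = np.random.default_rng(seed)
    m, n = A.shape
    p_i = norm.cdf(-b)                     # per-hyperplane failure probabilities
    p_up = p_i.sum()                       # union bound
    alph = p_i / p_up                      # ALOE mixture weights on the simplex
    idx = rng.choice(m, size=n_samples, p=alph)
    u = rng.uniform(size=n_samples)
    z = rng.standard_normal((n_samples, n))
    est = 0.0
    for k in range(n_samples):
        a = A[idx[k]]
        y = norm.ppf(u[k] * norm.cdf(-b[idx[k]]))     # tail sample, y <= -b_i
        x = -a * y + z[k] - np.outer(a, a) @ z[k]     # sample conditioned to violate hyperplane i
        est += 1.0 / np.sum(A @ x >= b)               # weight by the violation multiplicity
    return p_up * est / n_samples


# toy usage on three random half-spaces
_rng = np.random.default_rng(1)
_A = _rng.standard_normal((3, 5))
_A /= np.linalg.norm(_A, axis=1, keepdims=True)
print(aloe_estimate(_A, np.array([2.5, 3.0, 2.8])))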
def latent_graphical_lasso( emp_cov, alpha=1.0, tau=1.0, rho=1.0, max_iter=100, verbose=False, tol=1e-4, rtol=1e-2, return_history=False, return_n_iter=True, update_rho_options=None, compute_objective=True, init="empirical", ): r"""Latent variable graphical lasso solver via ADMM. Solves the following problem: min - log_likelihood(S, K-L) + alpha ||K||_{od,1} + tau ||L_i||_* where S = (1/n) X^T \times X is the empirical covariance of the data matrix X (training observations by features). Parameters ---------- emp_cov : array-like Empirical covariance matrix. alpha, tau : float, optional Regularisation parameters. rho : float, optional Augmented Lagrangian parameter. max_iter : int, optional Maximum number of iterations. tol : float, optional Absolute tolerance for convergence. rtol : float, optional Relative tolerance for convergence. return_history : bool, optional Return the history of computed values. return_n_iter : bool, optional Return the number of iteration before convergence. verbose : bool, default False Print info at each iteration. update_rho_options : dict, optional Arguments for the rho update. See regain.update_rules.update_rho function for more information. compute_objective : bool, default True Choose to compute the objective value. init : {'empirical', 'zeros', ndarray}, default 'empirical' How to initialise the inverse covariance matrix. Default is take the empirical covariance and inverting it. Returns ------- K, L : np.array, 2-dimensional, size (d x d) Solution to the problem. S : np.array, 2 dimensional Empirical covariance matrix. n_iter : int If return_n_iter, returns the number of iterations before convergence. history : list If return_history, then also a structure that contains the objective value, the primal and dual residual norms, and tolerances for the primal and dual residual norms at each iteration. """ K = init_precision(emp_cov, mode=init) L = np.zeros_like(emp_cov) U = np.zeros_like(emp_cov) R_old = np.zeros_like(emp_cov) checks = [] for iteration_ in range(max_iter): # update R A = K - L - U A += A.T A /= 2.0 R = prox_logdet(emp_cov - rho * A, lamda=1.0 / rho) A = L + R + U K = soft_thresholding(A, lamda=alpha / rho) A = K - R - U A += A.T A /= 2.0 L = prox_trace_indicator(A, lamda=tau / rho) # update residuals U += R - K + L # diagnostics, reporting, termination checks obj = objective(emp_cov, R, K, L, alpha, tau) if compute_objective else np.nan rnorm = np.linalg.norm(R - K + L) snorm = rho * np.linalg.norm(R - R_old) check = convergence( obj=obj, rnorm=rnorm, snorm=snorm, e_pri=np.sqrt(R.size) * tol + rtol * max(np.linalg.norm(R), np.linalg.norm(K - L)), e_dual=np.sqrt(R.size) * tol + rtol * rho * np.linalg.norm(U), ) R_old = R.copy() if verbose: print("obj: %.4f, rnorm: %.4f, snorm: %.4f," "eps_pri: %.4f, eps_dual: %.4f" % check[:5]) checks.append(check) if check.rnorm <= check.e_pri and check.snorm <= check.e_dual: break if check.obj == np.inf: break rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {})) # scaled dual variables should be also rescaled U *= rho / rho_new rho = rho_new else: warnings.warn("Objective did not converge.") covariance_ = linalg.pinvh(K) return_list = [K, L, covariance_] if return_history: return_list.append(checks) if return_n_iter: return_list.append(iteration_) return return_list
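# A hedged usage sketch for the solver above (it assumes regain's helpers such as
# init_precision, prox_logdet, prox_trace_indicator, soft_thresholding, convergence and
# update_rho are importable alongside it). K is the sparse precision estimate, L the
# low-rank latent contribution, and the returned covariance is pinvh(K).
import numpy as np

rng = np.random.default_rng(0)
X = rng.standard_normal((200, 10))
emp_cov = np.cov(X, rowvar=False, bias=True)   # S = (1/n) X^T X for centred data
K, L, cov_, n_iter = latent_graphical_lasso(emp_cov, alpha=0.1, tau=1.0, max_iter=200)
print("nonzeros in K:", np.count_nonzero(np.abs(K) > 1e-4),
      "rank of L:", np.linalg.matrix_rank(L, tol=1e-6))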
def _fit(self, pairs, y): if not HAS_SKGGM: if self.verbose: print("SDML will use scikit-learn's graphical lasso solver.") else: if self.verbose: print("SDML will use skggm's graphical lasso solver.") pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples') # set up (the inverse of) the prior M # if the prior is the default (None), we raise a warning _, prior_inv = _initialize_metric_mahalanobis( pairs, self.prior, return_inverse=True, strict_pd=True, matrix_name='prior', random_state=self.random_state) diff = pairs[:, 0] - pairs[:, 1] loss_matrix = (diff.T * y).dot(diff) emp_cov = prior_inv + self.balance_param * loss_matrix # our initialization will be the matrix with emp_cov's eigenvalues, # with a constant added so that they are all positive (plus an epsilon # to ensure definiteness). This is empirical. w, V = np.linalg.eigh(emp_cov) min_eigval = np.min(w) if min_eigval < 0.: warnings.warn( "Warning, the input matrix of graphical lasso is not " "positive semi-definite (PSD). The algorithm may diverge, " "and lead to degenerate solutions. " "To prevent that, try to decrease the balance parameter " "`balance_param` and/or to set prior='identity'.", ConvergenceWarning) w -= min_eigval # we translate the eigenvalues to make them all positive w += 1e-10 # we add a small offset to avoid definiteness problems sigma0 = (V * w).dot(V.T) try: if HAS_SKGGM: theta0 = pinvh(sigma0) M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param, msg=self.verbose, Theta0=theta0, Sigma0=sigma0) else: _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param, verbose=self.verbose, cov_init=sigma0) raised_error = None w_mahalanobis, _ = np.linalg.eigh(M) not_spd = any(w_mahalanobis < 0.) not_finite = not np.isfinite(M).all() except Exception as e: raised_error = e not_spd = False # not_spd not applicable here so we set to False not_finite = False # not_finite not applicable here so we set to False if raised_error is not None or not_spd or not_finite: msg = ("There was a problem in SDML when using {}'s graphical " "lasso solver." ).format("skggm" if HAS_SKGGM else "scikit-learn") if not HAS_SKGGM: skggm_advice = ( " skggm's graphical lasso can sometimes converge " "on non SPD cases where scikit-learn's graphical " "lasso fails to converge. Try to install skggm and " "rerun the algorithm (see the README.md for the " "right version of skggm).") msg += skggm_advice if raised_error is not None: msg += " The following error message was thrown: {}.".format( raised_error) raise RuntimeError(msg) self.components_ = components_from_metric(np.atleast_2d(M)) return self
def build(self, wfn, integrals): """Builds the Hamiltonian matrix from a list of trajectories.""" n_alive = wfn.nalive if integrals.hermitian: n_elem = int(n_alive * (n_alive + 1) / 2) else: n_elem = n_alive * n_alive if self.mat_dict['heff'].shape != (n_alive, n_alive): self.mat_dict['t'] = np.zeros((n_alive, n_alive), dtype=complex) self.mat_dict['v'] = np.zeros((n_alive, n_alive), dtype=complex) self.mat_dict['h'] = np.zeros((n_alive, n_alive), dtype=complex) self.mat_dict['s_traj'] = np.zeros((n_alive, n_alive), dtype=complex) self.mat_dict['s_nuc'] = np.zeros((n_alive, n_alive), dtype=complex) self.mat_dict['s_elec'] = np.zeros((n_alive, n_alive), dtype=complex) self.mat_dict['s'] = np.zeros((n_alive, n_alive), dtype=complex) self.mat_dict['sinv'] = np.zeros((n_alive, n_alive), dtype=complex) self.mat_dict['sdot'] = np.zeros((n_alive, n_alive), dtype=complex) self.mat_dict['heff'] = np.zeros((n_alive, n_alive), dtype=complex) # now evaluate the hamiltonian matrix for ij in range(n_elem): if integrals.hermitian: i, j = self.ut_ind(ij) else: i, j = self.sq_ind(ij, n_alive) ii = wfn.alive[i] jj = wfn.alive[j] s_nuc = integrals.nuc_overlap(wfn.traj[ii], wfn.traj[jj]) s_elec = integrals.elec_overlap(wfn.traj[ii], wfn.traj[jj]) # nuclear overlap matrix (excluding electronic component) self.mat_dict['s_nuc'][i, j] = s_nuc # nuclear overlap matrix (excluding electronic component) self.mat_dict['s_elec'][i, j] = s_elec # compute overlap of trajectories (different from S, which may or may # not involve integration in a gaussian basis self.mat_dict['s_traj'][i, j] = integrals.traj_overlap( wfn.traj[ii], wfn.traj[jj]) # overlap matrix (including electronic component) self.mat_dict['s'][i, j] = integrals.s_integral(wfn.traj[ii], wfn.traj[jj], nuc_ovrlp=s_nuc, elec_ovrlp=s_elec) # time-derivative of the overlap matrix (not hermitian in general) self.mat_dict['sdot'][i, j] = integrals.sdot_integral( wfn.traj[ii], wfn.traj[jj], nuc_ovrlp=s_nuc, elec_ovrlp=s_elec) # kinetic energy matrix self.mat_dict['t'][i, j] = integrals.t_integral(wfn.traj[ii], wfn.traj[jj], nuc_ovrlp=s_nuc, elec_ovrlp=s_elec) # potential energy matrix self.mat_dict['v'][i, j] = integrals.v_integral(wfn.traj[ii], wfn.traj[jj], nuc_ovrlp=s_nuc, elec_ovrlp=s_elec) # Hamiltonian matrix in non-orthogonal basis self.mat_dict['h'][ i, j] = self.mat_dict['t'][i, j] + self.mat_dict['v'][i, j] # if hermitian matrix, set (j,i) indices if integrals.hermitian and i != j: self.mat_dict['s_nuc'][j, i] = self.mat_dict['s_nuc'][ i, j].conjugate() self.mat_dict['s_elec'][j, i] = self.mat_dict['s_elec'][ i, j].conjugate() self.mat_dict['s_traj'][j, i] = self.mat_dict['s_traj'][ i, j].conjugate() self.mat_dict['s'][j, i] = self.mat_dict['s'][i, j].conjugate() self.mat_dict['sdot'][j, i] = integrals.sdot_integral( wfn.traj[jj], wfn.traj[ii], nuc_ovrlp=self.mat_dict['s_nuc'][j, i], elec_ovrlp=self.mat_dict['s_elec'][j, i]) self.mat_dict['t'][j, i] = self.mat_dict['t'][i, j].conjugate() self.mat_dict['v'][j, i] = self.mat_dict['v'][i, j].conjugate() self.mat_dict['h'][j, i] = self.mat_dict['h'][i, j].conjugate() if integrals.hermitian: # compute the S^-1, needed to compute Heff timings.start('linalg.pinvh') self.mat_dict['sinv'] = sp_linalg.pinvh(self.mat_dict['s']) #Sinv, cond = linalg.pseudo_inverse2(S) timings.stop('linalg.pinvh') else: # compute the S^-1, needed to compute Heff timings.start('hamiltonian.pseudo_inverse') self.mat_dict['sinv'], cond = linalg.pseudo_inverse( self.mat_dict['s']) timings.stop('hamiltonian.pseudo_inverse') 
self.mat_dict['heff'] = np.dot( self.mat_dict['sinv'], self.mat_dict['h'] - 1j * self.mat_dict['sdot'])
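# The closing step above forms the effective Hamiltonian Heff = S^-1 (H - 1j*Sdot).
# A toy 2x2 sketch of that assembly (illustrative numbers only) shows why
# scipy.linalg.pinvh is the natural inverse for the Hermitian overlap matrix S.
import numpy as np
from scipy import linalg as sp_linalg

S = np.array([[1.0, 0.3 + 0.1j],
              [0.3 - 0.1j, 1.0]])            # Hermitian overlap
H = np.array([[1.0, 0.2 - 0.05j],
              [0.2 + 0.05j, 1.5]])           # Hermitian Hamiltonian
Sdot = np.array([[0.0, 0.01],
                 [-0.01, 0.0]])              # overlap time-derivative (not Hermitian in general)
Heff = sp_linalg.pinvh(S) @ (H - 1j * Sdot)
print(Heff)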
def mutual_incoherence(X_relevant, X_irelevant): """Mutual incoherence, as defined by formula (26a) of [Wainwright2006]. """ projector = np.dot(np.dot(X_irelevant.T, X_relevant), linalg.pinvh(np.dot(X_relevant.T, X_relevant))) return np.max(np.abs(projector).sum(axis=1))
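# Toy usage of the incoherence measure above: split a random design matrix into a
# "relevant" and an "irrelevant" block; values below 1 correspond to the incoherence
# condition of [Wainwright2006] being satisfied.
import numpy as np
from scipy import linalg

rng = np.random.default_rng(0)
X = rng.standard_normal((100, 8))
print(mutual_incoherence(X[:, :3], X[:, 3:]))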
def __init__(self, nu, S): self.nu = nu self.S = S self.D = S.shape[0] self.inv_S = linalg.pinvh(S)
# write the estimated values to an SBtab file (out1.tsv) reaction_df = pd.DataFrame(zip(pp.reaction_ids, pp.reaction_formulas, standard_dg_prime_in_kJ_per_mol, dg_sigma_in_kJ_per_mol), columns=[ "reaction_id", "reaction_formula", "standard_dg_prime_in_kJ_per_mol", "dg_sigma_in_kJ_per_mol" ]) sbtabdoc = sbtab.SBtab.SBtabDocument() sbtabdoc.add_sbtab( sbtab.SBtab.SBtabTable.from_data_frame(reaction_df.applymap(str), table_id="Thermodynamics", table_type="Quantity")) sbtabdoc.write("out1.tsv") # Save the precision matrix to dg_precision.mat in the Matlab binary file format try: _, dg_precision = cc.standard_dg_prime_multi( pp.reactions, uncertainty_representation="precision") dg_precision = dg_precision.m_as("mol**2/kJ**2") except ValueError: sys.stderr.write( "uncertainty_representation = 'precision' is not implemented " "in this version of equilibrator-api.\ninverting the covariance matrix " "using the pseudoinverse function of scipy.") dg_precision = pinvh(dg_cov.m_as("kJ**2/mol**2")) mdic = {"dg_precision": dg_precision, "rxn_id": list(pp.reaction_ids)} scipy.io.savemat("dg_precision.mat", mdic)
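# The pinvh fallback above matters because the covariance of the standard reaction
# energies is often rank-deficient, so a plain inverse does not exist; the pseudo-inverse
# still satisfies the Moore-Penrose identity. A small sketch:
import numpy as np
from scipy.linalg import pinvh

cov = np.array([[1.0, 1.0, 0.0],
                [1.0, 1.0, 0.0],
                [0.0, 0.0, 2.0]])            # symmetric PSD, rank 2
prec = pinvh(cov)
print(np.allclose(cov @ prec @ cov, cov))    # True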
def fit(x, y_onehot, niter_max, l2): #print(niter_max) l, n = x.shape m = y_onehot.shape[1] # number of categories x_av = np.mean(x, axis=0) dx = x - x_av c = np.cov(dx, rowvar=False, bias=True) # 2019.07.16: l2 = lamda/(2L) c += l2 * np.identity(n) / (2 * l) c_inv = linalg.pinvh(c) H0 = np.zeros(m) W = np.zeros((n, m)) for i in range(m): y = y_onehot[:, i] # y = {0,1} y1 = 2 * y - 1 # y1 = {-1,1} # initial values h0 = 0. w = np.random.normal(0.0, 1. / np.sqrt(n), size=(n)) cost = np.full(niter_max, 100.) for iloop in range(niter_max): h = h0 + x.dot(w) y1_model = np.tanh(h / 2.) # stopping criterion #p = 1/(1+np.exp(-h)) #cost[iloop] = ((p-y)**2).mean() # 2019.07.12: lost function cost[iloop] = ((y1[:] - y1_model[:])**2).mean() #cost[iloop] = (-y[:]*np.log(p) - (1-y)*np.log(1-p)).mean() #h_test = h0 + x_test.dot(w) #p_test = 1/(1+np.exp(-h_test)) #cost[iloop] = ((p_test-y_test)**2).mean() if iloop > 0 and cost[iloop] >= cost[iloop - 1]: break # update local field t = h != 0 h[t] *= y1[t] / y1_model[t] h[~t] = 2 * y1[~t] # find w from h h_av = h.mean() dh = h - h_av dhdx = dh[:, np.newaxis] * dx[:, :] dhdx_av = dhdx.mean(axis=0) w = c_inv.dot(dhdx_av) h0 = h_av - x_av.dot(w) H0[i] = h0 W[:, i] = w return H0, W
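# Hypothetical usage of the expectation-reflection fit above on toy 3-class data
# (assumes numpy and scipy.linalg are imported as in the snippet); prediction is
# simply the class with the largest local field h0 + x @ w.
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((300, 5))
labels = (x[:, 0] > 0).astype(int) + (x[:, 1] > 0).astype(int)   # classes 0, 1, 2
y_onehot = np.eye(3)[labels]                  # {0,1} one-hot targets, as expected by fit
H0, W = fit(x, y_onehot, niter_max=100, l2=0.001)
pred = np.argmax(H0 + x @ W, axis=1)
print("training accuracy:", (pred == labels).mean())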
def compute_density_gCorr(self, use_variance=True, comp_err=True): # TODO: matrix A should be in sparse format! # compute changes in free energy if self.Fij_array is None: self.compute_deltaFs_grads_semisum() if self.verb: print("gCorr density estimation started") sec = time.time() # compute adjacency matrix and cumulative changes A = sparse.lil_matrix((self.N, self.N), dtype=np.float_) supp_deltaF = sparse.lil_matrix((self.N, self.N), dtype=np.float_) # define redundancy factor for each A matrix entry as the geometric mean of the 2 corresponding k* k1 = self.kstar[self.nind_list[:, 0]] k2 = self.kstar[self.nind_list[:, 1]] redundancy = np.sqrt(k1 * k2) if use_variance: for nspar, indices in enumerate(self.nind_list): i = indices[0] j = indices[1] # tmp = 1.0 / self.Fij_var_array[nspar] tmp = 1.0 / self.Fij_var_array[nspar] / redundancy[nspar] A[i, j] = -tmp supp_deltaF[i, j] = self.Fij_array[nspar] * tmp else: for nspar, indices in enumerate(self.nind_list): i = indices[0] j = indices[1] # A[i, j] = -1.0 A[i, j] = -1.0 / redundancy[nspar] supp_deltaF[i, j] = self.Fij_array[nspar] A = sparse.lil_matrix(A + A.transpose()) diag = np.array(-A.sum(axis=1)).reshape((self.N,)) A.setdiag(diag) # print("Diag = {}".format(diag)) deltaFcum = np.array(supp_deltaF.sum(axis=0)).reshape((self.N,)) - np.array( supp_deltaF.sum(axis=1) ).reshape((self.N,)) sec2 = time.time() if self.verb: print("{0:0.2f} seconds to fill sparse matrix".format(sec2 - sec)) log_den = sparse.linalg.spsolve(A.tocsr(), deltaFcum) if self.verb: print("{0:0.2f} seconds to solve linear system".format(time.time() - sec2)) sec2 = time.time() self.log_den = log_den # self.log_den_err = np.sqrt((sparse.linalg.inv(A.tocsc())).diagonal()) if comp_err is True: self.A = A.todense() self.B = slin.pinvh(self.A) # self.B = slin.inv(self.A) self.log_den_err = np.sqrt(np.diag(self.B)) if self.verb: print("{0:0.2f} seconds inverting A matrix".format(time.time() - sec2)) sec2 = time.time() # self.log_den_err = np.sqrt(np.diag(slin.pinvh(A.todense()))) # self.log_den_err = np.sqrt(diag/np.array(np.sum(np.square(A.todense()),axis=1)).reshape(self.N,)) sec2 = time.time() if self.verb: print("{0:0.2f} seconds for gCorr density estimation".format(sec2 - sec))
def _c_step(X, n_support, random_state, remaining_iterations=30, initial_estimates=None, verbose=False, cov_computation_method=empirical_covariance): n_samples, n_features = X.shape dist = np.inf # Initialisation support = np.zeros(n_samples, dtype=bool) if initial_estimates is None: # compute initial robust estimates from a random subset support[random_state.permutation(n_samples)[:n_support]] = True else: # get initial robust estimates from the function parameters location = initial_estimates[0] covariance = initial_estimates[1] # run a special iteration for that case (to get an initial support) precision = linalg.pinvh(covariance) X_centered = X - location dist = (np.dot(X_centered, precision) * X_centered).sum(1) # compute new estimates support[np.argsort(dist)[:n_support]] = True X_support = X[support] location = X_support.mean(0) covariance = cov_computation_method(X_support) # Iterative procedure for Minimum Covariance Determinant computation det = fast_logdet(covariance) # If the data already has singular covariance, calculate the precision, # as the loop below will not be entered. if np.isinf(det): precision = linalg.pinvh(covariance) previous_det = np.inf while (det < previous_det and remaining_iterations > 0 and not np.isinf(det)): # save old estimates values previous_location = location previous_covariance = covariance previous_det = det previous_support = support # compute a new support from the full data set mahalanobis distances precision = linalg.pinvh(covariance) X_centered = X - location dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1) # compute new estimates support = np.zeros(n_samples, dtype=bool) support[np.argsort(dist)[:n_support]] = True X_support = X[support] location = X_support.mean(axis=0) covariance = cov_computation_method(X_support) det = fast_logdet(covariance) # update remaining iterations for early stopping remaining_iterations -= 1 previous_dist = dist dist = (np.dot(X - location, precision) * (X - location)).sum(axis=1) # Check if best fit already found (det => 0, logdet => -inf) if np.isinf(det): results = location, covariance, det, support, dist # Check convergence if np.allclose(det, previous_det): # c_step procedure converged if verbose: print("Optimal couple (location, covariance) found before" " ending iterations (%d left)" % (remaining_iterations)) results = location, covariance, det, support, dist elif det > previous_det: # determinant has increased (should not happen) warnings.warn( "Warning! det > previous_det (%.15f > %.15f)" % (det, previous_det), RuntimeWarning) results = previous_location, previous_covariance, \ previous_det, previous_support, previous_dist # Check early stopping if remaining_iterations == 0: if verbose: print('Maximum number of iterations reached') results = location, covariance, det, support, dist return results
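# The heart of a single C-step above is the Mahalanobis-distance ranking; a stripped-down
# sketch of that ranking (pinvh handles a possibly singular covariance where a plain
# inverse would fail):
import numpy as np
from scipy import linalg

rng = np.random.default_rng(0)
X = rng.standard_normal((50, 3))
location = X.mean(axis=0)
precision = linalg.pinvh(np.cov(X, rowvar=False))
X_centered = X - location
dist = (np.dot(X_centered, precision) * X_centered).sum(axis=1)
support = np.zeros(len(X), dtype=bool)
support[np.argsort(dist)[:30]] = True        # keep the 30 most central observations
print(support.sum())                          # 30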
def make_data( n=150, min_v=None, max_v=None, ns=None, p=3, q=3, tau_range=(0.25, 1.5), max_neighbors=8, random_state=None, ): """Generate random data to use for modelling species occupancy. Parameters ---------- n : int, optional Number of sites. Defaults to 150. min_v : int, optional Minimum number of visits per site. If None, the maximum number is set to 2. Defaults to None. max_v : int, optional Maximum number of visits per site. If None, the maximum number is set to 10% of `n`. Defaults to None. ns : int, optional Number of surveyed sites out of `n`. If None, then this parameter is set to 50% of `n`. Defaults to None. p : int, optional Number covariates to use for species occupancy. Defaults to 3. q : int, optional Number of covariates to use for conditonal detection. Defaults to 3. tau_range : tuple, optional The range to randomly sample the precision parameter value from. Defaults to (0.25, 1.5). max_neighbors : int, optional Maximum number of neighbors per site. Should be one of {4, 8}. Default is 8. random_state : int, optional The seed to use for random number generation. Useful for reproducing generated data. If None then a random seed is chosen. Defaults to None. Returns ------- Q : scipy.sparse.coo_matrix Spatial precision matrix W : Dict[int, np.ndarray] Dictionary of detection corariates where the keys are the site numbers of the surveyed sites and the values are arrays containing the design matrix of each corresponding site. X : np.ndarray Design matrix of species occupancy covariates. y : Dict[int, np.ndarray] Dictionary of survey data where the keys are the site numbers of the surveyed sites and the values are number arrays of 1's and 0's where 0's indicate "no detection" and 1's indicate "detection". The length of each array equals the number of visits in the corresponding site. alpha : np.ndarray True values of coefficients of detection covariates. beta : np.ndarray True values of coefficients of occupancy covariates. tau : np.ndarray True value of the precision parameter z : np.ndarray True occupancy state for all `n` sites. Raises ------ ValueError When `n` is less than the default 150 sites. When `min_v` is less than 1. When `max_v` is less than 2 or greater than `n`. When `ns` is not a positive integer or greater than `n`. Examples -------- >>> from occuspytial.utils import make_data >>> Q, W, X, y, alpha, beta, tau, z = make_data() >>> Q <150x150 sparse matrix of type '<class 'numpy.float64'>' with 1144 stored elements in COOrdinate format> >>> Q.toarray() array([[ 3., -1., 0., ..., 0., 0., 0.], # random [-1., 5., -1., ..., 0., 0., 0.], [ 0., -1., 5., ..., 0., 0., 0.], ..., [ 0., 0., 0., ..., 5., -1., 0.], [ 0., 0., 0., ..., -1., 5., -1.], [ 0., 0., 0., ..., 0., -1., 3.]]) >>> W {81: array([[ 1. , 1.01334565, 0.93150242], # random [ 1. , 0.19276808, -1.71939657], [ 1. , 0.23866531, 0.0559545 ], [ 1. , 1.36102304, 1.73611887], [ 1. , 0.47247886, 0.73410589], [ 1. , -1.9018879 , 0.0097963 ]]), 131: array([[ 1. , 1.67846707, -1.12476746], [ 1. , -1.63131532, -1.32216705], [ 1. , -1.37431173, -0.79734213], ..., 21: array([[ 1. , 1.6416734 , -1.91642502], [ 1. , 0.2256312 , -1.68929118], [ 1. , 1.36953093, 1.08758129], [ 1. , -1.08029212, 0.40219588]])} >>> X array([[ 1. , 0.71582433, 1.76344395], [ 1. , 0.8561976 , 1.0520401 ], [ 1. , -0.28051247, 0.16809809], ..., [ 1. , 0.86702262, -1.18225448], [ 1. , -0.41346399, -0.9633078 ], [ 1. 
, -0.23182363, 1.69930761]]) >>> y {15: array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]), # random 81: array([0, 0, 0, 1, 1, 0]), ..., 21: array([0, 1, 0, 0])} >>> alpha array([-1.43291816, -0.87932413, -1.84927642]) # random >>> beta array([-0.62084322, -1.09645564, -0.93371374]) # random >>> tau 1.415532667780688 # random >>> z array([0, 1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 1, 0, 0, 1, 0, 1, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 0, 1, 0, 0, 1, 0]) """ rng = get_generator(random_state) if n < 150: raise ValueError('n cant be lower than 150') if min_v is None: min_v = 2 elif min_v < 1: raise ValueError('min_v needs to be at least 1') if max_v is None: max_v = n // 10 elif max_v < 2: raise ValueError('max_v is too small') elif max_v > n: raise ValueError('max_v cant be more than n') if ns is None: ns = n // 2 elif ns == 0: raise ValueError('ns should be positive') elif ns > n: raise ValueError('ns cant be more than n') surveyed_sites = rng.choice(range(n), size=ns, replace=False) visits_per_site = rng.integers(min_v, max_v, size=ns, endpoint=True) alpha = rng.standard_normal(q) beta = rng.standard_normal(p) tau = rng.uniform(*tau_range) factors = [] for i in range(3, n): if (n % i) == 0: factors.append(i) row = rng.choice(factors) col = n // row Q = rand_precision_mat(row, col, max_neighbors=max_neighbors).astype(float) Q_pinv = pinvh(Q.toarray(), cond=1e-5) eta = rng.multivariate_normal(np.zeros(n), Q_pinv / tau, method='eigh') X = rng.uniform(-2, 2, n * p).reshape(n, -1) X[:, 0] = 1 psi = np.exp(-np.logaddexp(0, -X @ beta + eta)) z = rng.binomial(1, p=psi, size=n) W, y = {}, {} for i, j in zip(surveyed_sites, visits_per_site): _W = rng.uniform(-2, 2, size=j * q).reshape(j, -1) _W[:, 0] = 1 d = np.exp(-np.logaddexp(0, -_W @ alpha)) W[i] = _W y[i] = rng.binomial(1, z[i] * d) return Q, W, X, y, alpha, beta, tau, z
def compute_density_dF_PAk_gCorr(self, use_variance=True, alpha=1.0, comp_err=True): # check for deltaFij if self.Fij_array is None: self.compute_deltaFs_grads_semisum() if self.verb: print("dF_PAk_gCorr density estimation started") sec = time.time() dc = np.zeros(self.N, dtype=float) corrected_vols = np.zeros(self.N, dtype=float) log_den = np.zeros(self.N, dtype=float) log_den_err = np.zeros(self.N, dtype=float) prefactor = np.exp( self.intrinsic_dim / 2.0 * np.log(np.pi) - gammaln((self.intrinsic_dim + 2) / 2) ) log_den_min = 9.9e300 for i in range(self.N): k = int(self.kstar[i]) dc[i] = self.distances[i, k] Fijs = self.Fij_array[self.nind_iptr[i] : self.nind_iptr[i + 1]] for j in range(1, k): Fij = Fijs[j - 1] rjjm1 = ( self.distances[i, j] ** self.intrinsic_dim - self.distances[i, j - 1] ** self.intrinsic_dim ) corrected_vols[i] += rjjm1 * np.exp(Fij) # * (1+Fij) corrected_vols *= prefactor * self.N self.dc = dc # compute adjacency matrix and cumulative changes A = sparse.lil_matrix((self.N, self.N), dtype=np.float_) supp_deltaF = sparse.lil_matrix((self.N, self.N), dtype=np.float_) # define redundancy factor for each A matrix entry as the geometric mean of the 2 corresponding k* k1 = self.kstar[self.nind_list[:, 0]] k2 = self.kstar[self.nind_list[:, 1]] redundancy = np.sqrt(k1 * k2) if use_variance: for nspar, indices in enumerate(self.nind_list): i = indices[0] j = indices[1] tmp = 1.0 / self.Fij_var_array[nspar] / redundancy[nspar] A[i, j] = -tmp supp_deltaF[i, j] = self.Fij_array[nspar] * tmp else: for nspar, indices in enumerate(self.nind_list): i = indices[0] j = indices[1] A[i, j] = -1.0 / redundancy[nspar] supp_deltaF[i, j] = self.Fij_array[nspar] / redundancy[nspar] A = alpha * sparse.lil_matrix(A + A.transpose()) diag = np.array(-A.sum(axis=1)).reshape((self.N,)) + (1.0 - alpha) * self.kstar # print("Diag = {}".format(diag)) A.setdiag(diag) deltaFcum = alpha * ( np.array(supp_deltaF.sum(axis=0)).reshape((self.N,)) - np.array(supp_deltaF.sum(axis=1)).reshape((self.N,)) ) + (1.0 - alpha) * (self.kstar * (np.log(self.kstar / corrected_vols))) log_den = sparse.linalg.spsolve(A.tocsr(), deltaFcum) self.log_den = log_den if comp_err is True: self.A = A.todense() self.B = slin.pinvh(self.A) self.log_den_err = np.sqrt(np.diag(self.B)) sec2 = time.time() if self.verb: print( "{0:0.2f} seconds for dF_PAk_gCorr density estimation".format( sec2 - sec ) )
def latent_time_graphical_lasso(emp_cov, alpha=0.01, tau=1., rho=1., beta=1., eta=1., max_iter=100, n_samples=None, verbose=False, psi='laplacian', phi='laplacian', mode='admm', tol=1e-4, rtol=1e-4, return_history=False, return_n_iter=True, update_rho_options=None, compute_objective=True, init='empirical'): r"""Latent variable time-varying graphical lasso solver. Solves the following problem via ADMM: min sum_{i=1}^T -n_i log_likelihood(S_i, K_i-L_i) + alpha ||K_i||_{od,1} + tau ||L_i||_* + beta sum_{i=2}^T Psi(K_i - K_{i-1}) + eta sum_{i=2}^T Phi(L_i - L_{i-1}) where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of data matrix X (training observations by features). Parameters ---------- emp_cov : ndarray, shape (n_features, n_features) Empirical covariance of data. alpha, tau, beta, eta : float, optional Regularisation parameters. rho : float, optional Augmented Lagrangian parameter. max_iter : int, optional Maximum number of iterations. n_samples : ndarray Number of samples available for each time point. tol : float, optional Absolute tolerance for convergence. rtol : float, optional Relative tolerance for convergence. return_history : bool, optional Return the history of computed values. return_n_iter : bool, optional Return the number of iteration before convergence. verbose : bool, default False Print info at each iteration. update_rho_options : dict, optional Arguments for the rho update. See regain.update_rules.update_rho function for more information. compute_objective : bool, default True Choose to compute the objective value. init : {'empirical', 'zeros', ndarray}, default 'empirical' How to initialise the inverse covariance matrix. Default is take the empirical covariance and inverting it. Returns ------- K, L : numpy.array, 3-dimensional (T x d x d) Solution to the problem for each time t=1...T . history : list If return_history, then also a structure that contains the objective value, the primal and dual residual norms, and tolerances for the primal and dual residual norms at each iteration. """ psi, prox_psi, psi_node_penalty = check_norm_prox(psi) phi, prox_phi, phi_node_penalty = check_norm_prox(phi) Z_0 = init_precision(emp_cov, mode=init) Z_1 = Z_0.copy()[:-1] Z_2 = Z_0.copy()[1:] W_0 = np.zeros_like(Z_0) W_1 = np.zeros_like(Z_1) W_2 = np.zeros_like(Z_2) X_0 = np.zeros_like(Z_0) X_1 = np.zeros_like(Z_1) X_2 = np.zeros_like(Z_2) U_1 = np.zeros_like(W_1) U_2 = np.zeros_like(W_2) R_old = np.zeros_like(Z_0) Z_1_old = np.zeros_like(Z_1) Z_2_old = np.zeros_like(Z_2) W_1_old = np.zeros_like(W_1) W_2_old = np.zeros_like(W_2) # divisor for consensus variables, accounting for two less matrices divisor = np.full(emp_cov.shape[0], 3, dtype=float) divisor[0] -= 1 divisor[-1] -= 1 if n_samples is None: n_samples = np.ones(emp_cov.shape[0]) checks = [] for iteration_ in range(max_iter): # update R A = Z_0 - W_0 - X_0 A += A.transpose(0, 2, 1) A /= 2. A *= -rho / n_samples[:, None, None] A += emp_cov # A = emp_cov / rho - A R = np.array( [prox_logdet(a, lamda=ni / rho) for a, ni in zip(A, n_samples)]) # update Z_0 A = R + W_0 + X_0 A[:-1] += Z_1 - X_1 A[1:] += Z_2 - X_2 A /= divisor[:, None, None] # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho) # Z_0 = np.array(map(soft_thresholding_, A)) Z_0 = soft_thresholding(A, lamda=alpha / (rho * divisor[:, None, None])) # update Z_1, Z_2 A_1 = Z_0[:-1] + X_1 A_2 = Z_0[1:] + X_2 if not psi_node_penalty: prox_e = prox_psi(A_2 - A_1, lamda=2. 
* beta / rho) Z_1 = .5 * (A_1 + A_2 - prox_e) Z_2 = .5 * (A_1 + A_2 + prox_e) else: Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1), lamda=.5 * beta / rho, rho=rho, tol=tol, rtol=rtol, max_iter=max_iter) # update W_0 A = Z_0 - R - X_0 A[:-1] += W_1 - U_1 A[1:] += W_2 - U_2 A /= divisor[:, None, None] A += A.transpose(0, 2, 1) A /= 2. W_0 = np.array([ prox_trace_indicator(a, lamda=tau / (rho * div)) for a, div in zip(A, divisor) ]) # update W_1, W_2 A_1 = W_0[:-1] + U_1 A_2 = W_0[1:] + U_2 if not phi_node_penalty: prox_e = prox_phi(A_2 - A_1, lamda=2. * eta / rho) W_1 = .5 * (A_1 + A_2 - prox_e) W_2 = .5 * (A_1 + A_2 + prox_e) else: W_1, W_2 = prox_phi(np.concatenate((A_1, A_2), axis=1), lamda=.5 * eta / rho, rho=rho, tol=tol, rtol=rtol, max_iter=max_iter) # update residuals X_0 += R - Z_0 + W_0 X_1 += Z_0[:-1] - Z_1 X_2 += Z_0[1:] - Z_2 U_1 += W_0[:-1] - W_1 U_2 += W_0[1:] - W_2 # diagnostics, reporting, termination checks rnorm = np.sqrt( squared_norm(R - Z_0 + W_0) + squared_norm(Z_0[:-1] - Z_1) + squared_norm(Z_0[1:] - Z_2) + squared_norm(W_0[:-1] - W_1) + squared_norm(W_0[1:] - W_2)) snorm = rho * np.sqrt( squared_norm(R - R_old) + squared_norm(Z_1 - Z_1_old) + squared_norm(Z_2 - Z_2_old) + squared_norm(W_1 - W_1_old) + squared_norm(W_2 - W_2_old)) obj = objective(emp_cov, n_samples, R, Z_0, Z_1, Z_2, W_0, W_1, W_2, alpha, tau, beta, eta, psi, phi) \ if compute_objective else np.nan check = convergence( obj=obj, rnorm=rnorm, snorm=snorm, e_pri=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * max( np.sqrt( squared_norm(R) + squared_norm(Z_1) + squared_norm(Z_2) + squared_norm(W_1) + squared_norm(W_2)), np.sqrt( squared_norm(Z_0 - W_0) + squared_norm(Z_0[:-1]) + squared_norm(Z_0[1:]) + squared_norm(W_0[:-1]) + squared_norm(W_0[1:]))), e_dual=np.sqrt(R.size + 4 * Z_1.size) * tol + rtol * rho * (np.sqrt( squared_norm(X_0) + squared_norm(X_1) + squared_norm(X_2) + squared_norm(U_1) + squared_norm(U_2)))) R_old = R.copy() Z_1_old = Z_1.copy() Z_2_old = Z_2.copy() W_1_old = W_1.copy() W_2_old = W_2.copy() if verbose: print("obj: %.4f, rnorm: %.4f, snorm: %.4f," "eps_pri: %.4f, eps_dual: %.4f" % check[:5]) checks.append(check) if check.rnorm <= check.e_pri and check.snorm <= check.e_dual: break rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {})) # scaled dual variables should be also rescaled X_0 *= rho / rho_new X_1 *= rho / rho_new X_2 *= rho / rho_new U_1 *= rho / rho_new U_2 *= rho / rho_new rho = rho_new else: warnings.warn("Objective did not converge.") covariance_ = np.array([linalg.pinvh(x) for x in Z_0]) return_list = [Z_0, W_0, covariance_] if return_history: return_list.append(checks) if return_n_iter: return_list.append(iteration_) return return_list
def time_graphical_lasso(emp_cov, alpha=0.01, rho=1, beta=1, theta=0.5, max_iter=100, n_samples=None, verbose=False, psi='laplacian', tol=1e-4, rtol=1e-4, return_history=False, return_n_iter=True, mode='admm', compute_objective=True, stop_at=None, stop_when=1e-4, update_rho_options=None, init='empirical'): """Time-varying graphical lasso solver. Solves the following problem via ADMM: min sum_{i=1}^T -n_i log_likelihood(S_i, K_i) + alpha*||K_i||_{od,1} + beta sum_{i=2}^T Psi(K_i - K_{i-1}) where S_i = (1/n_i) X_i^T \times X_i is the empirical covariance of data matrix X (training observations by features). Parameters ---------- emp_cov : ndarray, shape (n_features, n_features) Empirical covariance of data. alpha, beta : float, optional Regularisation parameter. rho : float, optional Augmented Lagrangian parameter. max_iter : int, optional Maximum number of iterations. n_samples : ndarray Number of samples available for each time point. tol : float, optional Absolute tolerance for convergence. rtol : float, optional Relative tolerance for convergence. return_history : bool, optional Return the history of computed values. return_n_iter : bool, optional Return the number of iteration before convergence. verbose : bool, default False Print info at each iteration. update_rho_options : dict, optional Arguments for the rho update. See regain.update_rules.update_rho function for more information. compute_objective : bool, default True Choose to compute the objective value. init : {'empirical', 'zero', ndarray} Choose how to initialize the precision matrix, with the inverse empirical covariance, zero matrix or precomputed. Returns ------- K : numpy.array, 3-dimensional (T x d x d) Solution to the problem for each time t=1...T . history : list If return_history, then also a structure that contains the objective value, the primal and dual residual norms, and tolerances for the primal and dual residual norms at each iteration. """ psi, prox_psi, psi_node_penalty = check_norm_prox(psi) Z_0 = init_precision(emp_cov, mode=init) Z_1 = Z_0.copy()[:-1] # np.zeros_like(emp_cov)[:-1] Z_2 = Z_0.copy()[1:] # np.zeros_like(emp_cov)[1:] U_0 = np.zeros_like(Z_0) U_1 = np.zeros_like(Z_1) U_2 = np.zeros_like(Z_2) Z_0_old = np.zeros_like(Z_0) Z_1_old = np.zeros_like(Z_1) Z_2_old = np.zeros_like(Z_2) # divisor for consensus variables, accounting for two less matrices divisor = np.full(emp_cov.shape[0], 3, dtype=float) divisor[0] -= 1 divisor[-1] -= 1 if n_samples is None: n_samples = np.ones(emp_cov.shape[0]) checks = [ convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_1, Z_2, alpha, beta, psi)) ] for iteration_ in range(max_iter): # update K A = Z_0 - U_0 A[:-1] += Z_1 - U_1 A[1:] += Z_2 - U_2 A += A.transpose(0, 2, 1) A /= 2. A *= -rho / n_samples[:, None, None] A += emp_cov K = np.array([ prox_logdet_alt(a, lamda=rho * div) for a, div in zip(A, divisor) ]) # update Z_0 A = K + U_0 A += A.transpose(0, 2, 1) A /= 2. Z_0 = soft_thresholding_od(A, lamda=alpha / rho) # other Zs A_1 = K[:-1] + U_1 A_2 = K[1:] + U_2 if not psi_node_penalty: prox_e = prox_psi(A_2 - A_1, lamda=2. 
* beta / rho) Z_1 = .5 * (A_1 + A_2 - prox_e) Z_2 = .5 * (A_1 + A_2 + prox_e) else: Z_1, Z_2 = prox_psi(np.concatenate((A_1, A_2), axis=1), lamda=.5 * beta / rho, rho=rho, tol=tol, rtol=rtol, max_iter=max_iter) # update residuals U_0 += K - Z_0 U_1 += K[:-1] - Z_1 U_2 += K[1:] - Z_2 # diagnostics, reporting, termination checks rnorm = np.sqrt( squared_norm(K - Z_0) + squared_norm(K[:-1] - Z_1) + squared_norm(K[1:] - Z_2)) snorm = rho * np.sqrt( squared_norm(Z_0 - Z_0_old) + squared_norm(Z_1 - Z_1_old) + squared_norm(Z_2 - Z_2_old)) obj = objective( n_samples, emp_cov, Z_0, K, Z_1, Z_2, alpha, beta, psi) \ if compute_objective else np.nan check = convergence( obj=obj, rnorm=rnorm, snorm=snorm, e_pri=np.sqrt(K.size + 2 * Z_1.size) * tol + rtol * max( np.sqrt( squared_norm(Z_0) + squared_norm(Z_1) + squared_norm(Z_2)), np.sqrt( squared_norm(K) + squared_norm(K[:-1]) + squared_norm(K[1:]))), e_dual=np.sqrt(K.size + 2 * Z_1.size) * tol + rtol * rho * np.sqrt(squared_norm(U_0) + squared_norm(U_1) + squared_norm(U_2)), # precision=Z_0.copy() ) Z_0_old = Z_0.copy() Z_1_old = Z_1.copy() Z_2_old = Z_2.copy() if verbose: print("obj: %.4f, rnorm: %.4f, snorm: %.4f," "eps_pri: %.4f, eps_dual: %.4f" % check[:5]) checks.append(check) if stop_at is not None: if abs(check.obj - stop_at) / abs(stop_at) < stop_when: break if check.rnorm <= check.e_pri and check.snorm <= check.e_dual: break rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {})) # scaled dual variables should be also rescaled U_0 *= rho / rho_new U_1 *= rho / rho_new U_2 *= rho / rho_new rho = rho_new #assert is_pos_def(Z_0) else: warnings.warn("Objective did not converge.") print(iteration_, penalty_objective(Z_0, Z_0[:-1], Z_0[1:], psi, theta)) covariance_ = np.array([linalg.pinvh(x) for x in Z_0]) return_list = [Z_0, covariance_] if return_history: return_list.append(checks) if return_n_iter: return_list.append(iteration_ + 1) return return_list
def pinvh(a, cond=None, rcond=None, lower=True): return linalg.pinvh(a, cond, rcond, lower)
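# Note on the thin wrapper above: it forwards cond/rcond positionally, which matches older
# SciPy signatures; newer SciPy releases replace these with atol/rtol, so the pass-through
# may need adjusting depending on the installed version. For reference, pinvh agrees with
# inv on well-conditioned symmetric matrices and still returns a finite pseudo-inverse
# when the matrix is singular:
import numpy as np
from scipy import linalg

A = np.array([[2.0, 0.5], [0.5, 1.0]])
print(np.allclose(linalg.pinvh(A), linalg.inv(A)))   # True

B = np.array([[1.0, 1.0], [1.0, 1.0]])               # singular
print(linalg.pinvh(B))                                # [[0.25, 0.25], [0.25, 0.25]]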
def _initialize_metric_mahalanobis(input, init='identity', random_state=None, return_inverse=False, strict_pd=False, matrix_name='matrix'): """Returns a PSD matrix that can be used as a prior or an initialization for the Mahalanobis distance Parameters ---------- input : array-like The input samples (can be tuples or regular samples). init : string or numpy array, optional (default='identity') Specification for the matrix to initialize. Possible options are 'identity', 'covariance', 'random', and a numpy array of shape (n_features, n_features). 'identity' An identity matrix of shape (n_features, n_features). 'covariance' The (pseudo-)inverse covariance matrix (raises an error if the covariance matrix is not definite and `strict_pd == True`) 'random' A random positive definite (PD) matrix of shape `(n_features, n_features)`, generated using `sklearn.datasets.make_spd_matrix`. numpy array A PSD matrix (or strictly PD if strict_pd==True) of shape (n_features, n_features), that will be used as such to initialize the metric, or set the prior. random_state : int or `numpy.RandomState` or None, optional (default=None) A pseudo random number generator object or a seed for it if int. If ``init='random'``, ``random_state`` is used to set the random Mahalanobis matrix. If ``init='pca'``, ``random_state`` is passed as an argument to PCA when initializing the matrix. return_inverse : bool, optional (default=False) Whether to return the inverse of the specified matrix. This can be sometimes useful. It will return the pseudo-inverse (which is the same as the inverse if the matrix is definite (i.e. invertible)). If `strict_pd == True` and the matrix is not definite, it will return an error. strict_pd : bool, optional (default=False) Whether to enforce that the provided matrix is definite (in addition to being PSD). param_name : str, optional (default='matrix') The name of the matrix used (example: 'init', 'prior'). Will be used in error messages. Returns ------- M, or (M, M_inv) : `numpy.ndarray` The initial matrix to use M, and its inverse if `return_inverse=True`. """ n_features = input.shape[-1] if isinstance(init, np.ndarray): # we copy the array, so that if we update the metric, we don't want to # update the init init = check_array(init, copy=True) # Assert that init.shape[1] = n_features if init.shape != (n_features, ) * 2: raise ValueError('The input dimensionality {} of the given ' 'mahalanobis matrix `{}` must match the ' 'dimensionality of the given inputs ({}).'.format( init.shape, matrix_name, n_features)) # Assert that the matrix is symmetric if not np.allclose(init, init.T): raise ValueError("`{}` is not symmetric.".format(matrix_name)) elif init not in ['identity', 'covariance', 'random']: raise ValueError( "`{}` must be 'identity', 'covariance', 'random' " "or a numpy array of shape (n_features, n_features).".format( matrix_name)) random_state = check_random_state(random_state) M = init if isinstance(M, np.ndarray): w, V = eigh(M, check_finite=False) init_is_definite = _check_sdp_from_eigen(w) if strict_pd and not init_is_definite: raise LinAlgError( "You should provide a strictly positive definite " "matrix as `{}`. This one is not definite. 
Try another" " {}, or an algorithm that does not " "require the {} to be strictly positive definite.".format( *((matrix_name, ) * 3))) elif return_inverse and not init_is_definite: warnings.warn('The initialization matrix is not invertible: ' 'using the pseudo-inverse instead.') if return_inverse: M_inv = _pseudo_inverse_from_eig(w, V) return M, M_inv else: return M elif init == 'identity': M = np.eye(n_features, n_features) if return_inverse: M_inv = M.copy() return M, M_inv else: return M elif init == 'covariance': if input.ndim == 3: # if the input are tuples, we need to form an X by deduplication X = np.vstack( {tuple(row) for row in input.reshape(-1, n_features)}) else: X = input # atleast2d is necessary to deal with scalar covariance matrices M_inv = np.atleast_2d(np.cov(X, rowvar=False)) w, V = eigh(M_inv, check_finite=False) cov_is_definite = _check_sdp_from_eigen(w) if strict_pd and not cov_is_definite: raise LinAlgError( "Unable to get a true inverse of the covariance " "matrix since it is not definite. Try another " "`{}`, or an algorithm that does not " "require the `{}` to be strictly positive definite.".format( *((matrix_name, ) * 2))) elif not cov_is_definite: warnings.warn( 'The covariance matrix is not invertible: ' 'using the pseudo-inverse instead.' 'To make the covariance matrix invertible' ' you can remove any linearly dependent features and/or ' 'reduce the dimensionality of your input, ' 'for instance using `sklearn.decomposition.PCA` as a ' 'preprocessing step.') M = _pseudo_inverse_from_eig(w, V) if return_inverse: return M, M_inv else: return M elif init == 'random': # we need to create a random symmetric matrix M = make_spd_matrix(n_features, random_state=random_state) if return_inverse: # we use pinvh even if we know the matrix is definite, just because # we need the returned matrix to be symmetric (and sometimes # np.linalg.inv returns not symmetric inverses of symmetric matrices) # TODO: there might be a more efficient method to do so M_inv = pinvh(M) return M, M_inv else: return M
def fit(self, X, y): """ The Gaussian Process model fitting method. Parameters ---------- X : double array_like An array with shape (n_samples, n_features) with the input at which observations were made. y : double array_like An array with shape (n_samples, ) or shape (n_samples, n_targets) with the observations of the output to be predicted. Returns ------- gp : self A fitted Gaussian Process model object awaiting data to perform predictions. """ K = self.calc_kernel_matrix(X) # # Force data to 2D numpy.array X = array2d(X) n_samples, n_features = X.shape y = sp.asarray(y) self.y_ndim_ = y.ndim if y.ndim == 1: y = y[:, sp.newaxis] _, n_targets = y.shape # # Normalise output data or not if self.normalise == 1: y_mean = sp.mean(y, axis=0) y_std = sp.std(y, axis=0) y_std[y_std == 0.] = 1. y = (y - y_mean) / y_std else: y_mean = 0.0 y_std = 1.0 err = 'Dummy error message' inverse = K + self.nugget * sp.ones(n_samples) try: # print("is symmetric", Cholesky.isSymmetric(inverse)) # upper_triang = Cholesky.Cholesky(inverse) # inverse = Cholesky.CholeskyInverse(upper_triang) inverse = LA.inv(inverse) except LA.LinAlgError as err: print("inv failed: %s. Switching to pinvh" % err) try: inverse = LA.pinvh(inverse) except LA.LinAlgError as err: print("pinvh failed: %s. Switching to pinv2" % err) try: inverse = LA.pinv2(inverse) except LA.LinAlgError as err: print("pinv2 failed: %s. Failed to invert matrix." % err) inverse = None # alpha is the vector of regression coefficients of GaussianProcess alpha = sp.dot(inverse, y) self.y = y self.y_mean, self.y_std = y_mean, y_std if not self.low_memory: self.inverse = inverse self.alpha = sp.array(alpha)
def fit(self, x, y): niter_max = self.niter_max l2 = self.l2 n_unique_y = len(np.unique(y)) if n_unique_y == 1: print( 'The training data set is USELESS because it contains only 1 class' ) elif n_unique_y == 2: # binary # convert 0,1 to -1, 1 y = 2 * y - 1. #print(niter_max) n = x.shape[1] y1 = (y + 1) / 2 x_av = np.mean(x, axis=0) dx = x - x_av c = np.cov(dx, rowvar=False, bias=True) # 2019.07.16: c += l2 * np.identity(n) / (2 * len(y)) c_inv = linalg.pinvh(c) # initial values h0 = 0. w = np.random.normal(0.0, 1. / np.sqrt(n), size=(n)) cost = np.full(niter_max, 100.) for iloop in range(niter_max): h = h0 + x.dot(w) y_model = np.tanh(h) # stopping criterion cost[iloop] = ((y[:] - y_model[:])**2).mean() # 2019.07.12: lost function #p = 1/(1+np.exp(-2*h)) #cost[iloop] = (-y1[:]*np.log(p) - (1-y1)*np.log(1-p)).mean() if iloop > 0 and cost[iloop] >= cost[iloop - 1]: break # update local field t = h != 0 h[t] *= y[t] / y_model[t] h[~t] = y[~t] # find w from h h_av = h.mean() dh = h - h_av dhdx = dh[:, np.newaxis] * dx[:, :] dhdx_av = dhdx.mean(axis=0) w = c_inv.dot(dhdx_av) h0 = h_av - x_av.dot(w) self.h0 = h0 self.w = w self.classtype = 'binary' else: # multiple classes """ ----------------------------------------------------------------------- 2019.06.14: fit h0 and w based on Expectation Reflection input: features x[l,n], target: y[l,m] (y = +/-1) output: h0[m], w[n,m] """ #def fit_multi(self,x,y,niter_max=500,l2=0.001): onehot_encoder = OneHotEncoder(sparse=False, categories='auto') y_onehot = onehot_encoder.fit_transform(y.reshape(-1, 1)) y_onehot = 2 * y_onehot - 1 # convert to -1, +1 y1 = (y + 1) / 2 # convert to 1, 1 #print(niter_max) n = x.shape[1] m = y_onehot.shape[1] # number of categories x_av = np.mean(x, axis=0) dx = x - x_av c = np.cov(dx, rowvar=False, bias=True) # 2019.07.16: l2 = lamda/(2L) c += l2 * np.identity(n) / (2 * len(y)) c_inv = linalg.pinvh(c) H0 = np.zeros(m) W = np.zeros((n, m)) for i in range(m): y = y_onehot[:, i] # initial values h0 = 0. w = np.random.normal(0.0, 1. / np.sqrt(n), size=(n)) cost = np.full(niter_max, 100.) for iloop in range(niter_max): h = h0 + x.dot(w) y_model = np.tanh(h) # stopping criterion cost[iloop] = ((y[:] - y_model[:])**2).mean() # 2019.07.12: lost function #p = 1/(1+np.exp(-2*h)) #cost[iloop] = (-y1[:]*np.log(p) - (1-y1)*np.log(1-p)).mean() if iloop > 0 and cost[iloop] >= cost[iloop - 1]: break # update local field t = h != 0 h[t] *= y[t] / y_model[t] h[~t] = y[~t] # find w from h h_av = h.mean() dh = h - h_av dhdx = dh[:, np.newaxis] * dx[:, :] dhdx_av = dhdx.mean(axis=0) w = c_inv.dot(dhdx_av) h0 = h_av - x_av.dot(w) H0[i] = h0 W[:, i] = w self.h0 = H0 self.w = W self.classtype = 'multi'
def first_derivative_h(D, K, delta=5): """Derivative with respect to K of -0.5 * (trace(D @ K) - (delta - 2) * logdet(K)); pinvh gives the symmetric (pseudo-)inverse of K.""" return -0.5 * (D - (delta - 2) * linalg.pinvh(K))
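A quick finite-difference check of the formula, assuming the objective it differentiates is h(K) = -0.5 * (trace(D @ K) - (delta - 2) * logdet(K)) (the surrounding model may add further terms; this is only the function whose gradient matches the returned expression):

import numpy as np
from scipy import linalg

def h_obj(D, K, delta=5):
    # assumed objective whose gradient is -0.5 * (D - (delta - 2) * inv(K))
    return -0.5 * (np.trace(D @ K) - (delta - 2) * np.log(linalg.det(K)))

rng = np.random.RandomState(0)
A = rng.randn(4, 4)
D = A @ A.T + 4 * np.eye(4)                      # symmetric positive-definite "data" matrix
K = np.eye(4)
grad = -0.5 * (D - (5 - 2) * linalg.pinvh(K))    # first_derivative_h(D, K, delta=5)

# directional derivative along a symmetric perturbation E should match <grad, E>
E = np.zeros((4, 4)); E[0, 1] = E[1, 0] = 1.0
eps = 1e-6
numeric = (h_obj(D, K + eps * E) - h_obj(D, K - eps * E)) / (2 * eps)
print(numeric, np.sum(grad * E))                 # the two values agree closely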
def test_simple_real(self): a = array([[1, 2, 3], [4, 5, 6], [7, 8, 10]], dtype=float) a = np.dot(a, a.T) a_pinv = pinvh(a) assert_array_almost_equal(np.dot(a, a_pinv), np.eye(3))
def fit(self, X, y, evidence_approx_method="fp", max_iter=100): ''' Fits Bayesian linear regression, returns posterior mean and precision of parameters Parameters ---------- X: array-like of size [n_samples,n_features] Matrix of explanatory variables (should not include bias term) y: array-like of size [n_samples] Vector of dependent variables. Returns ------- object: self self ''' # preprocess data X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True) n_samples, n_features = X.shape X, y, X_mean, y_mean, X_std = self._center_data( X, y, self.fit_intercept, self.copy_X) self._x_mean_ = X_mean self._y_mean_ = y_mean self._x_std_ = X_std self.scores_ = [np.NINF] # precision of noise & coefficients alpha = self.alpha var_y = np.var(y) # check that variance is non-zero if var_y == 0: beta = 1e-2 else: beta = 1. / np.var(y) # to speed up all further computations, save the svd decomposition and reuse it later u, d, v = svd(X, full_matrices=False) Uy = np.dot(u.T, y) dsq = d**2 for i in range(self.n_iter): # find mean for posterior of w ( for EM this is E-step) p1_mu = v.T * (d / (dsq + alpha / beta)) mu = np.dot(p1_mu, Uy) # precompute errors, since both methods use them in estimation error = y - np.dot(X, mu) sqdErr = np.dot(error, error) if sqdErr / n_samples < self.lambda_0: self.perfect_fit = True warnings.warn( ('Almost perfect fit!!! Estimated values of variance ' 'for predictive distribution are computed using only ' 'Residual Sum of Squares, therefore they do not increase ' 'in case of extrapolation')) break if self.optimizer == "fp": gamma = np.sum(dsq / (dsq + alpha / beta)) # use updated mu and gamma parameters to update alpha and beta alpha = gamma / np.dot(mu, mu) beta = (n_samples - gamma) / sqdErr else: # M-step, update parameters alpha and beta to maximize ML TYPE II alpha = n_features / (np.dot(mu, mu) + np.sum(1 / (beta * dsq + alpha))) beta = n_samples / (sqdErr + np.sum(dsq / (beta * dsq + alpha))) # calculate log likelihood p(Y | X, alpha, beta) (constants are not included) normaliser = 0.5 * (n_features * np.log(alpha) + n_samples * np.log(beta)) normaliser -= 0.5 * np.sum(np.log(beta * dsq + alpha)) log_like = normaliser - 0.5 * alpha * np.sum(mu**2) log_like -= 0.5 * beta * sqdErr - 0.5 * n_samples * np.log( 2 * np.pi) self.scores_.append(log_like) if self.verbose: print(("Iteration {0} completed, value of log " "likelihood is {1}".format(i, log_like))) # if change in log-likelihood is smaller than threshold terminate converged = (self.scores_[-1] - self.scores_[-2] < self.tol) if converged or i == self.n_iter - 1: break # pinvh is used for numerical stability (the inverse has a closed form solution) self.sigma_ = pinvh(np.dot(v.T * (beta * dsq + alpha), v)) self.coef_ = beta * np.dot(self.sigma_, np.dot(X.T, y)) self._set_intercept(X_mean, y_mean, X_std) self.beta_ = beta self.alpha_ = alpha return self
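The fixed-point branch above, reduced to a hedged standalone sketch on synthetic data (the class attributes n_iter, lambda_0, tol and so on are dropped; only the SVD-based posterior mean and the alpha/beta updates are kept):

import numpy as np
from numpy.linalg import svd

rng = np.random.RandomState(0)
n, d_feat = 100, 5
X = rng.randn(n, d_feat)
y = X.dot(rng.randn(d_feat)) + 0.3 * rng.randn(n)

u, s, vt = svd(X, full_matrices=False)
Uy, dsq = u.T.dot(y), s ** 2
alpha, beta = 1.0, 1.0 / np.var(y)                   # prior precision of w, noise precision
for _ in range(100):
    mu = vt.T.dot((s / (dsq + alpha / beta)) * Uy)   # posterior mean of the weights
    err = y - X.dot(mu)
    gamma = np.sum(dsq / (dsq + alpha / beta))       # effective number of parameters
    alpha = gamma / mu.dot(mu)                       # fixed-point update of alpha
    beta = (n - gamma) / err.dot(err)                # fixed-point update of beta
print(alpha, beta)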
def graphical_lasso(emp_cov, alpha, cov_init=None, mode='cd', tol=1e-4, enet_tol=1e-4, max_iter=100, verbose=False, return_costs=False, eps=np.finfo(np.float64).eps, return_n_iter=False): """l1-penalized covariance estimator Read more in the :ref:`User Guide <sparse_inverse_covariance>`. Parameters ---------- emp_cov : 2D ndarray, shape (n_features, n_features) Empirical covariance from which to compute the covariance estimate. alpha : positive float The regularization parameter: the higher alpha, the more regularization, the sparser the inverse covariance. cov_init : 2D array (n_features, n_features), optional The initial guess for the covariance. mode : {'cd', 'lars'} The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable. tol : positive float, optional The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. enet_tol : positive float, optional The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. max_iter : integer, optional The maximum number of iterations. verbose : boolean, optional If verbose is True, the objective function and dual gap are printed at each iteration. return_costs : boolean, optional If return_costs is True, the objective function and dual gap at each iteration are returned. eps : float, optional The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. return_n_iter : bool, optional Whether or not to return the number of iterations. Returns ------- covariance : 2D ndarray, shape (n_features, n_features) The estimated covariance matrix. precision : 2D ndarray, shape (n_features, n_features) The estimated (sparse) precision matrix. costs : list of (objective, dual_gap) pairs The list of values of the objective function and the dual gap at each iteration. Returned only if return_costs is True. n_iter : int Number of iterations. Returned only if `return_n_iter` is set to True. See Also -------- GraphicalLasso, GraphicalLassoCV Notes ----- The algorithm employed to solve this problem is the GLasso algorithm, from the Friedman 2008 Biostatistics paper. It is the same algorithm as in the R `glasso` package. One possible difference with the `glasso` R package is that the diagonal coefficients are not penalized. """ _, n_features = emp_cov.shape if alpha == 0: if return_costs: precision_ = linalg.inv(emp_cov) cost = - 2. * log_likelihood(emp_cov, precision_) cost += n_features * np.log(2 * np.pi) d_gap = np.sum(emp_cov * precision_) - n_features if return_n_iter: return emp_cov, precision_, (cost, d_gap), 0 else: return emp_cov, precision_, (cost, d_gap) else: if return_n_iter: return emp_cov, linalg.inv(emp_cov), 0 else: return emp_cov, linalg.inv(emp_cov) if cov_init is None: covariance_ = emp_cov.copy() else: covariance_ = cov_init.copy() # As a trivial regularization (Tikhonov like), we scale down the # off-diagonal coefficients of our starting point: This is needed, as # in the cross-validation the cov_init can easily be # ill-conditioned, and the CV loop blows. Beside, this takes # conservative stand-point on the initial conditions, and it tends to # make the convergence go faster. 
covariance_ *= 0.95 diagonal = emp_cov.flat[::n_features + 1] covariance_.flat[::n_features + 1] = diagonal precision_ = linalg.pinvh(covariance_) indices = np.arange(n_features) costs = list() # The different l1 regression solver have different numerical errors if mode == 'cd': errors = dict(over='raise', invalid='ignore') else: errors = dict(invalid='raise') try: # be robust to the max_iter=0 edge case, see: # https://github.com/scikit-learn/scikit-learn/issues/4134 d_gap = np.inf # set a sub_covariance buffer sub_covariance = np.ascontiguousarray(covariance_[1:, 1:]) for i in range(max_iter): for idx in range(n_features): # To keep the contiguous matrix `sub_covariance` equal to # covariance_[indices != idx].T[indices != idx] # we only need to update 1 column and 1 line when idx changes if idx > 0: di = idx - 1 sub_covariance[di] = covariance_[di][indices != idx] sub_covariance[:, di] = covariance_[:, di][indices != idx] else: sub_covariance[:] = covariance_[1:, 1:] row = emp_cov[idx, indices != idx] with np.errstate(**errors): if mode == 'cd': # Use coordinate descent coefs = -(precision_[indices != idx, idx] / (precision_[idx, idx] + 1000 * eps)) coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram( coefs, alpha, 0, sub_covariance, row, row, max_iter, enet_tol, check_random_state(None), False) else: # Use LARS _, _, coefs = lars_path( sub_covariance, row, Xy=row, Gram=sub_covariance, alpha_min=alpha / (n_features - 1), copy_Gram=True, eps=eps, method='lars', return_path=False) # Update the precision matrix precision_[idx, idx] = ( 1. / (covariance_[idx, idx] - np.dot(covariance_[indices != idx, idx], coefs))) precision_[indices != idx, idx] = (- precision_[idx, idx] * coefs) precision_[idx, indices != idx] = (- precision_[idx, idx] * coefs) coefs = np.dot(sub_covariance, coefs) covariance_[idx, indices != idx] = coefs covariance_[indices != idx, idx] = coefs d_gap = _dual_gap(emp_cov, precision_, alpha) cost = _objective(emp_cov, precision_, alpha) if verbose: print('[graphical_lasso] Iteration ' '% 3i, cost % 3.2e, dual gap %.3e' % (i, cost, d_gap)) if return_costs: costs.append((cost, d_gap)) if np.abs(d_gap) < tol: break if not np.isfinite(cost) and i > 0: raise FloatingPointError('Non SPD result: the system is ' 'too ill-conditioned for this solver') else: warnings.warn('graphical_lasso: did not converge after ' '%i iteration: dual gap: %.3e' % (max_iter, d_gap), ConvergenceWarning) except FloatingPointError as e: e.args = (e.args[0] + '. The system is too ill-conditioned for this solver',) raise e if return_costs: if return_n_iter: return covariance_, precision_, costs, i + 1 else: return covariance_, precision_, costs else: if return_n_iter: return covariance_, precision_, i + 1 else: return covariance_, precision_
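A hedged usage sketch of graphical_lasso through the public scikit-learn entry point (assuming sklearn.covariance.graphical_lasso is available; the toy tridiagonal precision matrix is only for illustration):

import numpy as np
from sklearn.covariance import graphical_lasso

rng = np.random.RandomState(0)
true_prec = np.eye(5) + 0.3 * (np.eye(5, k=1) + np.eye(5, k=-1))   # sparse (tridiagonal) precision
true_cov = np.linalg.inv(true_prec)
X = rng.multivariate_normal(np.zeros(5), true_cov, size=1000)
emp_cov = np.cov(X, rowvar=False, bias=True)

cov_, prec_ = graphical_lasso(emp_cov, alpha=0.05)
print(np.round(prec_, 2))        # entries far from the diagonal are shrunk towards zero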
def test_simple_complex(self): a = (array([[1, 2, 3], [4, 5, 6], [7, 8, 10]], dtype=float) + 1j * array([[10, 8, 7], [6, 5, 4], [3, 2, 1]], dtype=float)) a = np.dot(a, a.conj().T) a_pinv = pinvh(a) assert_array_almost_equal(np.dot(a, a_pinv), np.eye(3))
def kernel_time_graphical_lasso( emp_cov, alpha=0.01, rho=1, kernel=None, max_iter=100, n_samples=None, verbose=False, psi="laplacian", tol=1e-4, rtol=1e-4, return_history=False, return_n_iter=True, mode="admm", update_rho_options=None, compute_objective=True, stop_at=None, stop_when=1e-4, init="empirical", ): """Time-varying graphical lasso solver. Solves the following problem via ADMM: min sum_{i=1}^T -n_i log_likelihood(K_i-L_i) + alpha ||K_i||_{od,1} + sum_{s>t}^T k_psi(s,t) Psi(K_s - K_t) where S is the empirical covariance of the data matrix D (training observations by features). Parameters ---------- emp_cov : ndarray, shape (n_features, n_features) Empirical covariance of data. alpha, beta : float, optional Regularisation parameter. rho : float, optional Augmented Lagrangian parameter. max_iter : int, optional Maximum number of iterations. tol : float, optional Absolute tolerance for convergence. rtol : float, optional Relative tolerance for convergence. return_history : bool, optional Return the history of computed values. init : {'empirical', 'zeros', ndarray}, default 'empirical' How to initialise the inverse covariance matrix. Default is take the empirical covariance and inverting it. Returns ------- X : numpy.array, 2-dimensional Solution to the problem. history : list If return_history, then also a structure that contains the objective value, the primal and dual residual norms, and tolerances for the primal and dual residual norms at each iteration. """ psi, prox_psi, psi_node_penalty = check_norm_prox(psi) n_times, _, n_features = emp_cov.shape if kernel is None: kernel = np.eye(n_times) Z_0 = init_precision(emp_cov, mode=init) U_0 = np.zeros_like(Z_0) Z_0_old = np.zeros_like(Z_0) Z_M, Z_M_old = {}, {} U_M = {} for m in range(1, n_times): # all possible markovians jumps Z_L = Z_0.copy()[:-m] Z_R = Z_0.copy()[m:] Z_M[m] = (Z_L, Z_R) U_L = np.zeros_like(Z_L) U_R = np.zeros_like(Z_R) U_M[m] = (U_L, U_R) Z_L_old = np.zeros_like(Z_L) Z_R_old = np.zeros_like(Z_R) Z_M_old[m] = (Z_L_old, Z_R_old) if n_samples is None: n_samples = np.ones(n_times) checks = [ convergence(obj=objective(n_samples, emp_cov, Z_0, Z_0, Z_M, alpha, kernel, psi)) ] for iteration_ in range(max_iter): # update K A = Z_0 - U_0 for m in range(1, n_times): A[:-m] += Z_M[m][0] - U_M[m][0] A[m:] += Z_M[m][1] - U_M[m][1] A /= n_times # soft_thresholding_ = partial(soft_thresholding, lamda=alpha / rho) # K = np.array(map(soft_thresholding_, A)) A += A.transpose(0, 2, 1) A /= 2.0 A *= -rho * n_times / n_samples[:, None, None] A += emp_cov K = np.array([ prox_logdet(a, lamda=ni / (rho * n_times)) for a, ni in zip(A, n_samples) ]) # update Z_0 A = K + U_0 A += A.transpose(0, 2, 1) A /= 2.0 Z_0 = soft_thresholding(A, lamda=alpha / rho) # update residuals U_0 += K - Z_0 # other Zs for m in range(1, n_times): U_L, U_R = U_M[m] A_L = K[:-m] + U_L A_R = K[m:] + U_R if not psi_node_penalty: prox_e = prox_psi(A_R - A_L, lamda=2.0 * np.diag(kernel, m)[:, None, None] / rho) Z_L = 0.5 * (A_L + A_R - prox_e) Z_R = 0.5 * (A_L + A_R + prox_e) else: Z_L, Z_R = prox_psi( np.concatenate((A_L, A_R), axis=1), lamda=0.5 * np.diag(kernel, m)[:, None, None] / rho, rho=rho, tol=tol, rtol=rtol, max_iter=max_iter, ) Z_M[m] = (Z_L, Z_R) # update other residuals U_L += K[:-m] - Z_L U_R += K[m:] - Z_R # diagnostics, reporting, termination checks rnorm = np.sqrt( squared_norm(K - Z_0) + sum( squared_norm(K[:-m] - Z_M[m][0]) + squared_norm(K[m:] - Z_M[m][1]) for m in range(1, n_times))) snorm = rho * np.sqrt( squared_norm(Z_0 - Z_0_old) + sum( 
squared_norm(Z_M[m][0] - Z_M_old[m][0]) + squared_norm(Z_M[m][1] - Z_M_old[m][1]) for m in range(1, n_times))) obj = objective(n_samples, emp_cov, Z_0, K, Z_M, alpha, kernel, psi) if compute_objective else np.nan check = convergence( obj=obj, rnorm=rnorm, snorm=snorm, e_pri=n_features * n_times * tol + rtol * max( np.sqrt( squared_norm(Z_0) + sum( squared_norm(Z_M[m][0]) + squared_norm(Z_M[m][1]) for m in range(1, n_times))), np.sqrt( squared_norm(K) + sum( squared_norm(K[:-m]) + squared_norm(K[m:]) for m in range(1, n_times))), ), e_dual=n_features * n_times * tol + rtol * rho * np.sqrt( squared_norm(U_0) + sum( squared_norm(U_M[m][0]) + squared_norm(U_M[m][1]) for m in range(1, n_times))), ) Z_0_old = Z_0.copy() for m in range(1, n_times): Z_M_old[m] = (Z_M[m][0].copy(), Z_M[m][1].copy()) if verbose: print("obj: %.4f, rnorm: %.4f, snorm: %.4f," "eps_pri: %.4f, eps_dual: %.4f" % check[:5]) checks.append(check) if stop_at is not None: if abs(check.obj - stop_at) / abs(stop_at) < stop_when: break if check.rnorm <= check.e_pri and check.snorm <= check.e_dual: break rho_new = update_rho(rho, rnorm, snorm, iteration=iteration_, **(update_rho_options or {})) # scaled dual variables should be also rescaled U_0 *= rho / rho_new for m in range(1, n_times): U_L, U_R = U_M[m] U_L *= rho / rho_new U_R *= rho / rho_new rho = rho_new else: warnings.warn("Objective did not converge.") covariance_ = np.array([linalg.pinvh(x) for x in Z_0]) return_list = [Z_0, covariance_] if return_history: return_list.append(checks) if return_n_iter: return_list.append(iteration_ + 1) return return_list
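The solver above expects a stack of per-time empirical covariances; a minimal sketch of building that input (the kernel_time_graphical_lasso call itself is assumed to be importable from its package, e.g. regain, and is shown only as a hypothetical call mirroring the signature above):

import numpy as np

rng = np.random.RandomState(0)
n_times, n_samples, n_features = 4, 200, 6
data = [rng.randn(n_samples, n_features) for _ in range(n_times)]

# stack of per-time empirical covariances, shape (n_times, n_features, n_features)
emp_cov = np.array([np.cov(d, rowvar=False, bias=True) for d in data])
n_samples_per_time = np.full(n_times, n_samples)

# hypothetical call mirroring the signature above
# K, covariance_ = kernel_time_graphical_lasso(
#     emp_cov, alpha=0.05, kernel=np.eye(n_times), n_samples=n_samples_per_time,
#     psi="laplacian", return_n_iter=False)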
def fit(self, X, y): """Fit the ARDRegression model according to the given training data and parameters. Iterative procedure to maximize the evidence Parameters ---------- X : array-like, shape = [n_samples, n_features] Training vector, where n_samples in the number of samples and n_features is the number of features. y : array, shape = [n_samples] Target values (integers). Will be cast to X's dtype if necessary Returns ------- self : returns an instance of self. """ X, y = check_X_y(X, y, dtype=np.float64, y_numeric=True) n_samples, n_features = X.shape coef_ = np.zeros(n_features) X, y, X_offset_, y_offset_, X_scale_ = self._preprocess_data( X, y, self.fit_intercept, self.normalize, self.copy_X) # Launch the convergence loop keep_lambda = np.ones(n_features, dtype=bool) lambda_1 = self.lambda_1 lambda_2 = self.lambda_2 alpha_1 = self.alpha_1 alpha_2 = self.alpha_2 verbose = self.verbose # Initialization of the values of the parameters alpha_ = 1. / np.var(y) lambda_ = np.ones(n_features) self.scores_ = list() coef_old_ = None # Iterative procedure of ARDRegression for iter_ in range(self.n_iter): # Compute mu and sigma (using Woodbury matrix identity) sigma_ = pinvh(np.eye(n_samples) / alpha_ + np.dot(X[:, keep_lambda] * np.reshape(1. / lambda_[keep_lambda], [1, -1]), X[:, keep_lambda].T)) sigma_ = np.dot(sigma_, X[:, keep_lambda] * np.reshape(1. / lambda_[keep_lambda], [1, -1])) sigma_ = - np.dot(np.reshape(1. / lambda_[keep_lambda], [-1, 1]) * X[:, keep_lambda].T, sigma_) sigma_.flat[::(sigma_.shape[1] + 1)] += 1. / lambda_[keep_lambda] coef_[keep_lambda] = alpha_ * np.dot( sigma_, np.dot(X[:, keep_lambda].T, y)) # Update alpha and lambda rmse_ = np.sum((y - np.dot(X, coef_)) ** 2) gamma_ = 1. - lambda_[keep_lambda] * np.diag(sigma_) lambda_[keep_lambda] = ((gamma_ + 2. * lambda_1) / ((coef_[keep_lambda]) ** 2 + 2. * lambda_2)) alpha_ = ((n_samples - gamma_.sum() + 2. * alpha_1) / (rmse_ + 2. * alpha_2)) # Prune the weights with a precision over a threshold keep_lambda = lambda_ < self.threshold_lambda coef_[~keep_lambda] = 0 # Compute the objective function if self.compute_score: s = (lambda_1 * np.log(lambda_) - lambda_2 * lambda_).sum() s += alpha_1 * log(alpha_) - alpha_2 * alpha_ s += 0.5 * (fast_logdet(sigma_) + n_samples * log(alpha_) + np.sum(np.log(lambda_))) s -= 0.5 * (alpha_ * rmse_ + (lambda_ * coef_ ** 2).sum()) self.scores_.append(s) # Check for convergence if iter_ > 0 and np.sum(np.abs(coef_old_ - coef_)) < self.tol: if verbose: print("Converged after %s iterations" % iter_) break coef_old_ = np.copy(coef_) self.coef_ = coef_ self.alpha_ = alpha_ self.sigma_ = sigma_ self.lambda_ = lambda_ self._set_intercept(X_offset_, y_offset_, X_scale_) return self
def graphical_lasso( emp_cov, alpha, *, cov_init=None, mode="cd", tol=1e-4, enet_tol=1e-4, max_iter=100, verbose=False, return_costs=False, eps=np.finfo(np.float64).eps, return_n_iter=False, ): """l1-penalized covariance estimator Read more in the :ref:`User Guide <sparse_inverse_covariance>`. .. versionchanged:: v0.20 graph_lasso has been renamed to graphical_lasso Parameters ---------- emp_cov : ndarray of shape (n_features, n_features) Empirical covariance from which to compute the covariance estimate. alpha : float The regularization parameter: the higher alpha, the more regularization, the sparser the inverse covariance. Range is (0, inf]. cov_init : array of shape (n_features, n_features), default=None The initial guess for the covariance. If None, then the empirical covariance is used. mode : {'cd', 'lars'}, default='cd' The Lasso solver to use: coordinate descent or LARS. Use LARS for very sparse underlying graphs, where p > n. Elsewhere prefer cd which is more numerically stable. tol : float, default=1e-4 The tolerance to declare convergence: if the dual gap goes below this value, iterations are stopped. Range is (0, inf]. enet_tol : float, default=1e-4 The tolerance for the elastic net solver used to calculate the descent direction. This parameter controls the accuracy of the search direction for a given column update, not of the overall parameter estimate. Only used for mode='cd'. Range is (0, inf]. max_iter : int, default=100 The maximum number of iterations. verbose : bool, default=False If verbose is True, the objective function and dual gap are printed at each iteration. return_costs : bool, default=False If return_costs is True, the objective function and dual gap at each iteration are returned. eps : float, default=eps The machine-precision regularization in the computation of the Cholesky diagonal factors. Increase this for very ill-conditioned systems. Default is `np.finfo(np.float64).eps`. return_n_iter : bool, default=False Whether or not to return the number of iterations. Returns ------- covariance : ndarray of shape (n_features, n_features) The estimated covariance matrix. precision : ndarray of shape (n_features, n_features) The estimated (sparse) precision matrix. costs : list of (objective, dual_gap) pairs The list of values of the objective function and the dual gap at each iteration. Returned only if return_costs is True. n_iter : int Number of iterations. Returned only if `return_n_iter` is set to True. See Also -------- GraphicalLasso, GraphicalLassoCV Notes ----- The algorithm employed to solve this problem is the GLasso algorithm, from the Friedman 2008 Biostatistics paper. It is the same algorithm as in the R `glasso` package. One possible difference with the `glasso` R package is that the diagonal coefficients are not penalized. 
""" _, n_features = emp_cov.shape if alpha == 0: if return_costs: precision_ = linalg.inv(emp_cov) cost = -2.0 * log_likelihood(emp_cov, precision_) cost += n_features * np.log(2 * np.pi) d_gap = np.sum(emp_cov * precision_) - n_features if return_n_iter: return emp_cov, precision_, (cost, d_gap), 0 else: return emp_cov, precision_, (cost, d_gap) else: if return_n_iter: return emp_cov, linalg.inv(emp_cov), 0 else: return emp_cov, linalg.inv(emp_cov) if cov_init is None: covariance_ = emp_cov.copy() else: covariance_ = cov_init.copy() # As a trivial regularization (Tikhonov like), we scale down the # off-diagonal coefficients of our starting point: This is needed, as # in the cross-validation the cov_init can easily be # ill-conditioned, and the CV loop blows. Beside, this takes # conservative stand-point on the initial conditions, and it tends to # make the convergence go faster. covariance_ *= 0.95 diagonal = emp_cov.flat[:: n_features + 1] covariance_.flat[:: n_features + 1] = diagonal precision_ = linalg.pinvh(covariance_) indices = np.arange(n_features) costs = list() # The different l1 regression solver have different numerical errors if mode == "cd": errors = dict(over="raise", invalid="ignore") else: errors = dict(invalid="raise") try: # be robust to the max_iter=0 edge case, see: # https://github.com/scikit-learn/scikit-learn/issues/4134 d_gap = np.inf # set a sub_covariance buffer sub_covariance = np.copy(covariance_[1:, 1:], order="C") for i in range(max_iter): for idx in range(n_features): # To keep the contiguous matrix `sub_covariance` equal to # covariance_[indices != idx].T[indices != idx] # we only need to update 1 column and 1 line when idx changes if idx > 0: di = idx - 1 sub_covariance[di] = covariance_[di][indices != idx] sub_covariance[:, di] = covariance_[:, di][indices != idx] else: sub_covariance[:] = covariance_[1:, 1:] row = emp_cov[idx, indices != idx] with np.errstate(**errors): if mode == "cd": # Use coordinate descent coefs = -( precision_[indices != idx, idx] / (precision_[idx, idx] + 1000 * eps) ) coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram( coefs, alpha, 0, sub_covariance, row, row, max_iter, enet_tol, check_random_state(None), False, ) else: # Use LARS _, _, coefs = lars_path_gram( Xy=row, Gram=sub_covariance, n_samples=row.size, alpha_min=alpha / (n_features - 1), copy_Gram=True, eps=eps, method="lars", return_path=False, ) # Update the precision matrix precision_[idx, idx] = 1.0 / ( covariance_[idx, idx] - np.dot(covariance_[indices != idx, idx], coefs) ) precision_[indices != idx, idx] = -precision_[idx, idx] * coefs precision_[idx, indices != idx] = -precision_[idx, idx] * coefs coefs = np.dot(sub_covariance, coefs) covariance_[idx, indices != idx] = coefs covariance_[indices != idx, idx] = coefs if not np.isfinite(precision_.sum()): raise FloatingPointError( "The system is too ill-conditioned " "for this solver" ) d_gap = _dual_gap(emp_cov, precision_, alpha) cost = _objective(emp_cov, precision_, alpha) if verbose: print( "[graphical_lasso] Iteration " "% 3i, cost % 3.2e, dual gap %.3e" % (i, cost, d_gap) ) if return_costs: costs.append((cost, d_gap)) if np.abs(d_gap) < tol: break if not np.isfinite(cost) and i > 0: raise FloatingPointError( "Non SPD result: the system is " "too ill-conditioned for this solver" ) else: warnings.warn( "graphical_lasso: did not converge after " "%i iteration: dual gap: %.3e" % (max_iter, d_gap), ConvergenceWarning, ) except FloatingPointError as e: e.args = (e.args[0] + ". 
The system is too ill-conditioned for this solver",) raise e if return_costs: if return_n_iter: return covariance_, precision_, costs, i + 1 else: return covariance_, precision_, costs else: if return_n_iter: return covariance_, precision_, i + 1 else: return covariance_, precision_
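The initialisation used by both versions above, in isolation: shrink the off-diagonal entries of the empirical covariance, restore its diagonal, and start the precision from a symmetric pseudo-inverse. A minimal sketch with a random data matrix:

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
X = rng.randn(50, 4)
emp_cov = np.cov(X, rowvar=False, bias=True)
n_features = emp_cov.shape[0]

covariance_ = emp_cov.copy()
covariance_ *= 0.95                                                   # damp off-diagonal entries...
covariance_.flat[::n_features + 1] = emp_cov.flat[::n_features + 1]   # ...but keep the empirical diagonal
precision_ = linalg.pinvh(covariance_)                                # well-conditioned starting precision
print(np.allclose(precision_, precision_.T))                          # pinvh returns a symmetric matrix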
def compute_density_PAk_gCorr( self, gauss_approx=True, alpha=1.0, log_den_PAk=None, log_den_PAk_err=None, comp_err=True, ): """ finds the maximum likelihood solution of PAk likelihood + gCorr likelihood with deltaFijs computed using the gradients """ # TODO: we need to impement the deltaFijs to be computed as a*l (as in PAk) # compute changes in free energy if self.Fij_array is None: self.compute_deltaFs_grads_semisum() if self.verb: print("PAk_gCorr density estimation started") sec = time.time() dc = np.empty(self.N, dtype=float) log_den = np.empty(self.N, dtype=float) log_den_err = np.zeros(self.N, dtype=float) prefactor = np.exp( self.intrinsic_dim / 2.0 * np.log(np.pi) - gammaln((self.intrinsic_dim + 2) / 2) ) log_den_min = 9.9e300 vij_list = [] Fij_list = [] Fij_var_list = [] if gauss_approx is True: if self.verb: print("Maximising likelihood in Gaussian approximation") if log_den_PAk is not None and log_den_PAk_err is not None: self.log_den = log_den_PAk self.log_den_err = log_den_PAk_err else: self.compute_density_PAk() # compute adjacency matrix and cumulative changes A = sparse.lil_matrix((self.N, self.N), dtype=np.float_) supp_deltaF = sparse.lil_matrix((self.N, self.N), dtype=np.float_) # define redundancy factor for each A matrix entry as the geometric mean of the 2 corresponding k* k1 = self.kstar[self.nind_list[:, 0]] k2 = self.kstar[self.nind_list[:, 1]] redundancy = np.sqrt(k1 * k2) for nspar, indices in enumerate(self.nind_list): i = indices[0] j = indices[1] # tmp = 1.0 / self.Fij_var_array[nspar] tmp = 1.0 / self.Fij_var_array[nspar] / redundancy[nspar] A[i, j] = -tmp supp_deltaF[i, j] = self.Fij_array[nspar] * tmp A = alpha * sparse.lil_matrix(A + A.transpose()) diag = ( np.array(-A.sum(axis=1)).reshape((self.N,)) + (1.0 - alpha) / self.log_den_err**2 ) A.setdiag(diag) deltaFcum = ( alpha * ( np.array(supp_deltaF.sum(axis=0)).reshape((self.N,)) - np.array(supp_deltaF.sum(axis=1)).reshape((self.N,)) ) + (1.0 - alpha) * self.log_den / self.log_den_err**2 ) sec2 = time.time() if self.verb: print("{0:0.2f} seconds to fill sparse matrix".format(sec2 - sec)) log_den = sparse.linalg.spsolve(A.tocsr(), deltaFcum) if self.verb: print( "{0:0.2f} seconds to solve linear system".format(time.time() - sec2) ) sec2 = time.time() self.log_den = log_den if comp_err is True: self.A = A.todense() self.B = slin.pinvh(self.A) # self.B = slin.inv(self.A) self.log_den_err = np.sqrt(np.diag(self.B)) if self.verb: print("{0:0.2f} seconds inverting A matrix".format(time.time() - sec2)) sec2 = time.time() # self.log_den_err = np.sqrt(diag/(np.array(np.sum(np.square(A.todense()),axis=1)).reshape(self.N,))) else: if self.verb: print("Solving via SGD") from dadapy.utils_.mlmax_pytorch import maximise_wPAk for i in range(self.N): Fij_list.append( self.Fij_array[self.nind_iptr[i] : self.nind_iptr[i + 1]] ) Fij_var_list.append( self.Fij_var_array[self.nind_iptr[i] : self.nind_iptr[i + 1]] ) dc[i] = self.distances[i, self.kstar[i]] rr = np.log(self.kstar[i]) - ( np.log(prefactor) + self.intrinsic_dim * np.log(self.distances[i, self.kstar[i]]) ) log_den[i] = rr vj = np.zeros(self.kstar[i]) for j in range(self.kstar[i]): vj[j] = prefactor * ( pow(self.distances[i, j + 1], self.intrinsic_dim) - pow(self.distances[i, j], self.intrinsic_dim) ) vij_list.append(vj) l_, log_den = maximise_wPAk( log_den, self.kstar, vij_list, self.dist_indices, Fij_list, Fij_var_list, alpha, ) log_den -= np.log(self.N) self.log_den = log_den sec2 = time.time() if self.verb: print( "{0:0.2f} seconds for PAk_gCorr density 
estimation".format(sec2 - sec) )
def hamiltonian(traj_list, traj_alive, cent_list=None): """Builds the Hamiltonian matrix from a list of trajectories.""" n_alive = len(traj_alive) if glbl.integrals.hermitian: n_elem = int(n_alive * (n_alive + 1) / 2) else: n_elem = n_alive * n_alive T = np.zeros((n_alive, n_alive), dtype=complex) V = np.zeros((n_alive, n_alive), dtype=complex) H = np.zeros((n_alive, n_alive), dtype=complex) S = np.zeros((n_alive, n_alive), dtype=complex) Snuc = np.zeros((n_alive, n_alive), dtype=complex) Sinv = np.zeros((n_alive, n_alive), dtype=complex) Sdot = np.zeros((n_alive, n_alive), dtype=complex) Heff = np.zeros((n_alive, n_alive), dtype=complex) t_ovrlp = np.zeros((n_alive, n_alive), dtype=complex) Sdnuc = np.zeros((n_alive, n_alive), dtype=complex) Sdele = np.zeros((n_alive, n_alive), dtype=complex) # now evaluate the hamiltonian matrix for ij in range(n_elem): if glbl.integrals.hermitian: i, j = ut_ind(ij) else: i, j = sq_ind(ij, n_alive) ii = traj_alive[i] jj = traj_alive[j] # nuclear overlap matrix (excluding electronic component) Snuc[i, j] = glbl.integrals.s_integral(traj_list[ii], traj_list[jj], nuc_only=True) # compute overlap of trajectories (different from S, which may or may # not involve integration in a gaussian basis t_ovrlp[i, j] = glbl.integrals.traj_overlap(traj_list[ii], traj_list[jj], Snuc=Snuc[i, j]) # overlap matrix (including electronic component) S[i, j] = glbl.integrals.s_integral(traj_list[ii], traj_list[jj], Snuc=Snuc[i, j]) # time-derivative of the overlap matrix (not hermitian in general) Sdot[i, j] = glbl.integrals.sdot_integral(traj_list[ii], traj_list[jj], Snuc=Snuc[i, j]) Sdnuc[i, j] = glbl.integrals.sdot_integral(traj_list[ii], traj_list[jj], Snuc=Snuc[i, j], nuc_only=True) Sdele[i, j] = glbl.integrals.sdot_integral(traj_list[ii], traj_list[jj], Snuc=Snuc[i, j], e_only=True) # kinetic energy matrix T[i, j] = glbl.integrals.ke_integral(traj_list[ii], traj_list[jj], Snuc=Snuc[i, j]) # potential energy matrix if glbl.integrals.require_centroids: V[i, j] = glbl.integrals.v_integral(traj_list[ii], traj_list[jj], centroid=cent_list[ii][jj], Snuc=Snuc[i, j]) else: V[i, j] = glbl.integrals.v_integral(traj_list[ii], traj_list[jj], Snuc=Snuc[i, j]) # Hamiltonian matrix in non-orthogonal basis H[i, j] = T[i, j] + V[i, j] # if hermitian matrix, set (j,i) indices if glbl.integrals.hermitian and i != j: Snuc[j, i] = Snuc[i, j].conjugate() S[j, i] = S[i, j].conjugate() t_ovrlp[j, i] = t_ovrlp[i, j].conjugate() Sdot[j, i] = glbl.integrals.sdot_integral(traj_list[jj], traj_list[ii], Snuc=Snuc[j, i]) Sdnuc[j, i] = glbl.integrals.sdot_integral(traj_list[jj], traj_list[ii], Snuc=Snuc[j, i], nuc_only=True) Sdele[j, i] = glbl.integrals.sdot_integral(traj_list[jj], traj_list[ii], Snuc=Snuc[j, i], e_only=True) T[j, i] = T[i, j].conjugate() V[j, i] = V[i, j].conjugate() H[j, i] = H[i, j].conjugate() if glbl.integrals.hermitian: # compute the S^-1, needed to compute Heff timings.start('linalg.pinvh') Sinv = sp_linalg.pinvh(S) # Sinv, cond = fms_linalg.pseudo_inverse2(S) timings.stop('linalg.pinvh') else: # compute the S^-1, needed to compute Heff timings.start('hamiltonian.pseudo_inverse') Sinv, cond = fms_linalg.pseudo_inverse(S) timings.stop('hamiltonian.pseudo_inverse') Heff = np.dot(Sinv, H - 1j * Sdot) fileio.print_bund_mat(0., 'sdot_nuc', Sdnuc) fileio.print_bund_mat(0., 'sdot_ele', Sdele) return t_ovrlp, T, V, S, Snuc, Sdot, Heff
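The final contraction above, in isolation: given the overlap matrix S, the Hamiltonian H and the overlap time-derivative Sdot, the effective Hamiltonian is Heff = S^(-1) (H - i*Sdot), with pinvh providing the Hermitian (pseudo-)inverse. The matrices below are random stand-ins, not FMS integrals.

import numpy as np
from scipy import linalg

rng = np.random.RandomState(0)
n = 4

def random_hermitian(n):
    A = rng.randn(n, n) + 1j * rng.randn(n, n)
    return 0.5 * (A + A.conj().T)

H = random_hermitian(n)
Sdot = rng.randn(n, n) + 1j * rng.randn(n, n)    # not Hermitian in general
S = random_hermitian(n) + n * np.eye(n)          # well-conditioned overlap matrix

Sinv = linalg.pinvh(S)                           # Hermitian pseudo-inverse of the overlap
Heff = Sinv.dot(H - 1j * Sdot)                   # effective Hamiltonian, as in the code above
print(np.allclose(S.dot(Sinv), np.eye(n)))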