def fit(self, balance_param=0.5, sparsity_param=0.01, verbose=False):
    '''
    balance_param: trades off between sparsity and M0 prior
    sparsity_param: trades off between optimizer and sparseness (see graph_lasso)
    '''
    P = pinvh(self.M) + balance_param * self.loss_matrix
    emp_cov = pinvh(P)
    # hack: ensure positive semidefinite
    emp_cov = emp_cov.T.dot(emp_cov)
    self.M, _ = graph_lasso(emp_cov, sparsity_param, verbose=verbose)
def fit(self, X, W, verbose=False):
    """
    X: data matrix, (n x d)
    W: connectivity graph, (n x n). +1 for positive pairs, -1 for negative.
    """
    self._prepare_inputs(X, W)
    P = pinvh(self.M) + self.balance_param * self.loss_matrix
    emp_cov = pinvh(P)
    # hack: ensure positive semidefinite
    emp_cov = emp_cov.T.dot(emp_cov)
    self.M, _ = graph_lasso(emp_cov, self.sparsity_param, verbose=verbose)
    return self
def fit(self, X, W): """ X: data matrix, (n x d) each row corresponds to a single instance W: connectivity graph, (n x n). +1 for positive pairs, -1 for negative. """ self._prepare_inputs(X, W) P = pinvh(self.M) + self.params['balance_param'] * self.loss_matrix emp_cov = pinvh(P) # hack: ensure positive semidefinite emp_cov = emp_cov.T.dot(emp_cov) self.M, _ = graph_lasso(emp_cov, self.params['sparsity_param'], verbose=self.params['verbose']) return self
def fit(self, X, W): """ X: data matrix, (n x d) each row corresponds to a single instance W: connectivity graph, (n x n) +1 for positive pairs, -1 for negative. """ self._prepare_inputs(X, W) P = pinvh(self.M) + self.params['balance_param'] * self.loss_matrix emp_cov = pinvh(P) # hack: ensure positive semidefinite emp_cov = emp_cov.T.dot(emp_cov) self.M, _ = graph_lasso(emp_cov, self.params['sparsity_param'], verbose=self.params['verbose']) return self
def correct_covariance(self, data, method=None):
    """Apply a correction to raw Minimum Covariance Determinant estimates.

    Correction using the empirical correction factor suggested
    by Rousseeuw and Van Driessen in [Rouseeuw1984]_.

    Parameters
    ----------
    data: array-like, shape (n_samples, n_features)
      The data matrix, with p features and n samples.
      The data set must be the one which was used to compute
      the raw estimates.
    method: str in {"empirical", "theoretical"} or None
      Correction to apply; any other value leaves the raw covariance
      unchanged.

    Returns
    -------
    covariance_corrected: array-like, shape (n_features, n_features)
      Corrected robust covariance estimate.

    """
    if method == "empirical":
        X_c = data - self.raw_location_
        dist = np.sum(
            np.dot(X_c, pinvh(self.raw_covariance_)) * X_c, 1)
        correction = np.median(dist) / sp.stats.chi2(
            data.shape[1]).isf(0.5)
        covariance_corrected = self.raw_covariance_ * correction
    elif method == "theoretical":
        n, p = data.shape
        c = sp.stats.chi2(p + 2).cdf(sp.stats.chi2(p).ppf(self.h)) / self.h
        covariance_corrected = self.raw_covariance_ * c
    else:
        covariance_corrected = self.raw_covariance_
    self._set_covariance(covariance_corrected)
    return covariance_corrected
def _posterior_dist(self, X, y, A):
    '''
    Uses Laplace approximation for calculating posterior distribution
    '''
    f = lambda w: _logistic_cost_grad(X, y, w, A)
    w_init = np.random.random(X.shape[1])
    Mn = fmin_l_bfgs_b(f, x0=w_init, pgtol=self.tol_solver,
                       maxiter=self.n_iter_solver)[0]
    Xm = np.dot(X, Mn)
    s = expit(Xm)
    B = logistic._pdf(Xm)  # avoids underflow
    S = np.dot(X.T * B, X)
    np.fill_diagonal(S, np.diag(S) + A)
    t_hat = y - s
    cholesky = True
    # try using Cholesky, if it fails then fall back on pinvh
    try:
        R = np.linalg.cholesky(S)
        Sn = solve_triangular(R, np.eye(A.shape[0]),
                              check_finite=False, lower=True)
    except LinAlgError:
        Sn = pinvh(S)
        cholesky = False
    return [Mn, Sn, B, t_hat, cholesky]
def correct_covariance(self, data, method=None):
    """Apply a correction to raw Minimum Covariance Determinant estimates.

    Correction using the empirical correction factor suggested
    by Rousseeuw and Van Driessen in [Rouseeuw1984]_.

    Parameters
    ----------
    data: array-like, shape (n_samples, n_features)
      The data matrix, with p features and n samples.
      The data set must be the one which was used to compute
      the raw estimates.
    method: str in {"empirical", "theoretical"} or None
      Correction to apply; any other value leaves the raw covariance
      unchanged.

    Returns
    -------
    covariance_corrected: array-like, shape (n_features, n_features)
      Corrected robust covariance estimate.

    """
    if method == "empirical":
        X_c = data - self.raw_location_
        dist = np.sum(np.dot(X_c, pinvh(self.raw_covariance_)) * X_c, 1)
        correction = np.median(dist) / sp.stats.chi2(
            data.shape[1]).isf(0.5)
        covariance_corrected = self.raw_covariance_ * correction
    elif method == "theoretical":
        n, p = data.shape
        c = sp.stats.chi2(p + 2).cdf(sp.stats.chi2(p).ppf(self.h)) / self.h
        covariance_corrected = self.raw_covariance_ * c
    else:
        covariance_corrected = self.raw_covariance_
    self._set_covariance(covariance_corrected)
    return covariance_corrected
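# Hedged, self-contained sketch (not part of the original estimator): the
# "empirical" branch above rescales the raw MCD covariance so that the median
# squared Mahalanobis distance matches the chi-square median expected under a
# Gaussian model. The helper name `empirical_consistency_factor` and the toy
# data are illustrative assumptions.
import numpy as np
from scipy import stats
from scipy.linalg import pinvh


def empirical_consistency_factor(data, location, covariance):
    # squared Mahalanobis distances of the data under the raw estimates
    X_c = data - location
    dist = np.sum(np.dot(X_c, pinvh(covariance)) * X_c, axis=1)
    # ratio of the observed median distance to the Gaussian chi2 median
    return np.median(dist) / stats.chi2(data.shape[1]).isf(0.5)


if __name__ == "__main__":
    rng = np.random.RandomState(0)
    X = rng.randn(500, 3)
    factor = empirical_consistency_factor(X, X.mean(0), np.cov(X, rowvar=False))
    print("correction factor (close to 1 for clean Gaussian data):", factor)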
def _posterior_dist(self, A, beta, XX, XY, full_covar=False):
    '''
    Calculates mean and covariance matrix of posterior distribution
    of coefficients.
    '''
    # compute precision matrix for active features
    Sinv = beta * XX
    np.fill_diagonal(Sinv, np.diag(Sinv) + A)
    cholesky = True
    # try cholesky, if it fails go back to pinvh
    try:
        # find posterior mean : R*R.T*mean = beta*X.T*Y
        # solve(R*z = beta*X.T*Y) => find z => solve(R.T*mean = z) => find mean
        R = np.linalg.cholesky(Sinv)
        Z = solve_triangular(R, beta * XY, check_finite=False, lower=True)
        Mn = solve_triangular(R.T, Z, check_finite=False, lower=False)

        # invert lower triangular matrix from cholesky decomposition
        Ri = solve_triangular(R, np.eye(A.shape[0]), check_finite=False,
                              lower=True)
        if full_covar:
            Sn = np.dot(Ri.T, Ri)
            return Mn, Sn, cholesky
        else:
            return Mn, Ri, cholesky
    except LinAlgError:
        cholesky = False
        Sn = pinvh(Sinv)
        # posterior mean uses the covariance Sn, not the precision Sinv
        Mn = beta * np.dot(Sn, XY)
        return Mn, Sn, cholesky
def _posterior_dist(self, A, beta, XX, XY, full_covar=False):
    '''
    Calculates mean and covariance matrix of posterior distribution
    of coefficients.
    '''
    # compute precision matrix for active features
    Sinv = beta * XX
    np.fill_diagonal(Sinv, np.diag(Sinv) + A)
    cholesky = True
    # try cholesky, if it fails go back to pinvh
    try:
        # find posterior mean : R*R.T*mean = beta*X.T*Y
        # solve(R*z = beta*X.T*Y) => find z => solve(R.T*mean = z) => find mean
        R = np.linalg.cholesky(Sinv)
        Z = solve_triangular(R, beta * XY, check_finite=False, lower=True)
        Mn = solve_triangular(R.T, Z, check_finite=False, lower=False)

        # invert lower triangular matrix from cholesky decomposition
        Ri = solve_triangular(R, np.eye(A.shape[0]), check_finite=False,
                              lower=True)
        if full_covar:
            Sn = np.dot(Ri.T, Ri)
            return Mn, Sn, cholesky
        else:
            return Mn, Ri, cholesky
    except LinAlgError:
        cholesky = False
        Sn = pinvh(Sinv)
        # posterior mean uses the covariance Sn, not the precision Sinv
        Mn = beta * np.dot(Sn, XY)
        return Mn, Sn, cholesky
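# Minimal standalone sketch of the numerical pattern shared by the two
# variants above: build the posterior precision, attempt a Cholesky solve,
# and fall back on pinvh when the matrix is not numerically positive
# definite. The function name `bayesian_ridge_posterior` and the synthetic
# data are illustrative, not taken from the original library.
import numpy as np
from numpy.linalg import LinAlgError
from scipy.linalg import pinvh, solve_triangular


def bayesian_ridge_posterior(X, y, alpha, beta):
    """Posterior mean/covariance of weights with prior precision alpha and
    noise precision beta; Cholesky first, pinvh as a fallback."""
    Sinv = beta * X.T.dot(X)                       # posterior precision
    np.fill_diagonal(Sinv, np.diag(Sinv) + alpha)
    XY = X.T.dot(y)
    try:
        R = np.linalg.cholesky(Sinv)
        Z = solve_triangular(R, beta * XY, lower=True)
        Mn = solve_triangular(R.T, Z, lower=False)
        Ri = solve_triangular(R, np.eye(Sinv.shape[0]), lower=True)
        Sn = Ri.T.dot(Ri)                          # covariance = (R R.T)^{-1}
    except LinAlgError:
        Sn = pinvh(Sinv)                           # pseudo-inverse fallback
        Mn = beta * Sn.dot(XY)
    return Mn, Sn


rng = np.random.RandomState(0)
X = rng.randn(50, 4)
y = X.dot([1., -2., 0.5, 0.]) + 0.1 * rng.randn(50)
Mn, Sn = bayesian_ridge_posterior(X, y, alpha=1e-2, beta=100.)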
def fit(self, X, W=None):
    '''
    X: data matrix, (n x d)
       each row corresponds to a single instance
       Must be shifted to zero already.
    W: connectivity graph, (n x n)
       +1 for positive pairs, -1 for negative.
    '''
    print('SDML.fit ...', numpy.shape(X))
    self.mean_ = numpy.mean(X, axis=0)
    X = numpy.matrix(X - self.mean_)
    # set up prior M
    #print 'X', X.shape
    if self.use_cov:
        M = np.cov(X.T)
    else:
        M = np.identity(X.shape[1])
    if W is None:
        W = np.ones((X.shape[1], X.shape[1]))
    #print 'W', W.shape
    L = laplacian(W, normed=False)
    #print 'L', L.shape
    inner = X.dot(L.T)
    loss_matrix = inner.T.dot(X)
    #print 'loss', loss_matrix.shape

    #print 'pinv', pinvh(M).shape
    P = pinvh(M) + self.balance_param * loss_matrix
    #print 'P', P.shape
    emp_cov = pinvh(P)
    # hack: ensure positive semidefinite
    emp_cov = emp_cov.T.dot(emp_cov)
    M, _ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose)
    self.M = M
    C = numpy.linalg.cholesky(self.M)
    self.dewhiten_ = C
    self.whiten_ = numpy.linalg.inv(C)
    # U: rotation matrix, S: scaling matrix
    #U, S, _ = scipy.linalg.svd(M)
    #s = np.sqrt(S.clip(self.EPS))
    #s_inv = np.diag(1. / s)
    #s = np.diag(s)
    #self.whiten_ = np.dot(np.dot(U, s_inv), U.T)
    #self.dewhiten_ = np.dot(np.dot(U, s), U.T)
    #print 'M:', M
    print('SDML.fit done')
def mutual_incoherence(X_relevant, X_irelevant):
    """Mutual incoherence, as defined by formula (26a) of [Wainwright2006].
    """
    projector = np.dot(
        np.dot(X_irelevant.T, X_relevant),
        pinvh(np.dot(X_relevant.T, X_relevant))
    )
    return np.max(np.abs(projector).sum(axis=1))
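# Hedged usage sketch (not from the original source): exercising the
# mutual_incoherence helper above on a random design split into "relevant"
# and "irrelevant" columns; assumes numpy (as np) and scipy.linalg.pinvh are
# imported as in the snippet.
import numpy as np

rng = np.random.RandomState(0)
X = rng.randn(200, 5)                      # 200 samples, 5 features
X_relevant, X_irelevant = X[:, :3], X[:, 3:]

# values well below 1 indicate the irrepresentable condition holds
print("mutual incoherence:", mutual_incoherence(X_relevant, X_irelevant))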
def test_pinvh_nonpositive():
    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=np.float64)
    a = np.dot(a, a.T)
    u, s, vt = np.linalg.svd(a)
    s[0] *= -1
    a = np.dot(u * s, vt)  # a is now symmetric non-positive and singular
    a_pinv = pinv2(a)
    a_pinvh = pinvh(a)
    assert_almost_equal(a_pinv, a_pinvh)
def _log_multivariate_normal_density_tied(X, means, covars):
    """Compute Gaussian log-density at X for a tied model"""
    n_samples, n_dim = X.shape
    icv = pinvh(covars)
    lpr = -0.5 * (n_dim * np.log(2 * np.pi)
                  + np.log(linalg.det(covars) + 0.1)
                  + np.sum(X * np.dot(X, icv), 1)[:, np.newaxis]
                  - 2 * np.dot(np.dot(X, icv), means.T)
                  + np.sum(means * np.dot(means, icv), 1))
    return lpr
def objective_function(self, data, location, covariance):
    """Objective function minimized at each step of the MCD algorithm.
    """
    precision = pinvh(covariance)
    det = fast_logdet(precision)
    trace = np.trace(
        np.dot(empirical_covariance(data - location, assume_centered=True),
               precision))
    pen = self.shrinkage * np.trace(precision)
    return -det + trace + pen
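# Hedged sketch (not the original class method): the same penalized objective
# written as a standalone function so it can be evaluated directly, using
# scipy.linalg.pinvh together with scikit-learn's empirical_covariance and
# fast_logdet. The function name `mcd_objective` and the toy data are
# illustrative assumptions.
import numpy as np
from scipy.linalg import pinvh
from sklearn.covariance import empirical_covariance
from sklearn.utils.extmath import fast_logdet


def mcd_objective(data, location, covariance, shrinkage=0.0):
    precision = pinvh(covariance)
    det = fast_logdet(precision)
    trace = np.trace(np.dot(
        empirical_covariance(data - location, assume_centered=True),
        precision))
    pen = shrinkage * np.trace(precision)
    return -det + trace + pen


rng = np.random.RandomState(0)
X = rng.randn(100, 3)
print(mcd_objective(X, X.mean(0), np.cov(X, rowvar=False), shrinkage=0.1))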
def _prepare_inputs(self, X, W):
    self.X_ = X = check_array(X)
    W = check_array(W, accept_sparse=True)
    # set up prior M
    if self.use_cov:
        self.M_ = pinvh(np.cov(X, rowvar=False))
    else:
        self.M_ = np.identity(X.shape[1])
    L = laplacian(W, normed=False)
    return X.T.dot(L.dot(X))
def _fit(self, pairs, y):
    pairs, y = self._prepare_inputs(pairs, y,
                                    type_of_inputs='tuples')

    # set up prior M
    if self.use_cov:
        X = np.vstack(
            {tuple(row) for row in pairs.reshape(-1, pairs.shape[2])})
        M = pinvh(np.atleast_2d(np.cov(X, rowvar=False)))
    else:
        M = np.identity(pairs.shape[2])
    diff = pairs[:, 0] - pairs[:, 1]
    loss_matrix = (diff.T * y).dot(diff)
    P = M + self.balance_param * loss_matrix
    emp_cov = pinvh(P)
    # hack: ensure positive semidefinite
    emp_cov = emp_cov.T.dot(emp_cov)
    _, M = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose)

    self.transformer_ = transformer_from_metric(M)
    return self
def fit(self, X, W): """Learn the SDML model. Parameters ---------- X : array-like, shape (n, d) data matrix, where each row corresponds to a single instance W : array-like, shape (n, n) connectivity graph, with +1 for positive pairs and -1 for negative Returns ------- self : object Returns the instance. """ loss_matrix = self._prepare_inputs(X, W) P = pinvh(self.M_) + self.balance_param * loss_matrix emp_cov = pinvh(P) # hack: ensure positive semidefinite emp_cov = emp_cov.T.dot(emp_cov) self.M_, _ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) return self
def sparse_metric_as_prec(X, S, D, eta, useEmpiricalCovariance=False):
    nSamples, nDim = X.shape
    qf = link_precision(X, S, D)

    # Estimate the covariance
    if useEmpiricalCovariance:
        empiricalCovariance = np.dot(X.T, X) / nSamples
        assert np.all(np.linalg.eigvalsh(empiricalCovariance) >= 0)
        empiricalPrecision = pinvh(empiricalCovariance)
        assert np.all(np.linalg.eigvalsh(empiricalPrecision) >= 0)
        M0 = empiricalPrecision
    else:
        M0 = np.eye(nDim)

    return M0 + eta * qf
def _condition(self, i1, i2, X):
    cov_12 = self.covariance[np.ix_(i1, i2)]
    cov_11 = self.covariance[np.ix_(i1, i1)]
    cov_22 = self.covariance[np.ix_(i2, i2)]
    prec_22 = pinvh(cov_22)
    regression_coeffs = cov_12.dot(prec_22)

    if X.ndim == 2:
        mean = self.mean[i1] + regression_coeffs.dot(
            (X - self.mean[i2]).T).T
    elif X.ndim == 1:
        mean = self.mean[i1] + regression_coeffs.dot(X - self.mean[i2])
    else:
        raise ValueError("%d dimensions are not allowed for X!" % X.ndim)
    covariance = cov_11 - regression_coeffs.dot(cov_12.T)
    return mean, covariance
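# Self-contained numeric sketch of the same Gaussian conditioning formulas
# (mean and covariance of p(x1 | x2), with pinvh used for the block inverse).
# The example distribution and observed value are made up for illustration.
import numpy as np
from scipy.linalg import pinvh

mean = np.array([0.0, 1.0, -1.0])
covariance = np.array([[2.0, 0.5, 0.3],
                       [0.5, 1.0, 0.2],
                       [0.3, 0.2, 1.5]])
i1, i2 = np.array([0, 1]), np.array([2])   # condition dims 0,1 on dim 2

cov_12 = covariance[np.ix_(i1, i2)]
cov_11 = covariance[np.ix_(i1, i1)]
cov_22 = covariance[np.ix_(i2, i2)]
regression_coeffs = cov_12.dot(pinvh(cov_22))

x2 = np.array([0.5])                        # observed value of dim 2
cond_mean = mean[i1] + regression_coeffs.dot(x2 - mean[i2])
cond_cov = cov_11 - regression_coeffs.dot(cov_12.T)
print(cond_mean, cond_cov)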
def fit(self, X, y=None):
    """Fits a Minimum Covariance Determinant with the FastMCD algorithm.

    Parameters
    ----------
    X: array-like, shape = [n_samples, n_features]
      Training data, where n_samples is the number of samples
      and n_features is the number of features.
    y: not used, present for API consistence purpose.

    Returns
    -------
    self: object
      Returns self.

    """
    n_samples, n_features = X.shape
    # check that the empirical covariance is full rank
    if (linalg.svdvals(np.dot(X.T, X)) > 1e-8).sum() != n_features:
        warnings.warn("The covariance matrix associated to your dataset "
                      "is not full rank")
    # compute and store raw estimates
    raw_location, raw_covariance, raw_support = fast_mcd(
        X, objective_function=self.objective_function,
        h=self.h, cov_computation_method=self._nonrobust_covariance)
    if self.h is None:
        self.h = int(np.ceil(0.5 * (n_samples + n_features + 1))) \
            / float(n_samples)
    if self.assume_centered:
        raw_location = np.zeros(n_features)
        raw_covariance = self._nonrobust_covariance(X[raw_support],
                                                    assume_centered=True)
    # get precision matrix in an optimized way
    precision = pinvh(raw_covariance)
    raw_dist = np.sum(np.dot(X, precision) * X, 1)
    self.raw_location_ = raw_location
    self.raw_covariance_ = raw_covariance
    self.raw_support_ = raw_support
    self.location_ = raw_location
    self.support_ = raw_support
    self.dist_ = raw_dist
    # obtain consistency at normal models
    self.correct_covariance(X)
    return self
def fit(self, X, y=None):
    """Fits a Minimum Covariance Determinant with the FastMCD algorithm.

    Parameters
    ----------
    X: array-like, shape = [n_samples, n_features]
      Training data, where n_samples is the number of samples
      and n_features is the number of features.
    y: not used, present for API consistence purpose.

    Returns
    -------
    self: object
      Returns self.

    """
    n_samples, n_features = X.shape
    # check that the empirical covariance is full rank
    if (linalg.svdvals(np.dot(X.T, X)) > 1e-8).sum() != n_features:
        warnings.warn("The covariance matrix associated to your dataset "
                      "is not full rank")
    # compute and store raw estimates
    raw_location, raw_covariance, raw_support = fast_mcd(
        X, objective_function=self.objective_function,
        h=self.h, cov_computation_method=self._nonrobust_covariance)
    if self.h is None:
        self.h = int(np.ceil(0.5 * (n_samples + n_features + 1))) \
            / float(n_samples)
    if self.assume_centered:
        raw_location = np.zeros(n_features)
        raw_covariance = self._nonrobust_covariance(
            X[raw_support], assume_centered=True)
    # get precision matrix in an optimized way
    precision = pinvh(raw_covariance)
    raw_dist = np.sum(np.dot(X, precision) * X, 1)
    self.raw_location_ = raw_location
    self.raw_covariance_ = raw_covariance
    self.raw_support_ = raw_support
    self.location_ = raw_location
    self.support_ = raw_support
    self.dist_ = raw_dist
    # obtain consistency at normal models
    self.correct_covariance(X)
    return self
def launch_rmcdl1_on_dataset(n_samples, n_features, n_outliers):
    rand_gen = np.random.RandomState(0)
    data = rand_gen.randn(n_samples, n_features)
    # add some outliers
    outliers_index = rand_gen.permutation(n_samples)[:n_outliers]
    outliers_offset = 10. * \
        (rand_gen.randint(2, size=(n_outliers, n_features)) - 0.5)
    data[outliers_index] += outliers_offset
    inliers_mask = np.ones(n_samples).astype(bool)
    inliers_mask[outliers_index] = False

    # compute RMCD by fitting an object
    rmcd_fit = RMCDl1().fit(data)
    T = rmcd_fit.location_
    S = rmcd_fit.covariance_

    # compare with the true location and precision
    error_location = np.mean(T ** 2)
    assert(error_location < 1.)
    error_cov = np.mean((np.eye(n_features) - pinvh(S)) ** 2)
    assert(error_cov < 1.)
def _posterior_dist(self, X, y, A, intercept_prior):
    '''
    Uses Laplace approximation for calculating posterior distribution
    '''
    if self.solver == 'lbfgs_b':
        f = lambda w: _logistic_cost_grad(X, y, w, A, intercept_prior)
        w_init = np.random.random(X.shape[1])
        Mn = fmin_l_bfgs_b(f, x0=w_init, pgtol=self.tol_solver,
                           maxiter=self.n_iter_solver)[0]
        Xm = np.dot(X, Mn)
        s = expit(Xm)
        B = logistic._pdf(Xm)  # avoids underflow
        S = np.dot(X.T * B, X)
        np.fill_diagonal(S, np.diag(S) + A)
        t_hat = Xm + (y - s) / B
        Sn = pinvh(S)
    elif self.solver == 'newton_cg':
        # TODO: Implement Newton-CG
        raise NotImplementedError(('Newton Conjugate Gradient optimizer '
                                   'is not currently supported'))
    return [Mn, Sn, B, t_hat]
def _posterior_dist_local(self, X, y, A, tol_mul=1.0):
    '''
    Uses Laplace approximation for calculating posterior distribution
    for local relevance vectors
    '''
    f_full = lambda w: _gaussian_cost_grad(X, y, w, A)

    attempts = 1
    a = -2
    b = 2
    for i in range(attempts):
        w_init = a + np.random.random(X.shape[1]) * (b - a)
        Mn = fmin_l_bfgs_b(f_full, x0=w_init,
                           pgtol=tol_mul * self.tol_solver,
                           maxiter=int(self.n_iter_solver / tol_mul))[0]
        check_sign = [
            0 if Mn[j] * (y[j] - 0.5) >= 0 else 1 for j in range(len(Mn))
        ]
        if sum(check_sign) / len(Mn) < 0.1:
            break

    Xm_nobias = np.dot(X, Mn)
    Xm = Xm_nobias + self.fixed_intercept
    t = (y - 0.5) * 2
    eta = norm.pdf(t * Xm) * t / norm.cdf(Xm * t) + 1e-300
    B = eta * (Xm + eta)
    S = np.dot(X.T * B, X)
    np.fill_diagonal(S, np.diag(S) + A)
    t_hat = Xm_nobias + eta / B
    cholesky = True
    # try using Cholesky, if it fails then fall back on pinvh
    try:
        R = np.linalg.cholesky(S)
        Sn = solve_triangular(R, np.eye(A.shape[0]),
                              check_finite=False, lower=True)
    except LinAlgError:
        Sn = pinvh(S)
        cholesky = False
    return [Mn, Sn, B, t_hat, cholesky]
def fit(self, X, W): """Learn the SDML model. Parameters ---------- X : array-like, shape (n, d) data matrix, where each row corresponds to a single instance W : array-like, shape (n, n) connectivity graph, with +1 for positive pairs and -1 for negative Returns ------- self : object Returns the instance. """ loss_matrix = self._prepare_inputs(X, W) P = self.M_ + self.balance_param * loss_matrix emp_cov = pinvh(P) # hack: ensure positive semidefinite emp_cov = emp_cov.T.dot(emp_cov) _, self.M_ = graph_lasso(emp_cov, self.sparsity_param, verbose=self.verbose) return self
def to_probability_density(self, X):
    """Compute probability density.

    Parameters
    ----------
    X : array-like, shape (n_samples, n_features)
        Data.

    Returns
    -------
    p : array, shape (n_samples,)
        Probability densities of data.
    """
    X = np.atleast_2d(X)
    n_samples, n_features = X.shape

    precision = pinvh(self.covariance)
    d = X - self.mean
    normalization = 1 / np.sqrt((2 * np.pi) ** n_features *
                                np.linalg.det(self.covariance))
    p = np.ndarray(n_samples)
    for n in range(n_samples):
        p[n] = normalization * np.exp(-0.5 * d[n].dot(precision).dot(d[n]))
    return p
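# Standalone check of the density formula above against
# scipy.stats.multivariate_normal; the mean/covariance values and query
# points are illustrative, not from the original class.
import numpy as np
from scipy.linalg import pinvh
from scipy.stats import multivariate_normal

mean = np.array([0.0, 1.0])
covariance = np.array([[1.0, 0.3],
                       [0.3, 2.0]])
X = np.array([[0.0, 0.0], [1.0, 2.0]])

precision = pinvh(covariance)
d = X - mean
normalization = 1 / np.sqrt((2 * np.pi) ** 2 * np.linalg.det(covariance))
p_manual = normalization * np.exp(-0.5 * np.sum(d.dot(precision) * d, axis=1))

p_scipy = multivariate_normal(mean=mean, cov=covariance).pdf(X)
assert np.allclose(p_manual, p_scipy)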
def conditional_distribution(self, x, indices=np.array([0])):
    """Conditional Gaussian distribution.

    See https://en.wikipedia.org/wiki/Multivariate_normal_distribution#Conditional_distributions

    Returns
    -------
    conditional : GMM
        Conditional GMM distribution p(Y | X=x)
    """
    n_features = self.means_.shape[1] - len(indices)
    expected_means = np.empty((self.n_components, n_features))
    expected_covars = np.empty((self.n_components, n_features, n_features))
    expected_weights = np.empty(self.n_components)

    # Highly inspired from https://github.com/AlexanderFabisch/gmr
    # Compute expected_means, expected_covars, given input X
    for i, (mean, covar, weight) in enumerate(zip(self.means_,
                                                  self.covars_,
                                                  self.weights_)):
        i1, i2 = invert_indices(mean.shape[0], indices), indices
        cov_12 = covar[np.ix_(i1, i2)]
        cov_11 = covar[np.ix_(i1, i1)]
        cov_22 = covar[np.ix_(i2, i2)]
        prec_22 = pinvh(cov_22)
        regression_coeffs = cov_12.dot(prec_22)

        if x.ndim == 1:
            x = x[:, np.newaxis]

        expected_means[i] = mean[i1] + regression_coeffs.dot(
            (x - mean[i2]).T).T
        expected_covars[i] = cov_11 - regression_coeffs.dot(cov_12.T)
        expected_weights[i] = weight * \
            multivariate_normal.pdf(x, mean=mean[indices],
                                    cov=covar[np.ix_(indices, indices)])

    expected_weights /= expected_weights.sum()
    return expected_means, expected_covars, expected_weights
def graph_lasso(emp_cov, alpha, tol=1e-4, max_iter=100):
    _, n_features = emp_cov.shape
    covariance_ = emp_cov.copy()
    covariance_ *= 0.95
    diagonal = emp_cov.flat[::n_features + 1]
    covariance_.flat[::n_features + 1] = diagonal
    precision_ = pinvh(covariance_)

    indices = np.arange(n_features)
    eps = np.finfo(np.float64).eps
    for i in range(max_iter):
        for idx in range(n_features):
            sub_covariance = np.ascontiguousarray(
                covariance_[indices != idx].T[indices != idx])
            row = emp_cov[idx, indices != idx]
            # Use coordinate descent
            coefs = -(precision_[indices != idx, idx] /
                      (precision_[idx, idx] + 1000 * eps))
            coefs, _, _, _ = cd_fast.enet_coordinate_descent_gram(
                coefs, alpha, 0, sub_covariance, row, row,
                max_iter, tol, check_random_state(None), False)
            # Update the precision matrix
            precision_[idx, idx] = (
                1. / (covariance_[idx, idx] -
                      np.dot(covariance_[indices != idx, idx], coefs)))
            precision_[indices != idx, idx] = (-precision_[idx, idx] * coefs)
            precision_[idx, indices != idx] = (-precision_[idx, idx] * coefs)
            coefs = np.dot(sub_covariance, coefs)
            covariance_[idx, indices != idx] = coefs
            covariance_[indices != idx, idx] = coefs
    return covariance_, precision_
def _posterior_dist_global(self, X, y, A, tol_solver, n_iter_solver):
    '''
    Uses Laplace approximation for calculating posterior distribution
    for all relevance vectors.
    '''
    f_full = lambda w: _gaussian_cost_grad(X, y, w, A)

    attempts = 10
    a = -2
    b = 2
    # Sometimes, fmin_l_bfgs_b fails to find a good minimizer.
    # Retry with a different initial point.
    for i in range(attempts):
        w_init = a + np.random.random(X.shape[1]) * (b - a)
        Mn = fmin_l_bfgs_b(f_full, x0=w_init, pgtol=tol_solver,
                           maxiter=n_iter_solver)[0]
        check_sign = [
            0 if Mn[j] * (y[j] - 0.5) >= 0 else 1 for j in range(len(Mn))
        ]
        if sum(check_sign) / len(Mn) < 0.1:
            break

    Xm = np.dot(X, Mn) + self.fixed_intercept
    t = (y - 0.5) * 2
    eta = norm.pdf(t * Xm) * t / norm.cdf(Xm * t) + 1e-300
    S = np.matmul(X.T * eta * (Xm + eta), X) + np.diag(A)
    cholesky = True
    # try using Cholesky, if it fails then fall back on pinvh
    try:
        R = np.linalg.cholesky(S)
        Sn = solve_triangular(R, np.eye(A.shape[0]),
                              check_finite=False, lower=True)
    except LinAlgError:
        Sn = pinvh(S)
        cholesky = False
    return [Mn, Sn, cholesky]
def _posterior_dist(self, X, y, A):
    '''
    Uses Laplace approximation for calculating posterior distribution
    '''
    f = lambda w: _logistic_cost_grad(X, y, w, A)
    w_init = np.random.random(X.shape[1])
    Mn = fmin_l_bfgs_b(f, x0=w_init, pgtol=self.tol_solver,
                       maxiter=self.n_iter_solver)[0]
    Xm = np.dot(X, Mn)
    s = expit(Xm)
    B = logistic._pdf(Xm)  # avoids underflow
    S = np.dot(X.T * B, X)
    np.fill_diagonal(S, np.diag(S) + A)
    t_hat = y - s
    cholesky = True
    # try using Cholesky, if it fails then fall back on pinvh
    try:
        R = np.linalg.cholesky(S)
        Sn = solve_triangular(R, np.eye(A.shape[0]),
                              check_finite=False, lower=True)
    except LinAlgError:
        Sn = pinvh(S)
        cholesky = False
    return [Mn, Sn, B, t_hat, cholesky]
def ridge_evidence_iter(X, y, penalize_bias=False, maxvalue=1e6, maxiter=1e3,
                        tolerance=1e-3, verbose=1, alpha0=1.):
    """Evidence optimization of ridge regression using fixed-point algorithm.

    See Park and Pillow PLOS Comp Biol 2011 for details.
    """
    N, p = X.shape
    XTX = np.dot(X.T, X)
    XTy = np.dot(X.T, y)
    yTy = np.sum(y * y)

    # Inverse prior variance
    alpha = 10.
    S = np.eye(p)
    I = np.eye(p)
    if not penalize_bias:
        S[0, 0] = 0

    # Initialize mean and noise variance using ridge MAP estimate
    mu = linalg.solve(XTX + alpha * I, XTy, sym_pos=False)
    noisevar = yTy - 2 * np.dot(mu.T, XTy) + np.dot(mu.T, XTX).dot(mu)
    alpha = alpha0 / noisevar

    niter = 0
    t0 = time.time()
    while True:
        alpha_old = alpha
        noisevar_old = noisevar

        Cprior_inv = alpha * I

        # Mean and covariance of posterior
        try:
            S = linalg.inv(XTX / noisevar + Cprior_inv)
        except:
            S = pinvh(XTX / noisevar + Cprior_inv)
        mu = np.dot(S, XTy) / noisevar

        # Compute new parameters
        alpha = (p - alpha * np.trace(S)) / np.sum(mu**2)
        alpha = float(alpha)
        noisevar = np.sum((y - np.dot(X, mu)) ** 2) \
            / (N - np.sum(1 - alpha * np.diag(S)))

        dd = np.abs(alpha_old - alpha) + np.abs(noisevar_old - noisevar)
        if dd < tolerance or alpha > maxvalue or niter > maxiter:
            break

        niter += 1
        if verbose > 1:
            print("%d | alpha=%0.3f | noisevar=%0.3f | %g | %0.2f s" %
                  (niter, alpha, noisevar, dd, time.time() - t0))

    if verbose > 0:
        t_fit = time.time() - t0
        print("Ridge: finished after %d iterations (%0.2f s)" % (niter, t_fit))

    return mu, S, alpha, noisevar
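# Hedged usage sketch for the fixed-point routine above on synthetic data.
# It assumes the module-level imports the function relies on are present
# (numpy as np, scipy.linalg as linalg, pinvh, time) and a SciPy version that
# still accepts the sym_pos keyword of linalg.solve. The first column of X is
# a bias column, since the code treats index 0 as the bias term.
import numpy as np

rng = np.random.RandomState(0)
N, p = 200, 6
X = np.hstack([np.ones((N, 1)), rng.randn(N, p - 1)])   # bias column first
w_true = np.array([0.5, 1.0, -2.0, 0.0, 0.0, 3.0])
y = X.dot(w_true) + 0.5 * rng.randn(N)

mu, S, alpha, noisevar = ridge_evidence_iter(X, y, verbose=0)
print("estimated weights:", np.round(mu, 2))
print("prior precision alpha: %.3f, noise variance: %.3f" % (alpha, noisevar))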
def set_optimal_shrinkage_amount(self, X, method="cv", verbose=False):
    """Set optimal shrinkage amount according to chosen method.

    /!\ Could be rewritten with GridSearchCV.

    Parameters
    ----------
    X: array-like, shape = [n_samples, n_features]
      Training data, where n_samples is the number of samples
      and n_features is the number of features.
    method: float or str in {"cv", "lw", "oas"},
      The method used to set the shrinkage. If a floating value is provided
      that value is used. Otherwise, the selection is made according to
      the selected method.
      "cv" (default): 10-fold cross-validation.
                      (or Leave-One Out cross-validation if n_samples < 10)
      "lw": Ledoit-Wolf criterion
      "oas": OAS criterion
    verbose: bool,
      Verbose mode or not.

    Returns
    -------
    optimal_shrinkage: float,
      The optimal amount of shrinkage.

    """
    n_samples, n_features = X.shape
    if isinstance(method, str):
        std_shrinkage = np.trace(empirical_covariance(X)) / \
            (n_features * n_samples)
        self.std_shrinkage = std_shrinkage
    if method == "cv":
        from sklearn.covariance import log_likelihood
        n_samples, n_features = X.shape
        shrinkage_range = np.concatenate((
            [0.], 10. ** np.arange(-n_samples / n_features, -1, 0.5),
            np.arange(0.05, 1., 0.05),
            np.arange(1., 20., 1.), np.arange(20., 100, 5.),
            10. ** np.arange(2, 7, 0.5)))
        # get a "pure" active set with a standard shrinkage
        active_set_estimator = RMCDl2(shrinkage=std_shrinkage)
        active_set_estimator.fit(X)
        active_set = np.where(active_set_estimator.support_)[0]
        # split this active set in ten parts
        active_set = active_set[np.random.permutation(active_set.size)]
        if active_set.size >= 10:
            # ten fold cross-validation
            n_folds = 10
            fold_size = active_set.size // 10
        else:
            n_folds = active_set.size
            fold_size = 1

        log_likelihoods = np.zeros((shrinkage_range.size, n_folds))
        if verbose:
            print("*** Cross-validation")
        for trial in range(n_folds):
            if verbose:
                print(trial / float(n_folds))
            # define train and test sets
            train_set_indices = np.concatenate(
                (np.arange(0, fold_size * trial),
                 np.arange(fold_size * (trial + 1), n_folds * fold_size)))
            train_set = X[active_set[train_set_indices]]
            test_set = X[active_set[np.arange(
                fold_size * trial, fold_size * (trial + 1))]]
            # learn location and covariance estimates from train set
            # for several amounts of shrinkage
            for i, shrinkage in enumerate(shrinkage_range):
                location = test_set.mean(0)
                cov = empirical_covariance(train_set)
                cov.flat[::(n_features + 1)] += shrinkage * std_shrinkage
                # compute test data likelihood
                log_likelihoods[i, trial] = log_likelihood(
                    empirical_covariance(test_set - location,
                                         assume_centered=True), pinvh(cov))
        optimal_shrinkage = shrinkage_range[
            np.argmax(log_likelihoods.mean(1))]
        self.shrinkage = optimal_shrinkage * std_shrinkage
        self.shrinkage_cst = optimal_shrinkage
        if verbose:
            print("optimal shrinkage: %g (%g x lambda(= %g))"
                  % (self.shrinkage, optimal_shrinkage, std_shrinkage))
        self.log_likelihoods = log_likelihoods
        self.shrinkage_range = shrinkage_range

        return shrinkage_range, log_likelihoods
    elif method == "oas":
        from sklearn.covariance import OAS
        rmcd = self.__init__(shrinkage=std_shrinkage)
        support = rmcd.fit(X).support_
        oas = OAS().fit(X[support])
        if oas.shrinkage_ == 1:
            self.shrinkage_cst = np.inf
        else:
            self.shrinkage_cst = oas.shrinkage_ / (1. - oas.shrinkage_)
        self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features
    elif method == "lw":
        from sklearn.covariance import LedoitWolf
        rmcd = RMCDl2(self, h=self.h, shrinkage=std_shrinkage)
        support = rmcd.fit(X).support_
        lw = LedoitWolf().fit(X[support])
        if lw.shrinkage_ == 1:
            self.shrinkage_cst = np.inf
        else:
            self.shrinkage_cst = lw.shrinkage_ / (1. - lw.shrinkage_)
        self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features
    else:
        pass
    return
def sparse_metric(X, S, D, eta, alpha):
    precision = sparse_metric_as_prec(X, S, D, eta=eta)
    emp_cov = pinvh(precision)
    covariance, _ = graph_lasso(emp_cov, alpha, verbose=True)
    return covariance
def c_step(X, h, objective_function, initial_estimates, verbose=False,
           cov_computation_method=empirical_covariance):
    """C_step procedure described in [1] aiming at computing the MCD

    Parameters
    ----------
    X: array-like, shape (n_samples, n_features)
      Data set in which we look for the h observations whose scatter matrix
      has minimum determinant
    h: int, > n_samples / 2
      Number of observations to compute the robust estimates of location
      and covariance from.
    remaining_iterations: int
      Number of iterations to perform.
      According to Rousseeuw [1], two iterations are sufficient to get close
      to the minimum, and we never need more than 30 to reach convergence.
    initial_estimates: 2-tuple
      Initial estimates of location and shape from which to run the c_step
      procedure:
      - initial_estimates[0]: an initial location estimate
      - initial_estimates[1]: an initial covariance estimate
    verbose: boolean
      Verbose mode

    Returns
    -------
    location: array-like, shape (n_features,)
      Robust location estimates
    covariance: array-like, shape (n_features, n_features)
      Robust covariance estimates
    support: array-like, shape (n_samples,)
      A mask for the `h` observations whose scatter matrix has minimum
      determinant

    Notes
    -----
    References:
    [1] A Fast Algorithm for the Minimum Covariance Determinant Estimator,
        1999, American Statistical Association and the American Society
        for Quality, TECHNOMETRICS

    """
    n_samples, n_features = X.shape
    n_iter = 30
    remaining_iterations = 30

    # Get initial robust estimates from the function parameters
    location = initial_estimates[0]
    covariance = initial_estimates[1]
    # run a special iteration for that case (to get an initial support)
    precision = pinvh(covariance)
    X_centered = X - location
    dist = (np.dot(X_centered, precision) * X_centered).sum(1)
    # compute new estimates
    support = np.zeros(n_samples).astype(bool)
    support[np.argsort(dist)[:h]] = True
    location = X[support].mean(0)
    covariance = cov_computation_method(X[support])
    previous_obj = np.inf

    # Iterative procedure for Minimum Covariance Determinant computation
    obj = objective_function(X[support], location, covariance)
    while (obj < previous_obj) and (remaining_iterations > 0):
        # save old estimates values
        previous_location = location
        previous_covariance = covariance
        previous_obj = obj
        previous_support = support
        # compute a new support from the full data set mahalanobis distances
        precision = pinvh(covariance)
        X_centered = X - location
        dist = (np.dot(X_centered, precision) * X_centered).sum(1)
        # compute new estimates
        support = np.zeros(n_samples).astype(bool)
        support[np.argsort(dist)[:h]] = True
        location = X[support].mean(axis=0)
        covariance = cov_computation_method(X[support])
        obj = objective_function(X[support], location, covariance)
        # update remaining iterations for early stopping
        remaining_iterations -= 1

    # Catch computation errors
    if np.isinf(obj):
        raise ValueError(
            "Singular covariance matrix. "
            "Please check that the covariance matrix corresponding "
            "to the dataset is full rank and that MCD is used with "
            "Gaussian-distributed data (or at least data drawn from a "
            "unimodal, symmetric distribution.")
    # Check convergence
    if np.allclose(obj, previous_obj):
        # c_step procedure converged
        if verbose:
            print("Optimal couple (location, covariance) found before "
                  "ending iterations (%d left)" % (remaining_iterations))
        results = location, covariance, obj, support
    elif obj > previous_obj:
        # objective function has increased (should not happen)
        current_iter = n_iter - remaining_iterations
        warnings.warn("Warning! obj > previous_obj (%.15f > %.15f, iter=%d)"
                      % (obj, previous_obj, current_iter), RuntimeWarning)
        results = previous_location, previous_covariance, \
            previous_obj, previous_support

    # Check early stopping
    if remaining_iterations == 0:
        if verbose:
            print('Maximum number of iterations reached')
        obj = fast_logdet(covariance)
        results = location, covariance, obj, support

    return results
def test_pinvh_simple_complex():
    a = (np.array([[1, 2, 3], [4, 5, 6], [7, 8, 10]])
         + 1j * np.array([[10, 8, 7], [6, 5, 4], [3, 2, 1]]))
    a = np.dot(a, a.conj().T)
    a_pinv = pinvh(a)
    assert_almost_equal(np.dot(a, a_pinv), np.eye(3))
def test_pinvh_simple_real():
    a = np.array([[1, 2, 3], [4, 5, 6], [7, 8, 10]], dtype=np.float64)
    a = np.dot(a, a.T)
    a_pinv = pinvh(a)
    assert_almost_equal(np.dot(a, a_pinv), np.eye(3))
def mutual_incoherence(x_relevant, x_irelevant):
    projector = np.dot(np.dot(x_irelevant.T, x_relevant),
                       pinvh(np.dot(x_relevant.T, x_relevant)))
    return np.max(np.abs(projector).sum(axis=1))
def mutual_incoherence(X_relevant, X_irelevant):
    """Mutual incoherence, as defined by formula (26a) of [Wainwright2006].
    """
    projector = np.dot(np.dot(X_irelevant.T, X_relevant),
                       pinvh(np.dot(X_relevant.T, X_relevant)))
    return np.max(np.abs(projector).sum(axis=1))
def set_optimal_shrinkage_amount(self, X, method="cv", verbose=False):
    """Set optimal shrinkage amount according to chosen method.

    /!\ Could be rewritten with GridSearchCV.

    Parameters
    ----------
    X: array-like, shape = [n_samples, n_features]
      Training data, where n_samples is the number of samples
      and n_features is the number of features.
    method: float or str in {"cv", "lw", "oas"},
      The method used to set the shrinkage. If a floating value is provided
      that value is used. Otherwise, the selection is made according to
      the selected method.
      "cv" (default): 10-fold cross-validation.
                      (or Leave-One Out cross-validation if n_samples < 10)
      "lw": Ledoit-Wolf criterion
      "oas": OAS criterion
    verbose: bool,
      Verbose mode or not.

    Returns
    -------
    optimal_shrinkage: float,
      The optimal amount of shrinkage.

    """
    n_samples, n_features = X.shape
    if isinstance(method, str):
        std_shrinkage = np.trace(empirical_covariance(X)) / \
            (n_features * n_samples)
        self.std_shrinkage = std_shrinkage
    if method == "cv":
        from sklearn.covariance import log_likelihood
        n_samples, n_features = X.shape
        shrinkage_range = np.concatenate(
            ([0.], 10.**np.arange(-n_samples / n_features, -1, 0.5),
             np.arange(0.05, 1., 0.05),
             np.arange(1., 20., 1.), np.arange(20., 100, 5.),
             10.**np.arange(2, 7, 0.5)))
        # get a "pure" active set with a standard shrinkage
        active_set_estimator = RMCDl2(shrinkage=std_shrinkage)
        active_set_estimator.fit(X)
        active_set = np.where(active_set_estimator.support_)[0]
        # split this active set in ten parts
        active_set = active_set[np.random.permutation(active_set.size)]
        if active_set.size >= 10:
            # ten fold cross-validation
            n_folds = 10
            fold_size = active_set.size // 10
        else:
            n_folds = active_set.size
            fold_size = 1

        log_likelihoods = np.zeros((shrinkage_range.size, n_folds))
        if verbose:
            print("*** Cross-validation")
        for trial in range(n_folds):
            if verbose:
                print(trial / float(n_folds))
            # define train and test sets
            train_set_indices = np.concatenate(
                (np.arange(0, fold_size * trial),
                 np.arange(fold_size * (trial + 1), n_folds * fold_size)))
            train_set = X[active_set[train_set_indices]]
            test_set = X[active_set[np.arange(fold_size * trial,
                                              fold_size * (trial + 1))]]
            # learn location and covariance estimates from train set
            # for several amounts of shrinkage
            for i, shrinkage in enumerate(shrinkage_range):
                location = test_set.mean(0)
                cov = empirical_covariance(train_set)
                cov.flat[::(n_features + 1)] += shrinkage * std_shrinkage
                # compute test data likelihood
                log_likelihoods[i, trial] = log_likelihood(
                    empirical_covariance(test_set - location,
                                         assume_centered=True), pinvh(cov))
        optimal_shrinkage = shrinkage_range[np.argmax(
            log_likelihoods.mean(1))]
        self.shrinkage = optimal_shrinkage * std_shrinkage
        self.shrinkage_cst = optimal_shrinkage
        if verbose:
            print("optimal shrinkage: %g (%g x lambda(= %g))"
                  % (self.shrinkage, optimal_shrinkage, std_shrinkage))
        self.log_likelihoods = log_likelihoods
        self.shrinkage_range = shrinkage_range

        return shrinkage_range, log_likelihoods
    elif method == "oas":
        from sklearn.covariance import OAS
        rmcd = self.__init__(shrinkage=std_shrinkage)
        support = rmcd.fit(X).support_
        oas = OAS().fit(X[support])
        if oas.shrinkage_ == 1:
            self.shrinkage_cst = np.inf
        else:
            self.shrinkage_cst = oas.shrinkage_ / (1. - oas.shrinkage_)
        self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features
    elif method == "lw":
        from sklearn.covariance import LedoitWolf
        rmcd = RMCDl2(self, h=self.h, shrinkage=std_shrinkage)
        support = rmcd.fit(X).support_
        lw = LedoitWolf().fit(X[support])
        if lw.shrinkage_ == 1:
            self.shrinkage_cst = np.inf
        else:
            self.shrinkage_cst = lw.shrinkage_ / (1. - lw.shrinkage_)
        self.shrinkage = self.shrinkage_cst * std_shrinkage * n_features
    else:
        pass
    return