def test_graphical_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # (need to set penalize.diagonal to FALSE)
    cov_R = np.array(
        [
            [0.68112222, 0.0000000, 0.265820, 0.02464314],
            [0.00000000, 0.1887129, 0.000000, 0.00000000],
            [0.26582000, 0.0000000, 3.095503, 0.28697200],
            [0.02464314, 0.0000000, 0.286972, 0.57713289],
        ]
    )
    icov_R = np.array(
        [
            [1.5190747, 0.000000, -0.1304475, 0.0000000],
            [0.0000000, 5.299055, 0.0000000, 0.0000000],
            [-0.1304475, 0.000000, 0.3498624, -0.1683946],
            [0.0000000, 0.000000, -0.1683946, 1.8164353],
        ]
    )
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ("cd", "lars"):
        cov, icov = graphical_lasso(emp_cov, alpha=1.0, return_costs=False, mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
def test_graphical_lasso_iris_singular():
    # Small subset of rows to test the rank-deficient case
    # Need to choose samples such that none of the variances are zero
    indices = np.arange(10, 13)

    # Hard-coded solution from R glasso package for alpha=0.01
    cov_R = np.array([
        [0.08, 0.056666662595, 0.00229729713223, 0.00153153142149],
        [0.056666662595, 0.082222222222, 0.00333333333333, 0.00222222222222],
        [0.002297297132, 0.003333333333, 0.00666666666667, 0.00009009009009],
        [0.001531531421, 0.002222222222, 0.00009009009009, 0.00222222222222]
    ])
    icov_R = np.array([
        [24.42244057, -16.831679593, 0.0, 0.0],
        [-16.83168201, 24.351841681, -6.206896552, -12.5],
        [0.0, -6.206896171, 153.103448276, 0.0],
        [0.0, -12.499999143, 0.0, 462.5]
    ])
    X = datasets.load_iris().data[indices, :]
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=0.01, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R, decimal=5)
        assert_array_almost_equal(icov, icov_R, decimal=5)
def graphical_lasso_wrap(emp_cov, alpha, max_iter):
    try:
        _, precision = graphical_lasso(emp_cov[0], alpha=alpha, max_iter=max_iter)
        return precision
    except FloatingPointError:
        return graphical_lasso_wrap(emp_cov, alpha=alpha * 1.1, max_iter=max_iter)
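# A minimal usage sketch of the retry wrapper above (not from the original
# source). It assumes the caller passes a container whose first element is the
# empirical covariance, matching the `emp_cov[0]` indexing in the wrapper.
import numpy as np
from sklearn.covariance import empirical_covariance, graphical_lasso

rng = np.random.RandomState(0)
X_demo = rng.randn(50, 4)                       # toy data: 50 samples, 4 features
emp_cov_demo = (empirical_covariance(X_demo),)  # wrapper indexes element 0

# the wrapper retries with alpha * 1.1 whenever the solver raises FloatingPointError
precision_demo = graphical_lasso_wrap(emp_cov_demo, alpha=0.05, max_iter=200)
print(precision_demo.shape)  # (4, 4)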
def fit_qda(self, data, lbls, idx):
    self.di_moments = dict(
        zip(self.uy, [{
            'mu': np.repeat(0, self.p).astype(float),
            'Sigma': np.zeros([self.p, self.p]),
            'iSigma': np.zeros([self.p, self.p])
        } for z in self.uy]))
    for yy in self.uy:
        self.di_moments[yy]['n'] = self.ny[yy]
        for ii in idx[yy]:
            x_ii = np.c_[self.enc.cenc.transform(data.iloc[ii, self.enc.cidx]),
                         self.enc.nenc.transform(data.iloc[ii, self.enc.nidx])]
            self.di_moments[yy]['mu'] += x_ii.sum(axis=0)
            self.di_moments[yy]['Sigma'] += x_ii.T.dot(x_ii)
        # Adjust raw numbers
        self.di_moments[yy]['mu'] = self.di_moments[yy]['mu'].reshape(
            [self.p, 1]) / self.ny[yy]
        self.di_moments[yy]['Sigma'] = (
            self.di_moments[yy]['Sigma'] -
            self.ny[yy] * self.di_moments[yy]['mu'].dot(
                self.di_moments[yy]['mu'].T)) / (self.ny[yy] - 1)
        # self.di_moments[yy]['ldet'] = np.log(np.linalg.det(self.di_moments[yy]['Sigma']))
        # self.di_moments[yy]['iSigma'] = np.linalg.pinv(self.di_moments[yy]['Sigma'])
        # graphical_lasso returns (covariance, precision); keep the precision
        self.di_moments[yy]['iSigma'] = graphical_lasso(
            emp_cov=self.di_moments[yy]['Sigma'], alpha=0.001)[1]
def test_graph_lasso_2D():
    # Hard-coded solution from Python skggm package
    # obtained by calling `quic(emp_cov, lam=.1, tol=1e-8)`
    cov_skggm = np.array([[3.09550269, 1.186972], [1.186972, 0.57713289]])

    icov_skggm = np.array([[1.52836773, -3.14334831], [-3.14334831, 8.19753385]])
    X = datasets.load_iris().data[:, 2:]
    emp_cov = empirical_covariance(X)
    for method in ("cd", "lars"):
        cov, icov = graphical_lasso(emp_cov, alpha=0.1, return_costs=False, mode=method)
        assert_array_almost_equal(cov, cov_skggm)
        assert_array_almost_equal(icov, icov_skggm)
def fit(self, X, W=None):
    '''
    X: data matrix, (n x d)
        each row corresponds to a single instance
        Must be shifted to zero already.
    W: connectivity graph, (n x n)
        +1 for positive pairs, -1 for negative.
    '''
    print('SDML.fit ...', numpy.shape(X))
    self.mean_ = numpy.mean(X, axis=0)
    X = numpy.matrix(X - self.mean_)
    # set up prior M
    #print 'X', X.shape
    if self.use_cov:
        M = np.cov(X.T)
    else:
        M = np.identity(X.shape[1])
    if W is None:
        W = np.ones((X.shape[1], X.shape[1]))
    #print 'W', W.shape
    L = laplacian(W, normed=False)
    #print 'L', L.shape
    inner = X.dot(L.T)
    loss_matrix = inner.T.dot(X)
    #print 'loss', loss_matrix.shape
    #print 'pinv', pinvh(M).shape
    P = pinvh(M) + self.balance_param * loss_matrix
    #print 'P', P.shape
    emp_cov = pinvh(P)
    # hack: ensure positive semidefinite
    emp_cov = emp_cov.T.dot(emp_cov)
    M, _ = graphical_lasso(emp_cov, self.sparsity_param, verbose=self.verbose)
    self.M = M
    C = numpy.linalg.cholesky(self.M)
    self.dewhiten_ = C
    self.whiten_ = numpy.linalg.inv(C)
    # U: rotation matrix, S: scaling matrix
    #U, S, _ = scipy.linalg.svd(M)
    #s = np.sqrt(S.clip(self.EPS))
    #s_inv = np.diag(1./s)
    #s = np.diag(s)
    #self.whiten_ = np.dot(np.dot(U, s_inv), U.T)
    #self.dewhiten_ = np.dot(np.dot(U, s), U.T)
    #print 'M:', M
    print('SDML.fit done')
def update_omega(self, X, Y, W, b, N, K, sample_weight=None):
    """
    Update the conditional covariance matrix among responses.
    If assumed sparse, solve by graphical lasso (implemented by scikit-learn).
    Note that this option sometimes triggers PSD-related warnings from the
    graphical lasso implementation.

    Parameters
    ----------
    X: numpy array
        N x D, features
    Y: numpy array
        N x K, responses
    W: numpy array
        D x K, coefficients of tasks
    b: numpy array
        K x 1, intercepts of tasks
    N: integer
        sample size
    K: integer
        dimension of tasks/responses
    sample_weight: numpy array
        N x 1, weight of each sample

    Returns
    -------
    omega: numpy array
        K x K, conditional covariance among responses
    omega_i: numpy array
        K x K, inverse of omega
    """
    _dif = Y - X @ W - b
    H = np.diag(sample_weight)
    if self.sparse_omega:
        omega, omega_i = graphical_lasso((_dif.T @ _dif) / N, self.lam2)
    else:
        omega = (_dif.T @ H @ _dif + self.lam2 * np.identity(K)) / np.sum(H)
        omega_i = np.linalg.inv(omega)
    return omega, omega_i
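# A standalone sketch of the omega update above (illustrative names, not from
# the original source): graphical lasso on the residual covariance for the
# sparse case, a ridge-regularised weighted covariance otherwise.
import numpy as np
from sklearn.covariance import graphical_lasso

rng = np.random.RandomState(0)
N, D, K = 200, 5, 3
X = rng.randn(N, D)
W = rng.randn(D, K)
b = rng.randn(K)
Y = X @ W + b + 0.1 * rng.randn(N, K)

resid = Y - X @ W - b
lam2 = 0.05

# sparse omega: graphical lasso on the residual covariance (returns cov, precision)
omega, omega_i = graphical_lasso(resid.T @ resid / N, lam2)

# dense omega: weighted, ridge-regularised covariance and its explicit inverse
H = np.diag(np.ones(N))
omega_dense = (resid.T @ H @ resid + lam2 * np.identity(K)) / np.sum(H)
omega_dense_i = np.linalg.inv(omega_dense)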
def _graphical_lasso(self, expected_value_resid_square, alpha=None,
                     normalize_param=None, Sigma_init=None):
    """
    Given Gamma, we estimate Omega, the graphical lasso solution for the
    precision matrix.

    :param expected_value_resid_square:
    :param Sigma_init:
    :param normalize_param: number of rows in the Y matrix. This is not
        self.data.n when we use CV.
    :return:
    """
    if not normalize_param:
        normalize_param = self.data.n
    if not alpha:
        alpha = self.alpha
    expected_value_resid_square *= (1 / normalize_param)
    expected_value_resid_square = np.array(expected_value_resid_square)
    # TODO: for now we do not do CV since the func that does this requires
    # (Y-Z\Gamma), and not (Y-Z\Gamma)^T(Y-Z\Gamma). In theory, we can do SVD
    # of the latter to return to (Y-Z\Gamma), but this is weird, because this
    # is an expectation... If we think about it, in the CV func we will get
    # (Y-Z\Gamma)^T(Y-Z\Gamma) again as the covariance, so it's probably OK.
    if Sigma_init is not None:
        Sigma_init = np.matrix(Sigma_init)
    mode = 'cd'
    if self.data.n < self.data.p:
        # We prefer the LARS solver for very sparse underlying graphs
        # TODO: move this so we won't need to check this every iteration
        mode = 'lars'
    Sigma, Omega = graphical_lasso(expected_value_resid_square, alpha=alpha,
                                   cov_init=Sigma_init,
                                   max_iter=self.glasso_max_iter, mode=mode)
    return Sigma, Omega
def test_graphical_lasso(random_state=0):
    # Sample data from a sparse multivariate normal
    dim = 20
    n_samples = 100
    random_state = check_random_state(random_state)
    prec = make_sparse_spd_matrix(dim, alpha=.95, random_state=random_state)
    cov = linalg.inv(prec)
    X = random_state.multivariate_normal(np.zeros(dim), cov, size=n_samples)
    emp_cov = empirical_covariance(X)

    for alpha in (0., .1, .25):
        covs = dict()
        icovs = dict()
        for method in ('cd', 'lars'):
            cov_, icov_, costs = graphical_lasso(emp_cov, return_costs=True,
                                                 alpha=alpha, mode=method)
            covs[method] = cov_
            icovs[method] = icov_
            costs, dual_gap = np.array(costs).T
            # Check that the costs always decrease (doesn't hold if alpha == 0)
            if not alpha == 0:
                assert_array_less(np.diff(costs), 0)
        # Check that the 2 approaches give similar results
        assert_array_almost_equal(covs['cd'], covs['lars'], decimal=4)
        assert_array_almost_equal(icovs['cd'], icovs['lars'], decimal=4)

    # Smoke test the estimator
    model = GraphicalLasso(alpha=.25).fit(X)
    model.score(X)
    assert_array_almost_equal(model.covariance_, covs['cd'], decimal=4)
    assert_array_almost_equal(model.covariance_, covs['lars'], decimal=4)

    # For a centered matrix, assume_centered could be chosen True or False
    # Check that this returns indeed the same result for centered data
    Z = X - X.mean(0)
    precs = list()
    for assume_centered in (False, True):
        prec_ = GraphicalLasso(
            assume_centered=assume_centered).fit(Z).precision_
        precs.append(prec_)
    assert_array_almost_equal(precs[0], precs[1])
def glasso_R(data, alphas, mode='cd'):
    """
    Estimates the graph with graphical lasso based on its implementation in R.

    Parameters
    ----------
    data: numpy ndarray
        The input data to reconstruct/estimate a graph on.
        Features as columns and observations as rows.
    alphas: float
        Non-negative regularization parameter of the graphical lasso algorithm.

    Returns
    -------
    adjacency matrix : the estimated adjacency matrix.
    """
    scaler = StandardScaler()
    data = scaler.fit_transform(data)

    n_samples, _ = data.shape  # observations are rows
    cov_emp = np.dot(data.T, data) / n_samples

    covariance, precision_matrix = graphical_lasso(emp_cov=cov_emp,
                                                   alpha=alphas, mode=mode)
    adjacency_matrix = precision_matrix.astype(bool).astype(int)
    adjacency_matrix[np.diag_indices_from(adjacency_matrix)] = 0
    return adjacency_matrix
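# A hypothetical call of glasso_R above on synthetic data (not from the
# original source); the function returns a binary adjacency matrix with a
# zeroed diagonal.
import numpy as np

rng = np.random.RandomState(0)
data_demo = rng.randn(200, 6)               # 200 observations, 6 features
adj_demo = glasso_R(data_demo, alphas=0.2)
print(adj_demo.shape, adj_demo.diagonal().sum())  # (6, 6) 0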
def test_graphical_lasso_iris():
    # Hard-coded solution from R glasso package for alpha=1.0
    # The iris datasets in R and scikit-learn do not match in a few places,
    # these values are for the scikit-learn version.
    cov_R = np.array([
        [0.68112222, 0.0, 0.2651911, 0.02467558],
        [0.00, 0.1867507, 0.0, 0.00],
        [0.26519111, 0.0, 3.0924249, 0.28774489],
        [0.02467558, 0.0, 0.2877449, 0.57853156]
    ])
    icov_R = np.array([
        [1.5188780, 0.0, -0.1302515, 0.0],
        [0.0, 5.354733, 0.0, 0.0],
        [-0.1302515, 0.0, 0.3502322, -0.1686399],
        [0.0, 0.0, -0.1686399, 1.8123908]
    ])
    X = datasets.load_iris().data
    emp_cov = empirical_covariance(X)
    for method in ('cd', 'lars'):
        cov, icov = graphical_lasso(emp_cov, alpha=1.0, return_costs=False,
                                    mode=method)
        assert_array_almost_equal(cov, cov_R)
        assert_array_almost_equal(icov, icov_R)
def ta_sparse_covariance(df: Typing.PatchedPandas,
                         convert_to='returns',
                         covariance='ewma',
                         cov_arg=0.97,
                         rho=0.1,
                         inverse=False,
                         **kwargs):
    from sklearn.covariance import graphical_lasso

    if covariance in ['ewma', 'weighted']:
        cov_func = ta_ewma_covariance
    elif covariance in ['rolling', 'moving']:
        cov_func = ta_moving_covariance
    elif covariance in ['garch', 'mgarch']:
        cov_func = ta_mgarch_covariance
    else:
        raise ValueError("unknown covariance, expected one of [ewma, moving, garch]")

    return \
        cov_func(df, cov_arg, convert_to) \
        .groupby(level=0) \
        .apply(lambda x: x if x.isnull().values.any() else
               _pd.DataFrame(graphical_lasso(x.values, rho, **kwargs)[int(inverse)],
                             index=x.index, columns=x.columns))
def _fit(self, pairs, y):
    if not HAS_SKGGM:
        if self.verbose:
            print("SDML will use scikit-learn's graphical lasso solver.")
    else:
        if self.verbose:
            print("SDML will use skggm's graphical lasso solver.")
    pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples')

    # set up (the inverse of) the prior M
    if self.use_cov:
        X = np.vstack(
            {tuple(row) for row in pairs.reshape(-1, pairs.shape[2])})
        prior_inv = np.atleast_2d(np.cov(X, rowvar=False))
    else:
        prior_inv = np.identity(pairs.shape[2])
    diff = pairs[:, 0] - pairs[:, 1]
    loss_matrix = (diff.T * y).dot(diff)
    emp_cov = prior_inv + self.balance_param * loss_matrix

    # our initialization will be the matrix with emp_cov's eigenvalues,
    # with a constant added so that they are all positive (plus an epsilon
    # to ensure definiteness). This is empirical.
    w, V = np.linalg.eigh(emp_cov)
    min_eigval = np.min(w)
    if min_eigval < 0.:
        warnings.warn("Warning, the input matrix of graphical lasso is not "
                      "positive semi-definite (PSD). The algorithm may diverge, "
                      "and lead to degenerate solutions. "
                      "To prevent that, try to decrease the balance parameter "
                      "`balance_param` and/or to set use_cov=False.",
                      ConvergenceWarning)
    w -= min_eigval  # we translate the eigenvalues to make them all positive
    w += 1e-10  # we add a small offset to avoid definiteness problems
    sigma0 = (V * w).dot(V.T)
    try:
        if HAS_SKGGM:
            theta0 = pinvh(sigma0)
            M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param,
                                    msg=self.verbose,
                                    Theta0=theta0, Sigma0=sigma0)
        else:
            _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param,
                                   verbose=self.verbose,
                                   cov_init=sigma0)
        raised_error = None
        w_mahalanobis, _ = np.linalg.eigh(M)
        not_spd = any(w_mahalanobis < 0.)
        not_finite = not np.isfinite(M).all()
    except Exception as e:
        raised_error = e
        not_spd = False  # not_spd not applicable here so we set to False
        not_finite = False  # not_finite not applicable here so we set to False
    if raised_error is not None or not_spd or not_finite:
        msg = ("There was a problem in SDML when using {}'s graphical "
               "lasso solver.").format("skggm" if HAS_SKGGM else "scikit-learn")
        if not HAS_SKGGM:
            skggm_advice = (" skggm's graphical lasso can sometimes converge "
                            "on non SPD cases where scikit-learn's graphical "
                            "lasso fails to converge. Try to install skggm and "
                            "rerun the algorithm (see the README.md for the "
                            "right version of skggm).")
            msg += skggm_advice
        if raised_error is not None:
            msg += " The following error message was thrown: {}.".format(
                raised_error)
        raise RuntimeError(msg)

    self.transformer_ = transformer_from_metric(np.atleast_2d(M))
    return self
def _fit(self, pairs, y):
    if self.use_cov != 'deprecated':
        warnings.warn('"use_cov" parameter is not used.'
                      ' It has been deprecated in version 0.5.0 and will be'
                      ' removed in 0.6.0. Use "prior" instead.',
                      DeprecationWarning)
    if not HAS_SKGGM:
        if self.verbose:
            print("SDML will use scikit-learn's graphical lasso solver.")
    else:
        if self.verbose:
            print("SDML will use skggm's graphical lasso solver.")
    pairs, y = self._prepare_inputs(pairs, y, type_of_inputs='tuples')

    # set up (the inverse of) the prior M
    # if the prior is the default (None), we raise a warning
    if self.prior is None:
        # TODO:
        # replace prior=None by prior='identity' in v0.6.0 and remove the
        # warning
        msg = ("Warning, no prior was set (`prior=None`). As of version 0.5.0, "
               "the default prior will now be set to "
               "'identity', instead of 'covariance'. If you still want to use "
               "the inverse of the covariance matrix as a prior, "
               "set prior='covariance'. This warning will disappear in "
               "v0.6.0, and `prior` parameter's default value will be set to "
               "'identity'.")
        warnings.warn(msg, ChangedBehaviorWarning)
        prior = 'identity'
    else:
        prior = self.prior
    _, prior_inv = _initialize_metric_mahalanobis(
        pairs, prior, return_inverse=True, strict_pd=True,
        matrix_name='prior', random_state=self.random_state)
    diff = pairs[:, 0] - pairs[:, 1]
    loss_matrix = (diff.T * y).dot(diff)
    emp_cov = prior_inv + self.balance_param * loss_matrix

    # our initialization will be the matrix with emp_cov's eigenvalues,
    # with a constant added so that they are all positive (plus an epsilon
    # to ensure definiteness). This is empirical.
    w, V = np.linalg.eigh(emp_cov)
    min_eigval = np.min(w)
    if min_eigval < 0.:
        warnings.warn("Warning, the input matrix of graphical lasso is not "
                      "positive semi-definite (PSD). The algorithm may diverge, "
                      "and lead to degenerate solutions. "
                      "To prevent that, try to decrease the balance parameter "
                      "`balance_param` and/or to set prior='identity'.",
                      ConvergenceWarning)
    w -= min_eigval  # we translate the eigenvalues to make them all positive
    w += 1e-10  # we add a small offset to avoid definiteness problems
    sigma0 = (V * w).dot(V.T)
    try:
        if HAS_SKGGM:
            theta0 = pinvh(sigma0)
            M, _, _, _, _, _ = quic(emp_cov, lam=self.sparsity_param,
                                    msg=self.verbose,
                                    Theta0=theta0, Sigma0=sigma0)
        else:
            _, M = graphical_lasso(emp_cov, alpha=self.sparsity_param,
                                   verbose=self.verbose,
                                   cov_init=sigma0)
        raised_error = None
        w_mahalanobis, _ = np.linalg.eigh(M)
        not_spd = any(w_mahalanobis < 0.)
        not_finite = not np.isfinite(M).all()
    except Exception as e:
        raised_error = e
        not_spd = False  # not_spd not applicable here so we set to False
        not_finite = False  # not_finite not applicable here so we set to False
    if raised_error is not None or not_spd or not_finite:
        msg = ("There was a problem in SDML when using {}'s graphical "
               "lasso solver.").format("skggm" if HAS_SKGGM else "scikit-learn")
        if not HAS_SKGGM:
            skggm_advice = (" skggm's graphical lasso can sometimes converge "
                            "on non SPD cases where scikit-learn's graphical "
                            "lasso fails to converge. Try to install skggm and "
                            "rerun the algorithm (see the README.md for the "
                            "right version of skggm).")
            msg += skggm_advice
        if raised_error is not None:
            msg += " The following error message was thrown: {}.".format(
                raised_error)
        raise RuntimeError(msg)

    self.components_ = components_from_metric(np.atleast_2d(M))
    return self
def fit_sklearn(self):
    return graphical_lasso(emp_cov=self.emp_cov, alpha=self.alpha)
def fit(self, reltol=1e-5, max_itr=1000, verbose=True):
    nr_itr = 0
    has_not_converged = True
    Theta0 = self.Theta_init.copy()
    mu0 = self.mu_init
    tau = np.zeros(self.N)
    tol = np.inf

    while has_not_converged and nr_itr < max_itr:
        print(f'{nr_itr} / {max_itr}, {tol}')
        if verbose:
            print(f' {nr_itr} / {max_itr}')
        if self.mu_zero:
            for t in range(self.N):
                tau[t] = (self.nu + self.N) / (self.nu + np.dot(
                    self.x[t, :], Theta0).dot(self.x[t, :]))
            # S_hat = np.zeros((self.p, self.p))
            # for i in range(self.N):
            #     S_hat += np.outer(self.x[i,:], self.x[i,:]) * tau[i]
            S_hat = np.array([
                np.outer(self.x[i, :], self.x[i, :]) * tau[i]
                for i in range(self.N)
            ]).sum(0)
            # print(S_hat[:5,:5])
            _, Theta_t = graphical_lasso(S_hat, self.alpha)
            tol = np.linalg.norm(Theta_t - Theta0, 'fro') / np.linalg.norm(
                Theta0, 'fro')
            has_not_converged = (np.linalg.norm(Theta_t - Theta0, 'fro') /
                                 np.linalg.norm(Theta0, 'fro') > reltol)
            Theta0 = Theta_t.copy()
        else:
            for t in range(self.N):
                tau[t] = (self.nu + self.N) / (self.nu + np.dot(
                    self.x[t, :] - mu0, Theta0).dot(self.x[t, :] - mu0))
            sum_tau = np.sum(tau)
            # weighted mean over all N samples
            mu_hat = np.array(
                [tau[i] * self.x[i, :] for i in range(self.N)]).sum(0) / sum_tau
            S_hat = np.array([
                np.outer(self.x[i, :] - mu_hat, self.x[i, :] - mu_hat) * tau[i]
                for i in range(self.N)
            ]).sum(0)
            _, Theta_t = graphical_lasso(S_hat, self.alpha)
            tol = np.linalg.norm(Theta_t - Theta0, 'fro') / np.linalg.norm(
                Theta0, 'fro')
            has_not_converged = (tol > reltol)
            Theta0 = Theta_t.copy()
            mu0 = mu_hat.copy()
        nr_itr += 1

    return ((self.nu - 2.0) / self.nu) * Theta_t
def markov_network(sigma2, k, lambda_vec, tol=10**-14, opt=False):
    """
    For details, see here.

    Parameters
    ----------
    sigma2 : array, shape(n_, n_)
    k : scalar
    lambda_vec : array, shape(l_,)
    tol : scalar
    opt : bool

    Returns
    ----------
    sigma2_bar : array, shape(n_, n_, l_)
    c2_bar : array, shape(n_, n_, l_)
    phi2_bar : array, shape(n_, n_, l_)
    lambda_bar : scalar
    conv : scalar
    l_bar : scalar
    """
    lambda_vec = np.sort(lambda_vec)
    l_ = len(lambda_vec)
    c2_bar = np.zeros(sigma2.shape + (l_,))
    phi2_bar = np.zeros(sigma2.shape + (l_,))
    z = np.zeros(l_)

    # Compute correlation
    c2, sigma_vec = cov_2_corr(sigma2)

    for l in range(l_):
        lam = lambda_vec[l]
        # perform glasso shrinkage
        _, invs2_tilde, *_ = graphical_lasso(c2, lam)
        # correlation extraction
        c2_tilde = np.linalg.solve(invs2_tilde, np.eye(invs2_tilde.shape[0]))
        c2_bar[:, :, l] = cov_2_corr(c2_tilde)[0]  # estimated corr.
        # inv. corr.
        phi2_bar[:, :, l] = np.linalg.solve(c2_bar[:, :, l],
                                            np.eye(c2_bar[:, :, l].shape[0]))
        tmp = abs(phi2_bar[:, :, l])
        z[l] = np.sum(tmp < tol)

    # selection
    index = list(np.where(z >= k)[0])
    if len(index) == 0:
        index.append(l)
        conv = 0  # target of k null entries not reached
    else:
        conv = 1  # target of k null entries reached
    l_bar = index[0]
    lambda_bar = lambda_vec[l_bar]

    # output
    if not opt:
        c2_bar = c2_bar[:, :, l_bar]    # shrunk correlation
        phi2_bar = phi2_bar[:, :, l_bar]  # shrunk inverse correlation
        l_bar = None
        # shrunk covariance
        sigma2_bar = np.diag(sigma_vec) @ c2_bar @ np.diag(sigma_vec)
    else:
        sigma2_bar = np.zeros(sigma2.shape + (l_,))
        for l in range(l_):
            sigma2_bar[:, :, l] = np.diag(sigma_vec) @ c2_bar[:, :, l] @ \
                np.diag(sigma_vec)

    return sigma2_bar, c2_bar, phi2_bar, lambda_bar, conv, l_bar
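# A hypothetical call of markov_network above (not from the original source);
# cov_2_corr is assumed to be the correlation/volatility helper from the same
# codebase, returning (correlation matrix, vector of standard deviations).
import numpy as np

rng = np.random.RandomState(0)
x_demo = rng.randn(500, 4)
sigma2_demo = np.cov(x_demo, rowvar=False)
lambda_vec_demo = np.array([0.05, 0.1, 0.2])

# ask for at least k = 2 (near-)null entries in the shrunk inverse correlation
sigma2_bar, c2_bar, phi2_bar, lambda_bar, conv, _ = markov_network(
    sigma2_demo, k=2, lambda_vec=lambda_vec_demo)
print(lambda_bar, conv)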
def fit(self, X, y):
    """Fit the QDA to the training data"""
    methods = [
        None, 'nonpara', "fr", "kl", "mean", "wass", "reg", "freg", "sparse",
        "kl_new"
    ]
    rules = ["qda", "da", "fda"]
    if self.method not in methods:
        raise ValueError("method must be in {}; got (method={})".format(
            methods, self.method))
    if self.rule not in rules:
        raise ValueError("rule must be in {}; got (rule={})".format(
            rules, self.rule))

    X, y = check_X_y(X, y)
    self.labels_, self.n_samples_ = np.unique(y, return_counts=True)
    self.n_class_ = self.labels_.size
    n_samples, self.n_features_ = X.shape
    self.rho_ = np.array([self.rho]).ravel()
    if self.rho == -1:
        chi_quantile = chi2.ppf(
            0.5, self.n_features_ * (self.n_features_ + 3) / 2)
        self.rho_ = chi_quantile * np.ones(self.n_class_) / self.n_samples_
    else:
        if self.rho_.size == 1:
            self.rho_ = self.rho_[0] * np.ones(self.n_class_)
        if self.adaptive:
            self.rho_ *= np.sqrt(self.n_features_)
    # PRINT!!!!
    #print(self.n_features_, chi_quantile, self.n_samples_, self.rho_)
    if self.priors is None:
        self.priors_ = self.n_samples_ / n_samples
    else:
        self.priors_ = self.priors

    self.mean_ = []
    self.covariance_ = []
    self.cov_sqrt_ = []
    self.prec_ = []
    self.prec_sqrt_ = []
    self.logdet_ = []
    self.rotations_ = []
    self.scalings_ = []
    for n_c, label in enumerate(self.labels_):
        mask = (y == label)
        X_c = X[mask, :]
        X_c_mean = np.mean(X_c, 0)
        X_c_bar = X_c - X_c_mean
        U, s, Vt = np.linalg.svd(X_c_bar, full_matrices=False)
        s2 = (s**2) / (len(X_c_bar) - 1)
        self.mean_.append(X_c_mean)
        if self.method == 'reg':
            s2 += self.rho_[n_c]
            inv_s2 = 1 / s2
        elif self.method in ['fr', 'kl', 'mean', 'freg', 'kl_new', 'nonpara']:
            sc = StandardScaler()
            X_c_ = sc.fit_transform(X_c)
            cov_c = ledoit_wolf(X_c_)[0]
            cov_c = sc.scale_[:, np.newaxis] * cov_c * sc.scale_[np.newaxis, :]
            s2, V = np.linalg.eigh(cov_c)
            s2 = np.abs(s2)
            inv_s2 = 1 / s2
            Vt = V.T
        elif self.method == 'sparse':
            try:
                cov_c = GraphicalLasso(alpha=self.rho_[n_c]).fit(X_c_bar)
                cov_c = cov_c.covariance_
            except:
                tol = self.tol * 1e6
                cov_c = graphical_lasso(
                    np.dot(((1 - tol) * s2 + tol) * Vt.T, Vt),
                    self.rho_[n_c])[0]
            s2, V = np.linalg.eigh(cov_c)
            s2 = np.abs(s2)
            inv_s2 = 1 / s2
            Vt = V.T
        elif self.method == 'wass':
            f = lambda gamma: gamma * (self.rho_[n_c] ** 2 - 0.5 * np.sum(s2)) - self.n_features_ + \
                0.5 * (np.sum(np.sqrt((gamma ** 2) * (s2 ** 2) + 4 * s2 * gamma)))
            lb = 0
            gamma_0 = 0
            ub = np.sum(np.sqrt(1 / (s2 + self.tol))) / self.rho_[n_c]
            f_ub = f(ub)
            for bsect in range(100):
                gamma_0 = 0.5 * (ub + lb)
                f_gamma_0 = f(gamma_0)
                if f_ub * f_gamma_0 > 0:
                    ub = gamma_0
                    f_ub = f_gamma_0
                else:
                    lb = gamma_0
                if abs(ub - lb) < self.tol:
                    break
            inv_s2 = gamma_0 * (1 - 2 / (1 + np.sqrt(1 + 4 / (gamma_0 * (s2 + self.tol)))))
            s2 = 1 / (inv_s2 + self.tol)
        else:
            s2 += self.tol
            inv_s2 = 1 / s2
        self.covariance_.append(np.dot(s2 * Vt.T, Vt))
        self.cov_sqrt_.append(np.dot(np.sqrt(s2) * Vt.T, Vt))
        self.prec_.append(np.dot(inv_s2 * Vt.T, Vt))
        self.prec_sqrt_.append(np.dot(np.sqrt(inv_s2) * Vt.T, Vt))
        self.logdet_.append(np.log(s2).sum())
        #print(self.logdet_)
        self.rotations_.append(Vt)
        self.scalings_.append(s2)
    return self
def solve_glasso2(cov, lambda_):
    return graphical_lasso(cov, lambda_)[1]
# (a)(ii) Observations
# All covariances are positive, with smallest value 3.63e-05
# Many pairwise precisions are near 0, but not exactly 0
# Suggests that the underlying undirected graphical model is not sparse

# (b) Graphical Lasso
# As alpha increases, the covariance elements shrink towards 0
# and the number of non-zero edges in the estimated graph also shrinks
from sklearn.covariance import graphical_lasso

penalized_covs, penalized_precs = {}, {}
for alpha in [1e-5, 1e-4, 1e-3]:
    penalized_cov, penalized_prec = graphical_lasso(
        emp_cov=sample_cov, alpha=alpha, max_iter=1000)
    penalized_covs[alpha] = penalized_cov
    penalized_precs[alpha] = penalized_prec
    print(f'Number of Non-Zero Edges (alpha = {alpha}): {np.sum(penalized_prec != 0.)}')

fig, axes = plt.subplots(nrows=1, ncols=2)
axes[0].set_xlabel('Pairwise Covariances')
axes[1].set_xlabel('Pairwise Precisions')
for alpha, penalized_cov in penalized_covs.items():
    axes[0].hist(penalized_cov.flatten(),
                 label=r'$\alpha = $' + str(alpha),
                 bins=50)
    axes[1].hist(penalized_precs[alpha].flatten(),
                 label=r'$\alpha = $' + str(alpha),
                 bins=50)
def get_alpha_max(X, observation, sigma_min, pb_name, alpha_Sigma_inv=None):
    """Compute alpha_max specific to pb_name.

    Parameters:
    ----------
    X: np.array, shape (n_channels, n_sources)
    observation: np.array, shape (n_channels, n_times) or
        (n_epochs, n_channels, n_times)
    sigma_min: float, >0
    pb_name: string, "SGCL" "CLaR" "MTL" "MTLME"

    Output:
    -------
    float
        alpha_max of the optimization problem.
    """
    n_channels, n_times = observation.shape[-2], observation.shape[-1]
    if observation.ndim == 3:
        Y = observation.mean(axis=0)
    else:
        Y = observation
    if pb_name == "MTL":
        n_channels, n_times = Y.shape
        alpha_max = l_2_inf(X.T @ Y) / (n_times * n_channels)
    elif pb_name == "MTLME":
        observations = observation.transpose((1, 0, 2))
        observations = observations.reshape(observations.shape[0], -1)
        alpha_max = get_alpha_max(X, observations, sigma_min, "MTL")
    elif pb_name == "SGCL":
        assert observation.ndim == 2
        _, S_max_inv = clp_sqrt(Y @ Y.T / n_times, sigma_min)
        alpha_max = l_2_inf(X.T @ S_max_inv @ Y)
        alpha_max /= (n_channels * n_times)
    elif pb_name == "CLAR" or pb_name == "NNCVX":
        n_epochs = observation.shape[0]
        cov_Yl = 0
        for l in range(n_epochs):
            cov_Yl += observation[l, :, :] @ observation[l, :, :].T
        cov_Yl /= (n_epochs * n_times)
        _, S_max_inv = clp_sqrt(cov_Yl, sigma_min)
        alpha_max = l_2_inf(X.T @ S_max_inv @ Y)
        alpha_max /= (n_channels * n_times)
    elif pb_name == "mrce":
        assert observation.ndim == 3
        assert alpha_Sigma_inv is not None
        emp_cov = get_emp_cov(observation)
        Sigma, Sigma_inv = graphical_lasso(emp_cov, alpha_Sigma_inv,
                                           max_iter=10**6)
        alpha_max = l_2_inf(X.T @ Sigma_inv @ Y) / (n_channels * n_times)
    elif pb_name == "glasso":
        assert observation.ndim == 2
        assert alpha_Sigma_inv is not None
        emp_cov = observation @ observation.T / n_times
        Sigma, Sigma_inv = graphical_lasso(emp_cov, alpha_Sigma_inv)
        alpha_max = l_2_inf(X.T @ Sigma_inv @ Y) / (n_channels * n_times)
    elif pb_name == "mrce":
        # NOTE: this second "mrce" branch is kept from the original code;
        # it is unreachable because the earlier branch already handles "mrce"
        assert observation.ndim == 2
        assert alpha_Sigma_inv is not None
        emp_cov = observation @ observation.T / n_times
        Sigma, Sigma_inv = graphical_lasso(emp_cov, alpha_Sigma_inv,
                                           max_iter=10**6)
        alpha_max = np.abs(X.T @ Sigma_inv @ Y).max() / (n_channels * n_times)
    else:
        raise NotImplementedError("No solver '{}' in sgcl".format(pb_name))
    return alpha_max
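# A small numerical check of the simplest branch above ("MTL"), not from the
# original source; it assumes l_2_inf returns the largest row-wise l2 norm.
import numpy as np

rng = np.random.RandomState(0)
n_channels, n_sources, n_times = 10, 20, 30
X_demo = rng.randn(n_channels, n_sources)
Y_demo = rng.randn(n_channels, n_times)

# alpha_max for "MTL": largest row-wise l2 norm of X^T Y, scaled by n_times * n_channels
alpha_max_demo = np.sqrt(((X_demo.T @ Y_demo) ** 2).sum(axis=1)).max()
alpha_max_demo /= (n_times * n_channels)
print(alpha_max_demo)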
def fit_lfm_roblasso(x, z, p=None, nu=1e9, lambda_beta=0., lambda_phi=0.,
                     tol=1e-3, fit_intercept=True, maxiter=500,
                     print_iter=False, rescale=False):
    """For details, see here.

    Parameters
    ----------
    x : array, shape(t_, n_)
    z : array, shape(t_, k_)
    p : array, optional, shape(t_)
    nu : scalar, optional
    lambda_beta : scalar, optional
    lambda_phi : scalar, optional
    tol : float, optional
    fit_intercept : bool, optional
    maxiter : scalar, optional
    print_iter : bool, optional
    rescale : bool, optional

    Returns
    -------
    alpha_RMLFP : array, shape(n_,)
    beta_RMLFP : array, shape(n_, k_)
    sig2_RMLFP : array, shape(n_, n_)
    """
    if len(x.shape) == 1:
        x = x.reshape(-1, 1)
    if len(z.shape) == 1:
        z = z.reshape(-1, 1)
    t_, n_ = x.shape

    if p is None:
        p = np.ones(t_) / t_

    # rescale the variables
    if rescale is True:
        _, sigma2_x = meancov_sp(x, p)
        sigma_x = np.sqrt(np.diag(sigma2_x))
        x = x / sigma_x

        _, sigma2_z = meancov_sp(z, p)
        sigma_z = np.sqrt(np.diag(sigma2_z))
        z = z / sigma_z

    # Step 0: Set initial values using method of moments
    alpha, beta, sigma2, u = fit_lfm_lasso(x, z, p, lambda_beta,
                                           fit_intercept=fit_intercept)
    mu_u = np.zeros(n_)

    for i in range(maxiter):

        # Step 1: Update the weights
        if nu >= 1e3 and np.linalg.det(sigma2) < 1e-13:
            w = np.ones(t_)
        else:
            w = (nu + n_) / (nu + mahalanobis_dist(u, mu_u, sigma2)**2)
        q = w * p
        q = q / np.sum(q)

        # Step 2: Update location and dispersion parameters
        alpha_old, beta_old = alpha, beta
        alpha, beta, sigma2, u = fit_lfm_lasso(x, z, q, lambda_beta,
                                               fit_intercept=fit_intercept)
        sigma2, _ = graphical_lasso((w @ p) * sigma2, lambda_phi)

        # Step 3: Check convergence
        errors = [
            np.linalg.norm(alpha - alpha_old, ord=np.inf) /
            max(np.linalg.norm(alpha_old, ord=np.inf), 1e-20),
            np.linalg.norm(beta - beta_old, ord=np.inf) /
            max(np.linalg.norm(beta_old, ord=np.inf), 1e-20)
        ]

        # print the loglikelihood and the error
        if print_iter is True:
            print('Iter: %i; Loglikelihood: %.5f; Errors: %.5f' %
                  (i, p @ mvt_logpdf(u, mu_u, sigma2, nu) -
                   lambda_beta * np.linalg.norm(beta, ord=1), max(errors)))

        if max(errors) <= tol:
            break

    if rescale is True:
        alpha = alpha * sigma_x
        beta = ((beta / sigma_z).T * sigma_x).T
        sigma2 = (sigma2.T * sigma_x).T * sigma_x

    return alpha, beta, sigma2
def fit_CV(self):
    """Test many alphas, pick the one with the best EBIC."""
    ebic_vals = []   # np.ones(len(alphas)) * np.inf
    prec_list = []   # np.zeros((len(alphas), self.p, self.p))
    cov_list = []    # np.zeros((len(alphas), self.p, self.p))
    alpha_list = []
    alpha = 0.5
    max_itr = 1000
    best_not_found = True
    nr_iterations = 0
    best_ebic = np.inf
    time_since_last_min = 0
    not_all_sparse = True
    while (best_not_found) and (nr_iterations < max_itr) and (not_all_sparse):
        # print(f'{nr_iterations} {alpha}')
        alpha_list.append(alpha)
        try:
            out_cov, out_prec = graphical_lasso(
                emp_cov=self.emp_cov.copy(), alpha=alpha)
        except FloatingPointError:
            ebic_vals.append(np.inf)
            prec_list.append(np.inf)
            cov_list.append(np.inf)
            alpha += 0.01
            nr_iterations += 1
            continue
        except ValueError:
            ebic_vals.append(np.inf)
            prec_list.append(np.inf)
            cov_list.append(np.inf)
            alpha += 0.01
            nr_iterations += 1
            continue

        ebic_t = EBIC(self.N, self.emp_cov, out_prec, beta=self.beta)
        print(f'{nr_iterations} {alpha} {ebic_t} '
              f'{self.N * gaussian_likelihood(self.emp_cov, out_prec)}')
        ebic_vals.append(ebic_t)
        time_since_last_min += 1
        if ebic_t < best_ebic:
            best_ebic = ebic_t
            time_since_last_min = 0
            best_alpha = alpha
        prec_list.append(out_prec)
        cov_list.append(out_cov)
        if time_since_last_min > 50:
            best_not_found = False
        alpha += 0.01
        nr_iterations += 1
        not_all_sparse = (np.count_nonzero(np.triu(out_prec, 1)) != 0)

    best_idx = np.argmin(ebic_vals)
    if np.isscalar(best_idx):
        best_prec = prec_list[best_idx]
        best_alpha = alpha_list[best_idx]
    else:
        best_prec = prec_list[best_idx[0]]
        best_alpha = alpha_list[best_idx[0]]

    return best_prec, prec_list, cov_list, ebic_vals, best_alpha, alpha_list
def fit(self, TS, alpha=0.01, max_iter=100, tol=0.0001,
        threshold_type='degree', **kwargs):
    """Performs a graphical lasso.

    For details see [1, 2].

    The results dictionary also stores the covariance matrix as
    `'weights_matrix'`, the precision matrix as `'precision_matrix'`,
    and the thresholded version of the covariance matrix as
    `'thresholded_matrix'`.

    This implementation uses `scikit-learn`'s implementation of the
    graphical lasso; for convenience two control parameters `tol` and
    `max_iter` are available to interface with their method.

    Parameters
    ----------
    TS (np.ndarray)
        Array consisting of :math:`L` observations from :math:`N` sensors.

    alpha (float, default=0.01)
        Coefficient of penalization, higher values means more sparseness

    max_iter (int, default=100)
        Maximum number of iterations.

    tol (float, default=0.0001)
        Stop the algorithm when the duality gap is below a certain threshold.

    threshold_type (str)
        Which thresholding function to use on the matrix of weights.
        See `netrd.utilities.threshold.py` for documentation. Pass additional
        arguments to the thresholder using ``**kwargs``.

    Returns
    -------
    G (nx.Graph)
        A reconstructed graph with :math:`N` nodes.

    References
    ----------
    .. [1] J. Friedman, T. Hastie, R. Tibshirani, "Sparse inverse covariance
           estimation with the graphical lasso", Biostatistics 9,
           pp. 432–441 (2008).

    .. [2] https://github.com/CamDavidsonPilon/Graphical-Lasso-in-Finance

    """
    emp_cov = np.cov(TS)

    cov, prec = graphical_lasso(emp_cov, alpha, max_iter=max_iter, tol=tol)
    self.results['weights_matrix'] = cov
    self.results['precision_matrix'] = prec

    # threshold the network
    self.results['thresholded_matrix'] = threshold(
        self.results['weights_matrix'], threshold_type, **kwargs
    )

    # construct the network
    G = create_graph(self.results['thresholded_matrix'])
    self.results['graph'] = G

    return G
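# A hypothetical usage of the reconstructor above (not from the original
# source); the enclosing class name and the 'degree' thresholder's `avg_k`
# keyword are assumptions based on a netrd-style interface.
import numpy as np

rng = np.random.RandomState(0)
TS_demo = rng.randn(8, 500)            # 8 sensors, 500 observations

recon = GraphicalLasso()               # the reconstructor class defining fit() above
G_demo = recon.fit(TS_demo, alpha=0.05, threshold_type='degree', avg_k=3)
print(G_demo.number_of_nodes())        # 8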
def run_samples_lasso(N, B, alpha, theta1, theta2, s1, s2):
    import myKernels.RandomWalk as rw
    test_info = pd.DataFrame()
    k = theta1.shape[0]
    for sample in tqdm.tqdm(range(N)):

        Gs1 = []
        Gs2 = []
        error_1 = []
        error_2 = []
        n = 50

        for i in range(50):
            x1 = np.random.multivariate_normal(mean=np.zeros(k), cov=theta1,
                                               size=100)
            A1 = np.corrcoef(x1.T)
            if alpha == 0:
                np.fill_diagonal(A1, 0)
                A1[np.abs(A1) < 1e-5] = 0
            else:
                gl = graphical_lasso(A1, alpha=alpha, max_iter=1000)
                A1 = gl[0]
                A1[np.abs(A1) < 1e-5] = 0
                np.fill_diagonal(A1, 0)
            Gs1.append(nx.from_numpy_matrix(A1))
            error_1.append(
                np.sum(
                    np.logical_xor(
                        np.abs(np.triu(A1, 1)) > 0,
                        np.abs(np.triu(theta1, 1)) > 0)))

            x2 = np.random.multivariate_normal(mean=np.zeros(k), cov=theta2,
                                               size=100)
            A2 = np.corrcoef(x2.T)
            if alpha == 0:
                np.fill_diagonal(A2, 0)
                A2[np.abs(A2) < 1e-5] = 0
            else:
                gl = graphical_lasso(A2, alpha=alpha, max_iter=1000)
                A2 = gl[0]
                A2[np.abs(A2) < 1e-5] = 0
                np.fill_diagonal(A2, 0)
            Gs2.append(nx.from_numpy_matrix(A2))
            error_2.append(
                np.sum(
                    np.logical_xor(
                        np.abs(np.triu(A2, 1)) > 0,
                        np.abs(np.triu(theta2, 1)) > 0)))

        Gs = Gs1 + Gs2

        try:
            # rw_kernel = rw.RandomWalk(Gs, c=0.0001, normalize=0)
            # K = rw_kernel.fit_ARKU_plus(r=6, normalize_adj=False,
            #                             edge_attr=None, verbose=False)
            graph_list = gk.graph_from_networkx(Gs)
            kernel = [{"name": "SP", "with_labels": 0}]
            init_kernel = gk.GraphKernel(kernel=kernel, normalize=0)
            K = init_kernel.fit_transform(graph_list)
        except:
            continue

        MMD_functions = [mg.MMD_b, mg.MMD_u]
        kernel_hypothesis = mg.BoostrapMethods(MMD_functions)
        function_arguments = [dict(n=n, m=n), dict(n=n, m=n)]
        kernel_hypothesis.Bootstrap(K, function_arguments, B=B)
        # print(f'p_value {kernel_hypothesis.p_values}')
        # print(f"MMD_u {kernel_hypothesis.sample_test_statistic['MMD_u']}")

        test_info = pd.concat(
            (test_info,
             pd.DataFrame(
                 {
                     'p_val': kernel_hypothesis.p_values['MMD_u'],
                     'sample': sample,
                     'mean_error_1': np.mean(error_1),
                     'mean_error_2': np.mean(error_2),
                     'alpha': alpha,
                     's1': s1,
                     's2': s2
                 },
                 index=[0])),
            ignore_index=True)

    return test_info