def computePartialCorrelations(coupling_data, reg_alpha):
    """Estimate a sparse precision matrix and the partial correlations it implies.

    A graphical lasso is fitted to ``coupling_data`` and the resulting
    precision matrix ``P`` is converted to partial correlations with
    ``rho_ij = -p_ij / sqrt(p_ii * p_jj)``.

    Args:
        coupling_data: Data handed unchanged to ``GraphLasso.fit``
            (presumably shaped (n_samples, n_features) -- confirm with caller).
        reg_alpha: Regularization strength passed as ``alpha`` to GraphLasso.

    Returns:
        tuple: ``(precision, partial_correlations)``. Only the strict upper
        triangle of ``partial_correlations`` is populated; the diagonal and
        the lower triangle are set to zero.
    """
    # The data is intentionally not standardized here; assume_centered=False
    # leaves the centering to the estimator.
    estimator = GraphLasso(alpha=reg_alpha, assume_centered=False,
                           mode='cd', max_iter=500)
    estimator.fit(coupling_data)
    print("Sparse inverse covariance matrix was estiamted with {0} iterations.".format(estimator.n_iter_))
    print("\t\t\t and by using the parameters: ", estimator.get_params())

    prec = estimator.get_precision()
    prec_diag = np.diag(prec)

    # Partial correlations are proportional to the precision matrix entries:
    # rho_ij = -p_ij / sqrt(p_ii * p_jj)
    partial_correlations = -prec / np.sqrt(np.outer(prec_diag, prec_diag))

    # Zero the lower triangle (diagonal included). The size is taken from the
    # matrix itself rather than the former hard-coded 400, so any number of
    # features works.
    partial_correlations[np.tril_indices(prec.shape[0])] = 0

    return prec, partial_correlations
def create_skeleton_from_data(self, data, **kwargs):
    """
    Fit a graphical lasso to ``data`` and return its precision matrix.

    :param data: raw data df; its underlying array is passed to the estimator
    :param kwargs: optional hyper-parameters forwarded to GraphLasso:
        ``alpha`` (default 0.01) and ``max_iter`` (default 2000)
    :return: the result of ``get_precision()`` on the fitted model
    """
    alpha = kwargs.get('alpha', 0.01)
    max_iter = kwargs.get('max_iter', 2000)
    edge_model = GraphLasso(alpha=alpha, max_iter=max_iter)
    # DataFrame.as_matrix() was removed in pandas 1.0; .values yields the
    # same ndarray and works on old and new pandas alike.
    edge_model.fit(data.values)
    return edge_model.get_precision()
def get_other_precision(A):
    """Return the graphical-lasso precision matrix estimated from ``A``.

    Reference implementation based on scikit-learn; our own algorithm is
    meant to replace it while taking the same input and producing the same
    output.
    See http://scikit-learn.org/stable/modules/generated/sklearn.covariance.GraphLasso.html

    Args:
        A: The aggregated sentiment matrix, an array of
            (n_samples, n_features).

    Returns:
        The result of ``get_precision()`` on the fitted estimator.
    """
    # GraphLasso was renamed to GraphicalLasso in scikit-learn 0.20 and the
    # old name was removed in 0.22; try the new name first, then fall back.
    try:
        from sklearn.covariance import GraphicalLasso as GraphLasso
    except ImportError:
        from sklearn.covariance import GraphLasso

    # alpha is the regularization parameter: the higher alpha, the more
    # regularization and the sparser the inverse covariance.
    graph_lasso = GraphLasso(alpha=1e-5)
    graph_lasso.fit(A)
    return graph_lasso.get_precision()
def precisionCol(cleandata, k):
    """Select the columns whose precision entries with column 'Y' are largest.

    Fits a graphical lasso (LARS mode) to ``cleandata``, writes the labelled
    precision matrix to ``precision.csv`` in the current directory, then ranks
    the absolute values of the 'Y' column of that matrix.

    Args:
        cleandata: DataFrame containing a column named 'Y'.
        k: Number of largest absolute entries to keep before 'Y' itself is
            dropped from the result.

    Returns:
        pandas.Index: labels of the ``k`` largest absolute entries, without
        'Y', in ascending order of magnitude.

    Raises:
        KeyError: if 'Y' is not among the ``k`` largest entries, because
            ``Index.drop`` requires the label to be present.
    """
    model = GraphLasso(mode='lars')
    model.fit(cleandata)

    pre_ = pd.DataFrame(model.get_precision(),
                        index=cleandata.columns,
                        columns=cleandata.columns)
    pre_.to_csv("precision.csv")

    # Series.sort() (in-place) was removed from pandas; sort_values() returns
    # the values in the same ascending order.
    ranked = abs(pre_['Y']).sort_values()
    # Positional slice: the last k entries are the k largest magnitudes.
    top = ranked.iloc[-k:]
    return top.index.drop('Y')
def predict(self, data, **kwargs):
    """
    Build a directed graph from the graphical-lasso precision matrix of ``data``.

    :param data: raw data df
    :param kwargs: optional hyper-parameters ``alpha`` (default 0.01) and
        ``max_iter`` (default 2000), both forwarded to GraphLasso
    :return: networkx DiGraph built from the precision matrix, with integer
        node ids replaced by the column labels of ``data``
    """
    estimator = GraphLasso(
        alpha=kwargs.get('alpha', 0.01),
        max_iter=kwargs.get('max_iter', 2000),
    )
    estimator.fit(data.values)

    precision_graph = nx.DiGraph(estimator.get_precision())
    # Node i of the graph corresponds to the i-th column of the frame.
    node_labels = dict(enumerate(data.columns))
    return nx.relabel_nodes(precision_graph, node_labels)
def predict(self, data, alpha=0.01, max_iter=2000, **kwargs):
    """Predict the graph skeleton.

    Args:
        data (pandas.DataFrame): observational data
        alpha (float): regularization parameter passed to GraphLasso
        max_iter (int): maximum number of iterations passed to GraphLasso
        **kwargs: accepted but not used by this method

    Returns:
        networkx.DiGraph: graph built from the estimated precision matrix,
        with nodes relabelled to the column names of ``data``
    """
    glasso = GraphLasso(alpha=alpha, max_iter=max_iter)
    glasso.fit(data.values)

    skeleton = nx.DiGraph(glasso.get_precision())
    # Map positional node ids back to the DataFrame's column labels.
    index_to_column = dict(enumerate(data.columns))
    return nx.relabel_nodes(skeleton, index_to_column)
def _init_para(self, X, y):
    '''
    Extend the base parameter initialisation with an edge list and one
    ``theta`` entry per edge.

    Edges are pairs ``[i, j]`` with ``i < j`` over the columns of ``y``:
    every pair when ``self.sparsity == 0``, otherwise only the pairs whose
    entry in a graphical-lasso (alpha=0.1) precision matrix fitted on ``y``
    is non-zero.

    Returns the ``(p0, shape)`` pair obtained from ``BASE._init_para`` with
    ``p0['theta']`` added.
    '''
    p0, shape = BASE._init_para(self, X, y)

    if self.sparsity == 0:
        # Fully connected: all unordered column pairs, smaller index first.
        edges = [
            [min(pair), max(pair)]
            for pair in itertools.combinations(range(y.shape[1]), 2)
        ]
    else:
        glasso = GraphLasso(alpha=0.1)
        glasso.fit(y)
        adjacency = glasso.get_precision() != 0
        n_rows, n_cols = adjacency.shape
        # Keep only the upper-triangle pairs with a non-zero precision entry.
        edges = [
            [row, col]
            for row in range(n_rows)
            for col in range(row + 1, n_cols)
            if adjacency[row, col]
        ]

    # NOTE(review): the int8 cast looks like it limits node indices to 127 --
    # confirm this is enough for the label counts in use.
    self.edges = T.shared(np.array(edges).T).astype('int8')

    if self.verbose:
        print('(edges)', edges)
        print('(y) shape:', y.shape, 'labels:', np.unique(y))
        print('(X) shape:', X.shape, 'std:', np.std(X))

    # One start value per edge: a scalar when the copula parameter is shared,
    # otherwise a (shape[1], shape[1]) matrix filled with the same value.
    theta = [
        0.01 if self.shared_copula else np.ones((shape[1], shape[1])) * 0.01
        for _ in edges
    ]
    p0['theta'] = tespo.parameter(theta, const=False)

    return p0, shape
# Reconstruction error of the PPCA reconstruction of the 750-point sample.
# NOTE(review): getReconstructionError / drawReconstructionError are defined
# outside this chunk; presumably they compute and plot a per-point error --
# confirm.
re_error_750_ppca = getReconstructionError(sample_750, re_750points_ppca.T)
drawReconstructionError(re_error_750_ppca)
plt.title('reconstruction error of 750 points of PPCA')
plt.show()

# Reconstruct the 250 points in rows 750..999 of `sample`: apply W_ppca,
# then map the result back with getPPCAInverseTransform.
sample_250 = sample[750:1000]
re_250points_ppca = W_ppca.dot(sample_250.T)
re_250points_ppca = getPPCAInverseTransform(re_250points_ppca.T)
re_error_250_ppca = getReconstructionError(sample_250, re_250points_ppca.T)
drawReconstructionError(re_error_250_ppca)
plt.title('reconstruction error of 250 points of PPCA')
plt.show()

#==============================================================================
# Problem 2.9: graphical lasso fitted on the 750-point sample
#==============================================================================
from sklearn.covariance import GraphLasso

# 0.01 is the first positional argument of GraphLasso (its alpha).
gl = GraphLasso(0.01)
gl.fit(sample_750)

# Estimated covariance matrix.
# NOTE(review): covarianceMatrix is defined elsewhere; presumably it draws the
# matrix on the current figure -- confirm.
cov_gl = gl.covariance_
covarianceMatrix(cov_gl)
plt.title('convariance matrix of GL')
plt.show()

# Estimated precision (inverse covariance) matrix.
prec_gl = gl.get_precision()
covarianceMatrix(prec_gl)
plt.title('inverse convariance matrix of GL')
plt.show()
# Empirical covariance of X.
# NOTE(review): X appears to be (n_features, n_samples) with N samples, since
# it is multiplied by its own transpose here and transposed before the
# GraphLasso fit below -- confirm.
C_emp = X.dot(X.T) / float(N)
print('Empirical Cov:')
print(C_emp)

# Neighborhood selection
nhs = NeighborSelect(EDPP(), ProximalGradientSolver(), path_lb=0.2, path_steps=5, path_scale='log')
Cb = nhs.fit(np.ascontiguousarray(X))
print(Cb)

# Graphical lasso fitted on the transposed data
glasso = GraphLasso(alpha=0.005, tol=0.0001, max_iter=1000, verbose=False)
glasso.fit(X.T)
C = glasso.get_precision()
# Error between the fitted covariance estimate and the reference COV
print(glasso.error_norm(COV))
# The value printed under this heading is the precision matrix C.
print('GraphLasso Cov:')
print(C)

# Plot some example network
plt.figure()
plt.subplot(2, len(Cb), 1)
plt.title('Cov')
plt.pcolor(COV)
plt.subplot(2, len(Cb), 2)
plt.title('Emp. Cov')
plt.pcolor(C_emp)
plt.subplot(2, len(Cb), 3)
plt.title('GraphLasso')