def centralized_graphical_lasso(X, D, alpha=0.01, max_iter=100, convg_threshold=0.001):
    """Estimate a sparse precision (inverse covariance) matrix via graphical lasso.

    Follows the block-coordinate-descent scheme of Friedman, Hastie &
    Tibshirani, "Sparse inverse covariance estimation with the graphical
    lasso" (2007), but solves each per-column subproblem with a
    *generalized* lasso (penalty on ``D.dot(beta)``) instead of a plain lasso.

    Parameters
    ----------
    X : ndarray, shape (n, d)
        Data matrix: n samples, d features.
    D : ndarray
        Penalty matrix passed through to ``generalized_lasso`` for every
        column subproblem.  # assumes its column count matches the
        # subproblem size (d - 1) — TODO confirm against generalized_lasso.
    alpha : float
        Penalization coefficient; larger values give a sparser precision
        matrix. ``alpha == 0`` short-circuits to the unpenalized MLE.
    max_iter : int
        Maximum number of full sweeps over the features.
    convg_threshold : float
        Stop when the absolute duality gap drops below this value.

    Returns
    -------
    (covariance, precision) : tuple of ndarray
        The estimated covariance matrix and its (sparse) inverse.
    """
    if alpha == 0:
        # Unpenalized case: MLE covariance and its pseudo-inverse.
        # (Fix: previously returned only the covariance, while the
        # penalized path below returns a (covariance, precision) tuple.)
        mle = cov_estimator(X)
        return mle, np.linalg.pinv(mle)

    n_features = X.shape[1]
    mle_estimate_ = cov_estimator(X)
    covariance_ = mle_estimate_.copy()
    precision_ = np.linalg.pinv(mle_estimate_)
    indices = np.arange(n_features)
    for i in range(max_iter):
        for n in range(n_features):
            # Current covariance estimate with row and column n removed.
            sub_estimate = covariance_[indices != n].T[indices != n]
            # Off-diagonal entries of row n of the empirical covariance.
            row = mle_estimate_[n, indices != n]
            # Solve the generalized-lasso subproblem for column n.
            coefs_ = generalized_lasso(sub_estimate, row, D, alpha)
            # Back-substitute the subproblem solution into the precision
            # matrix (standard glasso update), keeping it symmetric.
            precision_[n, n] = 1.0 / (covariance_[n, n]
                                      - np.dot(covariance_[indices != n, n], coefs_))
            precision_[indices != n, n] = -precision_[n, n] * coefs_
            precision_[n, indices != n] = -precision_[n, n] * coefs_
            # Mirror the update into the covariance estimate as well.
            temp_coefs = np.dot(sub_estimate, coefs_)
            covariance_[n, indices != n] = temp_coefs
            covariance_[indices != n, n] = temp_coefs
        print("Finished iteration %s" % i)
        # Converged when the duality gap against the MLE is small enough.
        if np.abs(_dual_gap(mle_estimate_, precision_, alpha)) < convg_threshold:
            break
    else:
        # for/else: only reached when the sweep loop finished without break.
        print("The algorithm did not converge. Try increasing the max number of iterations.")
    return covariance_, precision_
from generalized_lasso import generalized_lasso
from graphical_lasso import centralized_graphical_lasso
from matplotlib.pylab import *
import gett.io

print('Testing generalized_lasso')
A = randn(4, 40)
y = randn(4)
D = rand(3, 40)


def lasso_fun(x):
    """Generalized-lasso objective: ||A x - y||_2 + ||D x||_1.

    Fix: the l1 penalty is the sum of the absolute values of D.dot(x),
    not the absolute value of their sum (abs(...).sum(), not
    abs((...).sum()) as before) — the old form could score a wrong
    solution lower than a correct one.
    """
    return norm(dot(A, x) - y) + abs(dot(D, x)).sum()


res = generalized_lasso(A, y, D, 1)
print('Lasso result, evaluated in lasso function: %s' % lasso_fun(res))
print('Random number, evaluated in lasso function: %s' % lasso_fun(randn(len(res))))

print('Testing centralized_graphical_lasso')
# Read the heart failure eqtl dataset!
#samples, genenames, Mexp = gett.io.read_expression_matrix(open('../hf_eqtl/full_exp_varcutoff_50.txt'))
print('Finished reading')
# Placeholder random data standing in for the real expression matrix above.
Mexp = randn(400, 20000)
# NOTE(review): D is (400, 400) here but each glasso subproblem works on
# n_features - 1 = 399 coefficients — verify generalized_lasso accepts this
# shape, or that D should be eye(Mexp.shape[0] - 1).
D = eye(Mexp.shape[0])
covariance, precision = centralized_graphical_lasso(Mexp.T, D, max_iter=1000)
print(covariance)
print(precision)