def lw(data, alphas):
    """
    Estimate a graph with the Ledoit-Wolf precision estimator.

    Parameters
    ----------
    data : numpy ndarray
        The input data to reconstruct/estimate a graph on.
        Features as columns and observations as rows.
    alphas : float
        The threshold on the precision matrix to determine edges.

    Returns
    -------
    numpy ndarray
        The estimated adjacency matrix (0/1 entries, zero diagonal).
    """
    alpha = alphas
    # Standardize features so the shrinkage estimate is scale-free.
    scaler = StandardScaler()
    data = scaler.fit_transform(data)
    cov = LedoitWolf().fit(data)
    precision_matrix = cov.get_precision()
    n_features = precision_matrix.shape[0]
    # An edge exists wherever |precision| exceeds the threshold.
    # (The original also wrote explicit zeros for sub-threshold entries,
    # which was redundant: the matrix is initialized to zero.)
    adjacency_matrix = np.zeros((n_features, n_features))
    adjacency_matrix[np.abs(precision_matrix) > alpha] = 1
    # No self-loops.
    adjacency_matrix[np.diag_indices_from(adjacency_matrix)] = 0
    return adjacency_matrix
def main():
    """
    Construct a co-occurrence network from gene expression data.

    Main entry point. If DATA_PICKLE exists, previously computed results
    are reloaded from it; otherwise the expression matrix is read from
    FILENAME, pruned to the PRUNE_GENES most abundant genes, a precision
    matrix is estimated via Ledoit-Wolf shrinkage, and the local FDR of
    the partial correlations is computed with the R 'fdrtool' package
    (via rpy2). A diagnostic PDF is written to PDF_FILENAME either way.
    """
    # Read in the data
    if os.path.isfile(DATA_PICKLE):
        print("reading previously saved data from pickle %s" % (DATA_PICKLE))
        with open(DATA_PICKLE, 'rb') as file:
            df = pickle.load(file)
            lwe = pickle.load(file)
            pmat = pickle.load(file)
            # BUG FIX: was misspelled 'pcore_indices', which did not match
            # the 'pcor_indices' name dumped below.
            pcor_indices = pickle.load(file)
            pcor = pickle.load(file)
            lfdr_pcor = pickle.load(file)
            #prob = pickle.load(file)
    else:
        print("reading in data from %s" % (FILENAME))
        df = pd.read_csv(FILENAME, sep='\t')
        print("found %d rows and %d columns" % (df.shape[0], df.shape[1]))
        # compute the row means and sort the data frame by descending means
        df['row_means'] = df.mean(axis=1)
        df.sort_values('row_means', axis=0, ascending=False, inplace=True)
        df.drop('row_means', axis=1, inplace=True)
        # take the most abundant genes
        df = df.head(PRUNE_GENES)
        # Ledoit-Wolf optimal shrinkage coefficient estimate
        print("computing Ledoit-Wolf optimal shrinkage coeffecient estimate")
        lwe = LedoitWolf().fit(df.transpose())
        pmat = lwe.get_precision()
        # Convert symmetric matrix to array, first by getting indices
        # of the off diagonal elements, second by pulling them into
        # separate array (pcor).
        print("extracting off diagnol elements of precision matrix")
        pcor_indices = np.triu_indices(pmat.shape[0], 1)
        pcor = pmat[pcor_indices]
        # Determine edges by computing lfdr of pcor.
        print("computing lfdr of partial correlations")
        fdrtool = importr('fdrtool')
        lfdr_pcor = fdrtool.fdrtool(FloatVector(pcor),
                                    statistic="correlation", plot=False)
        #prob = 1-lfdr_pcor['lfdr']
        with open(DATA_PICKLE, 'wb') as file:
            pickle.dump(df, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lwe, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pmat, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor_indices, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lfdr_pcor, file, pickle.HIGHEST_PROTOCOL)
            #pickle.dump(prob, file, pickle.HIGHEST_PROTOCOL)
    print("making 1-lfdr vs. pcor plot")
    # rx2('lfdr') pulls the lfdr vector out of the R list result.
    prob = 1-np.array(lfdr_pcor.rx2('lfdr'))
    with PdfPages(PDF_FILENAME) as pdf:
        plt.figure(figsize=(3, 3))
        plt.plot(range(7), [3, 1, 4, 1, 5, 9, 2], 'r-o')
        plt.title('Page One')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()
        plt.plot(pcor[0:10000:10], prob[0:10000:10], 'o',
                 markeredgecolor='k', markersize=3)
        plt.title("THIS IS A PLOT TITLE, YOU BET")
        plt.xlabel('partial correlation')
        plt.ylabel('lfdr')
        # BUG FIX: 'pdf.savefig' was referenced without parentheses, so the
        # method was never called and this figure was never written out.
        pdf.savefig()
        plt.close()
spOrder = cs_amdW(sp, order, myCSparceLibP) #estimate precision matrix from either Gaussian graphical model or #the LedoitWolf regularisation (here we are using the second) #note that with the typical correlation coefficients the estimation #of the covariance matrix is not well regularised especially with a large number of rois. #This problem is exacerbated with the inversion of the correlation matrix print "Loading fMRI averaged signal ..." print "Estimate the precision matrix ..." precAll = np.empty([lenR,lenR,subjNum]) for ii,subj in enumerate(folder_names): inputP = pathfMRI + 'timeSeriesR' + prefS + '_' + subj + '.txt' signal = np.loadtxt(inputP) LedW = LedoitWolf(store_precision=True, assume_centered=False) LedW.fit(signal.T) prec = LedW.get_precision() #estimate the precision matrix based on the LedoitWolf regularisation precAll[:,:,ii] = prec print "Preparing Data for randomised Lasso: Cholesky decomposition, scaling diagonal and so on..." #prepare data for ransomised Lasso Y,X,ri0,ci0 = prepChol(scAll,precAll,sp,spOrder) print "Randomised Lasso is running ..." prob_all = run_randomisedLasso(X.T,Y) print "Reorder data and visualise ..." adj2,adj = visual_res(prob_all,lenR,roisF,ri0,ci0,thres) pl.matshow(adj2) pl.colorbar() pl.show()
def main():
    """
    Construct a co-occurrence network from gene expression data.

    Main entry point. If DATA_PICKLE exists, previously computed results
    are reloaded from it; otherwise the expression matrix is read from
    FILENAME, a precision matrix is estimated via Ledoit-Wolf shrinkage
    (with wall-clock timing logged), and the local FDR of the partial
    correlations is computed with the R 'fdrtool' package (via rpy2).
    A diagnostic PDF is written to PDF_FILENAME either way.
    """
    # Read in the data
    if os.path.isfile(DATA_PICKLE):
        print("reading previously saved data from pickle %s" % (DATA_PICKLE))
        with open(DATA_PICKLE, 'rb') as file:
            df = pickle.load(file)
            lwe = pickle.load(file)
            pmat = pickle.load(file)
            # BUG FIX: was misspelled 'pcore_indices', which did not match
            # the 'pcor_indices' name dumped below.
            pcor_indices = pickle.load(file)
            pcor = pickle.load(file)
            lfdr_pcor = pickle.load(file)
            #prob = pickle.load(file)
    else:
        print("reading in data from %s" % (FILENAME))
        df = pd.read_csv(FILENAME, sep='\t')
        # TODO: remove this experimental data-trimming
        #if NUM_ROWS_DEV_SCALE is not None:
        #    old_shape = df.shape
        #    df = df.iloc[0:NUM_ROWS_DEV_SCALE, ]
        #    print('DEV MODE: TRIMED DATA FROM {} to {}'.format(old_shape, df.shape))
        print("found %d rows and %d columns" % (df.shape[0], df.shape[1]))
        # compute the row means and sort the data frame by descending means
        df['row_means'] = df.mean(axis=1)
        df.sort_values('row_means', axis=0, ascending=False, inplace=True)
        df.drop('row_means', axis=1, inplace=True)
        # take the most abundant genes
        #df = df.head(PRUNE_GENES)
        # Ledoit-Wolf optimal shrinkage coefficient estimate
        print("computing Ledoit-Wolf optimal shrinkage coeffecient estimate")
        start_time = datetime.now()
        print('time: {}'.format(str(start_time)))
        lwe = LedoitWolf().fit(df.transpose())
        end_time = datetime.now()
        total_time = end_time - start_time
        print('LedoitWolf time for {} genes: {}'.format(df.shape[0], str(total_time)))
        pmat = lwe.get_precision()
        # Convert symmetric matrix to array, first by getting indices
        # of the off diagonal elements, second by pulling them into
        # separate array (pcor).
        print("extracting off diagnol elements of precision matrix")
        pcor_indices = np.triu_indices(pmat.shape[0], 1)
        pcor = pmat[pcor_indices]
        # Determine edges by computing lfdr of pcor.
        print("computing lfdr of partial correlations")
        fdrtool = importr('fdrtool')
        lfdr_pcor = fdrtool.fdrtool(FloatVector(pcor),
                                    statistic="correlation", plot=False)
        #prob = 1-lfdr_pcor['lfdr']
        with open(DATA_PICKLE, 'wb') as file:
            pickle.dump(df, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lwe, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pmat, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor_indices, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lfdr_pcor, file, pickle.HIGHEST_PROTOCOL)
            #pickle.dump(prob, file, pickle.HIGHEST_PROTOCOL)
    print("making 1-lfdr vs. pcor plot")
    # rx2('lfdr') pulls the lfdr vector out of the R list result.
    prob = 1-np.array(lfdr_pcor.rx2('lfdr'))
    with PdfPages(PDF_FILENAME) as pdf:
        plt.figure(figsize=(3, 3))
        plt.plot(range(7), [3, 1, 4, 1, 5, 9, 2], 'r-o')
        plt.title('Page One')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()
        plt.plot(pcor[0:10000:10], prob[0:10000:10], 'o',
                 markeredgecolor='k', markersize=3)
        plt.title("THIS IS A PLOT TITLE, YOU BET")
        plt.xlabel('partial correlation')
        plt.ylabel('lfdr')
        # BUG FIX: 'pdf.savefig' was referenced without parentheses, so the
        # method was never called and this figure was never written out.
        pdf.savefig()
        plt.close()