Example 1
def lw(data, alphas):
    """
        Estimates the graph with the Ledoit-Wolf estimator.

        Parameters
        ----------
        data: numpy ndarray
            The input data to estimate/reconstruct a graph from. Features as columns and observations as rows.
        alphas: float
            The threshold on the precision matrix to determine edges.

        Returns
        -------
        adjacency matrix : the estimated adjacency matrix.
    """
    alpha=alphas
    scaler = StandardScaler()
    data = scaler.fit_transform(data)
    cov = LedoitWolf().fit(data)
    precision_matrix = cov.get_precision()
    n_features, _ = precision_matrix.shape
    mask1 = np.abs(precision_matrix) > alpha
    mask0 = np.abs(precision_matrix) <= alpha
    adjacency_matrix = np.zeros((n_features,n_features))
    adjacency_matrix[mask1] = 1
    adjacency_matrix[mask0] = 0
    adjacency_matrix[np.diag_indices_from(adjacency_matrix)] = 0
    return adjacency_matrix
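# A minimal usage sketch for lw() above, not part of the original example. It
# assumes numpy is available as np (as in the function body) and that the
# StandardScaler/LedoitWolf imports from scikit-learn are in scope; the data
# shape and the 0.05 threshold are illustrative assumptions only.
def _lw_usage_sketch():
    rng = np.random.default_rng(0)
    X = rng.normal(size=(200, 10))   # 200 observations (rows), 10 features (columns)
    A = lw(X, alphas=0.05)           # binary adjacency matrix with a zeroed diagonal
    return A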
def main():
    '''
    Constructs a co-occurrence network from gene expression data.

    Main entry point to code.
    '''

    # Read in the data
    if os.path.isfile(DATA_PICKLE):
        print("reading previously saved data from pickle %s" % (DATA_PICKLE))
        with open(DATA_PICKLE, 'rb') as file:
            df = pickle.load(file)
            lwe = pickle.load(file)
            pmat = pickle.load(file)
            pcor_indices = pickle.load(file)
            pcor = pickle.load(file)
            lfdr_pcor = pickle.load(file)
            #prob = pickle.load(file)
    else:
        print("reading in data from %s" % (FILENAME))
        df = pd.read_csv(FILENAME, sep='\t')
        print("found %d rows and %d columns" % (df.shape[0], df.shape[1]))
        # compute the row means and sort the data frame by descending means
        df['row_means'] = df.mean(axis=1)
        df.sort_values('row_means', axis=0, ascending=False, inplace=True)
        df.drop('row_means', axis=1, inplace=True)
        # take the most abundant genes
        df = df.head(PRUNE_GENES)

        # Ledoit-Wolf optimal shrinkage coefficient estimate
        print("computing Ledoit-Wolf optimal shrinkage coeffecient estimate")
        lwe = LedoitWolf().fit(df.transpose())
        pmat = lwe.get_precision()
        # Convert symmetric matrix to array, first by getting indices
        # of the off diagonal elements, second by pulling them into
        # separate array (pcor).
        print("extracting off diagnol elements of precision matrix")
        pcor_indices = np.triu_indices(pmat.shape[0], 1)
        pcor = pmat[pcor_indices]

        # Determine edges by computing lfdr of pcor.
        print("computing lfdr of partial correlations")
        fdrtool = importr('fdrtool')
        lfdr_pcor = fdrtool.fdrtool(FloatVector(pcor), statistic="correlation", plot=False)
        #prob = 1-lfdr_pcor['lfdr']

        with open(DATA_PICKLE, 'wb') as file:
            pickle.dump(df, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lwe, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pmat, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor_indices, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lfdr_pcor, file, pickle.HIGHEST_PROTOCOL)
            #pickle.dump(prob, file, pickle.HIGHEST_PROTOCOL)

    print("making 1-lfdr vs. pcor plot")
    prob = 1-np.array(lfdr_pcor.rx2('lfdr'))
    with PdfPages(PDF_FILENAME) as pdf:
        plt.figure(figsize=(3, 3))
        plt.plot(range(7), [3, 1, 4, 1, 5, 9, 2], 'r-o')
        plt.title('Page One')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        plt.plot(pcor[0:10000:10], prob[0:10000:10], 'o', markeredgecolor='k', markersize=3)
        plt.title("THIS IS A PLOT TITLE, YOU BET")
        plt.xlabel('partial correlation')
        plt.ylabel('1 - lfdr')
        pdf.savefig()
        plt.close()
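# Hedged sketch, not part of the original example: one way the 1-lfdr values
# computed in main() above could be turned into an adjacency matrix, mirroring
# the thresholding idea used in lw(). The 0.9 cutoff is an assumption chosen
# only for illustration.
def edges_from_lfdr(prob, pcor_indices, n_features, cutoff=0.9):
    adjacency = np.zeros((n_features, n_features))
    rows, cols = pcor_indices              # upper-triangle indices used to flatten pmat
    keep = prob > cutoff                   # keep pairs with high edge probability (1 - lfdr)
    adjacency[rows[keep], cols[keep]] = 1
    adjacency[cols[keep], rows[keep]] = 1  # mirror to keep the matrix symmetric
    return adjacency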
spOrder = cs_amdW(sp, order, myCSparceLibP)

# Estimate the precision matrix from either a Gaussian graphical model or
# the LedoitWolf regularisation (here we use the latter).
# Note that with typical correlation coefficients the estimate of the
# covariance matrix is not well regularised, especially with a large number of ROIs;
# the problem is exacerbated when the correlation matrix is inverted.
print "Loading fMRI averaged signal ..."
print "Estimate the precision matrix ..."
precAll = np.empty([lenR,lenR,subjNum])
for ii,subj in enumerate(folder_names):
  inputP = pathfMRI + 'timeSeriesR' + prefS + '_' + subj + '.txt'
  signal = np.loadtxt(inputP)
  LedW = LedoitWolf(store_precision=True, assume_centered=False)
  LedW.fit(signal.T)
  prec = LedW.get_precision()   #estimate the precision matrix based on the LedoitWolf regularisation
  precAll[:,:,ii] = prec
    
print "Preparing Data for randomised Lasso: Cholesky decomposition, scaling diagonal and so on..."    
#prepare data for randomised Lasso
Y,X,ri0,ci0 = prepChol(scAll,precAll,sp,spOrder)

print "Randomised Lasso is running ..."
prob_all = run_randomisedLasso(X.T,Y)

print "Reorder data and visualise ..."
adj2,adj = visual_res(prob_all,lenR,roisF,ri0,ci0,thres)

pl.matshow(adj2)
pl.colorbar()
pl.show()
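# Hedged sketch, not from the original script: a quick numerical check of the
# comment above, comparing the Ledoit-Wolf precision with the naive empirical
# covariance when the number of ROIs is large relative to the number of time
# points. The shapes and random data are assumptions for illustration only.
import numpy as np
from sklearn.covariance import LedoitWolf, EmpiricalCovariance

rng = np.random.default_rng(0)
toy_signal = rng.normal(size=(120, 90))   # 120 time points, 90 ROIs

lw_prec = LedoitWolf().fit(toy_signal).get_precision()
emp_cov = EmpiricalCovariance().fit(toy_signal).covariance_

# The shrunk estimate stays well conditioned; the empirical covariance can be
# close to singular, which makes its inverse (the precision matrix) unstable.
print("Ledoit-Wolf precision condition number:", np.linalg.cond(lw_prec))
print("Empirical covariance condition number:", np.linalg.cond(emp_cov))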
Example 4
def main():
    '''
    Constructs a co-occurrence network from gene expression data.

    Main entry point to code.
    '''
    # Read in the data
    if os.path.isfile(DATA_PICKLE):
        print("reading previously saved data from pickle %s" % (DATA_PICKLE))
        with open(DATA_PICKLE, 'rb') as file:
            df = pickle.load(file)
            lwe = pickle.load(file)
            pmat = pickle.load(file)
            pcor_indices = pickle.load(file)
            pcor = pickle.load(file)
            lfdr_pcor = pickle.load(file)
            #prob = pickle.load(file)
    else:
        print("reading in data from %s" % (FILENAME))
        df = pd.read_csv(FILENAME, sep='\t')

        # TODO: remove this experimental data-trimming
        #if NUM_ROWS_DEV_SCALE is not None:
        #    old_shape = df.shape
        #    df = df.iloc[0:NUM_ROWS_DEV_SCALE, ]
        #    print('DEV MODE: TRIMED DATA FROM {} to {}'.format(old_shape, df.shape))

        print("found %d rows and %d columns" % (df.shape[0], df.shape[1]))
        # compute the row means and sort the data frame by descending means
        df['row_means'] = df.mean(axis=1)
        df.sort_values('row_means', axis=0, ascending=False, inplace=True)
        df.drop('row_means', axis=1, inplace=True)
        # take the most abundant genes
        #df = df.head(PRUNE_GENES)

        # Ledoit-Wolf optimal shrinkage coefficient estimate
        print("computing Ledoit-Wolf optimal shrinkage coeffecient estimate")
        start_time = datetime.now()
        print('time: {}'.format(str(start_time)))
        lwe = LedoitWolf().fit(df.transpose())
        end_time = datetime.now()
        total_time = end_time - start_time
        print('LedoitWolf time for {} genes: {}'.format(df.shape[0], str(total_time)))

        pmat = lwe.get_precision()
        # Convert symmetric matrix to array, first by getting indices
        # of the off diagonal elements, second by pulling them into
        # separate array (pcor).
        print("extracting off diagnol elements of precision matrix")
        pcor_indices = np.triu_indices(pmat.shape[0], 1)
        pcor = pmat[pcor_indices]

        # Determine edges by computing lfdr of pcor.
        print("computing lfdr of partial correlations")
        fdrtool = importr('fdrtool')
        lfdr_pcor = fdrtool.fdrtool(FloatVector(pcor), statistic="correlation", plot=False)
        #prob = 1-lfdr_pcor['lfdr']

        with open(DATA_PICKLE, 'wb') as file:
            pickle.dump(df, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lwe, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pmat, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor_indices, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(pcor, file, pickle.HIGHEST_PROTOCOL)
            pickle.dump(lfdr_pcor, file, pickle.HIGHEST_PROTOCOL)
            #pickle.dump(prob, file, pickle.HIGHEST_PROTOCOL)

    print("making 1-lfdr vs. pcor plot")
    prob = 1-np.array(lfdr_pcor.rx2('lfdr'))
    with PdfPages(PDF_FILENAME) as pdf:
        plt.figure(figsize=(3, 3))
        plt.plot(range(7), [3, 1, 4, 1, 5, 9, 2], 'r-o')
        plt.title('Page One')
        pdf.savefig()  # saves the current figure into a pdf page
        plt.close()

        plt.plot(pcor[0:10000:10], prob[0:10000:10], 'o', markeredgecolor='k', markersize=3)
        plt.title("THIS IS A PLOT TITLE, YOU BET")
        plt.xlabel('partial correlation')
        plt.ylabel('1 - lfdr')
        pdf.savefig()
        plt.close()
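# Hedged sketch, not part of the original example: assembling the co-occurrence
# network that the main() docstring describes, using networkx. The 0.9 cutoff
# and the use of df.index as gene labels are assumptions for illustration; the
# variables df, prob and pcor_indices are the ones computed in main() above.
import networkx as nx

def build_network(df, prob, pcor_indices, cutoff=0.9):
    genes = list(df.index)        # gene identifiers (rows of the expression matrix)
    rows, cols = pcor_indices     # upper-triangle indices of the precision matrix
    graph = nx.Graph()
    graph.add_nodes_from(genes)
    for i, j, p in zip(rows, cols, prob):
        if p > cutoff:            # keep pairs with high edge probability (1 - lfdr)
            graph.add_edge(genes[i], genes[j], weight=float(p))
    return graph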