Exemple #1
0
def calcTFs(
        expr,
        tf_names,
        db,
        prefix,
        motif_path='../data/pySCENIC/ref/motifs-v9-nr.hgnc-m0.001-o0.0.tbl',
        out_path='../data/pySCENIC',
        ppn=8):
    """Compute motifs, regulons and transcription factor activity using pySCENIC.

    Arguments
    ---------
    expr: `pandas DataFrame`
        cell X gene raw counts; FPKM; not TPM as coexpression will be calculated
    tf_names: `list` (`str`)
        curated human transcriptional factor downloaded from github: pySCENIC/ref/hs_hgnc_curated_tfs.txt
    db: `list` (`FeatherRankingDatabase()`)
        feather files, ranking genome [FeatherRankingDatabase(name="hg38__refseq-r80__10kb_up_and_down_tss")]
    prefix: `str`
        Name used to build the output file names (eg, cell line names)
    motif_path: `str`
        Motif annotation table handed to `prune2df`
    out_path: `str`
        Directory the output files are written to; it is not created here
    ppn: `int` (default: 8)
        Number of workers handed to `prune2df` and `aucell`

    Returns
    -------
    Nothing is returned; results are written to disk (the calc takes too long...):
    `{out_path}/{prefix}_motifs.csv`
        the regulons, serialized with `pickle` (see NOTE in the body)
    `{out_path}/{prefix}_auc_mtx.csv`
        the AUCell matrix, written with `DataFrame.to_csv`
    """

    # Inference of co-expression modules
    adjacencies = grnboost2(expr, tf_names=tf_names, verbose=True)
    modules = list(modules_from_adjacencies(adjacencies, expr))

    # Calculate a list of enriched motifs and the corresponding target genes for all modules.
    with ProgressBar():
        df = prune2df(db, modules, motif_path, num_workers=ppn)

    # Create regulons from this table of enriched motifs.
    regulons = df2regulons(df)

    # Save the discovered regulons to disk.
    # NOTE(review): despite the `.csv` suffix this file holds *pickled regulons*,
    # not the motif table. The name is kept as-is so existing readers of this
    # path keep working; read it back with `pickle.load`.
    with open('{}/{}_motifs.csv'.format(out_path, prefix), "wb") as f:
        pickle.dump(regulons, f)

    # Score every regulon per cell and store the resulting matrix.
    auc_mtx = aucell(expr, regulons, num_workers=ppn)
    auc_mtx.to_csv('{}/{}_auc_mtx.csv'.format(out_path, prefix))

    print('finished calculation for %s' % (prefix))
		## Prune modules for targets with cis regulatory footprints (aka RcisTarget)

		### Calculate a list of enriched motifs and the corresponding target genes for all modules.
		### The table is cached on disk: it is only computed when the file is absent.
		if not os.path.isfile(motifs_fname):
			df = prune2df(dbs, modules, motif_annotations, num_workers=n_cores)
			df.to_csv(motifs_fname)
		else:
			# NOTE(review): the cached table is re-read with plain `pd.read_csv`;
			# confirm that `df2regulons` accepts a frame loaded this way.
			df = pd.read_csv(motifs_fname)
		
		# The modules are no longer needed once the motif table exists.
		del modules



		### Create regulons from this table of enriched motifs (cached as a pickle).
		if not os.path.isfile(regulons_fname):
			regulons = df2regulons(df)
			# Context manager so the handle is flushed and closed deterministically.
			with open(regulons_fname, 'wb') as regulons_file:
				pickle.dump(regulons, regulons_file)
		else:
			# NOTE: only load pickles produced by this pipeline; unpickling
			# untrusted files can execute arbitrary code.
			with open(regulons_fname, 'rb') as regulons_file:
				regulons = pickle.load(regulons_file)

		# The motif table is no longer needed once the regulons exist.
		del df



		## Cellular regulon enrichment matrices (cached as a gzip-compressed CSV)
		if not os.path.isfile(aucell_train_fname):
			auc_train = aucell(data_train, regulons, num_workers=n_cores)
			auc_train.to_csv(aucell_train_fname, sep=',', header=True, index=True, compression='gzip')
		else:
			auc_train = pd.read_csv(aucell_train_fname, index_col=0)
Exemple #3
0
        ex_matrix, tf_names=tf_names,
        verbose=True)  # runs improved GRNBoost instance of GENIE3
    # Identify co-expression modules from the GRNBoost2 adjacencies.
    modules = list(modules_from_adjacencies(adjacencies, ex_matrix))

    # Write the GRNBoost2 result to disk so the inference need not be repeated,
    # then read it straight back in case something goes wrong later on.
    grnboost_csv = "grnboost_output.csv"
    adjacencies.to_csv(grnboost_csv)
    adjacencies = pd.read_csv(grnboost_csv, index_col=0)

    # cisTarget step: build the table of enriched motifs and their target genes
    # for all modules, then turn that table into regulons.
    with ProgressBar():
        df = prune2df(dbs, modules, "motifs-v9-nr-mgi.txt")
    regulons = df2regulons(df)

    # Persist the discovered motifs (CSV) and regulons (pickle).
    df.to_csv(motifs_filename)
    with open(regulons_filename, "wb") as regulons_file:
        pickle.dump(regulons, regulons_file)

    # Reload both artefacts from the files just written.
    df = load_motifs(motifs_filename)
    with open(regulons_filename, "rb") as regulons_file:
        regulons = pickle.load(regulons_file)

    # AUCell step: score each discovered regulon against the expression matrix.
    auc_matrix = aucell(ex_matrix, regulons, num_workers=4)

    # export the product back to R for analysis
Exemple #4
0
if __name__ == '__main__':
	# Locations of every artefact read or written below.
	adj_path = f'{fd_out}/adj.csv'
	prune_path = f'{fd_out}/prune.csv'
	regulon_path = f'{fd_out}/regulon.pkl'

	# Step 1 (co-expression inference) is not run here. The adjacency table is
	# expected to exist already, produced beforehand with:
	#     df_adj = grnboost2(df_cnt, tf_names=tf_name, verbose=True)
	#     df_adj.to_csv(f'{fd_out}/adj.csv', index=False)

	# Step 2: prune. The adjacencies are read back from disk; per the original
	# author's note, without this reload the run always stalls at 98%.
	df_adj = pd.read_csv(adj_path)
	print('Prune...')
	l_mod = list(modules_from_adjacencies(df_adj, df_cnt))

	with ProgressBar():
		df_prune = prune2df(l_db, l_mod, f_motif)
	df_prune.to_csv(prune_path)

	# Step 3: build regulons from the pruned motif table.
	print('Regulon...')
	regulon = df2regulons(df_prune)

	# Step 4: store the regulons as a pickle.
	with open(regulon_path, "wb") as pkl_out:
		pickle.dump(regulon, pkl_out)

	# Step 5: AUCell. The regulons are reloaded from the pickle first; per the
	# original author's note, the run always stalls if this reload is skipped.
	print('AUC...')
	with open(regulon_path, "rb") as pkl_in:
		regulon = pickle.load(pkl_in)

	df_auc = aucell(df_cnt, regulon, num_workers=10)
	df_auc.to_csv(f'{fd_out}/auc.csv')