def calcTFs(expr, tf_names, db, prefix,
            motif_path='../data/pySCENIC/ref/motifs-v9-nr.hgnc-m0.001-o0.0.tbl',
            out_path='../data/pySCENIC', ppn=8):
    """Compute motifs, regulons and transcription factor activity using pySCENIC.

    Runs, in order: GRNBoost2 co-expression inference, module extraction,
    motif pruning (`prune2df`), regulon construction (`df2regulons`) and
    AUCell scoring. Results are written to disk instead of being returned.

    Arguments
    ---------
    expr: `pandas DataFrame`
        cell X gene expression matrix (original author's note: raw counts /
        FPKM, not TPM, because co-expression is computed from it)
    tf_names: `list` (`str`)
        curated human transcription factors, e.g. downloaded from github:
        pySCENIC/ref/hs_hgnc_curated_tfs.txt
    db: `list` (`FeatherRankingDatabase()`)
        feather ranking databases, e.g.
        [FeatherRankingDatabase(name="hg38__refseq-r80__10kb_up_and_down_tss")]
    prefix: `str`
        name used to build the output file names (eg, cell line names);
        required, there is no default
    motif_path: `str`
        path handed to `prune2df` as its third positional argument
    out_path: `str`
        directory the output files are written into
    ppn: `int` (default: 8)
        forwarded as `num_workers` to `prune2df` and `aucell`

    Returns
    -------
    None. Writes two files (the calculation takes too long to redo):
        `{out_path}/{prefix}_motifs.csv`  -- pickled regulons
        `{out_path}/{prefix}_auc_mtx.csv` -- AUCell matrix as CSV
    """
    # Inference of co-expression modules
    adjacencies = grnboost2(expr, tf_names=tf_names, verbose=True)
    modules = list(modules_from_adjacencies(adjacencies, expr))

    # Calculate a list of enriched motifs and the corresponding target genes
    # for all modules.
    with ProgressBar():
        df = prune2df(db, modules, motif_path, num_workers=ppn)

    # Create regulons from this table of enriched motifs.
    regulons = df2regulons(df)

    # Save the discovered regulons to disk.
    # NOTE(review): despite the `.csv` suffix this file holds the *pickled
    # regulons*, not a CSV of the motif table. The name and content are kept
    # unchanged so that existing readers of this file keep working.
    with open('{}/{}_motifs.csv'.format(out_path, prefix), "wb") as f:
        pickle.dump(regulons, f)

    # Score every regulon in every cell and persist the matrix.
    auc_mtx = aucell(expr, regulons, num_workers=ppn)
    auc_mtx.to_csv('{}/{}_auc_mtx.csv'.format(out_path, prefix))

    print('finished calculation for %s' % (prefix))
## Prune modules for targets with cis regulatory footprints (aka RcisTarget)
### Calculate a list of enriched motifs and the corresponding target genes for all modules.
# Each stage below is skipped when its output file already exists, so an
# interrupted run can resume from the last finished stage.
if not os.path.isfile(motifs_fname):
    df = prune2df(dbs, modules, motif_annotations, num_workers=n_cores)
    df.to_csv(motifs_fname)
else:
    # NOTE(review): a plain read_csv may not restore the table in the layout
    # df2regulons expects; pySCENIC provides a dedicated motif loader
    # (pyscenic.utils.load_motifs) -- confirm which one is appropriate here.
    df = pd.read_csv(motifs_fname)
del modules  # no longer needed once the motif table is available

### Create regulons from this table of enriched motifs.
if not os.path.isfile(regulons_fname):
    regulons = df2regulons(df)
    # Context managers make sure the pickle file is flushed and closed.
    with open(regulons_fname, 'wb') as regulons_file:
        pickle.dump(regulons, regulons_file)
else:
    with open(regulons_fname, 'rb') as regulons_file:
        regulons = pickle.load(regulons_file)
del df  # the motif table is only needed to build the regulons

## Cellular regulon enrichment matrices
if not os.path.isfile(aucell_train_fname):
    auc_train = aucell(data_train, regulons, num_workers=n_cores)
    auc_train.to_csv(aucell_train_fname, sep=',', header=True, index=True, compression='gzip')
else:
    # The file is always written gzip-compressed above, so read it back the
    # same way instead of relying on the file extension being recognised.
    auc_train = pd.read_csv(aucell_train_fname, index_col=0, compression='gzip')
ex_matrix, tf_names=tf_names, verbose=True) # runs improved GRNBoost instance of GENIE3 modules = list(modules_from_adjacencies( adjacencies, ex_matrix)) # identifies modules from GENIE3 # save GRNBoost2 product so we don't have to repeat again adjacencies.to_csv("grnboost_output.csv") # load product in case something goes wrong adjacencies = pd.read_csv("grnboost_output.csv", index_col=0) # cisTarget process: IDs cis-regulatory footprints from motifs around the TSS with ProgressBar( ): # calculate a list of enriched motifs and the corresponding target genes for all modules df = prune2df(dbs, modules, "motifs-v9-nr-mgi.txt") regulons = df2regulons( df) # create regulons from this table of enriched motifs # save the discovered motifs and regulons df.to_csv(motifs_filename) with open(regulons_filename, "wb") as f: pickle.dump(regulons, f) # load the discovered motifs and regulons if saved previously df = load_motifs(motifs_filename) with open(regulons_filename, "rb") as f: regulons = pickle.load(f) # AUCell process: finds enrichment of each discovered regulon auc_matrix = aucell(ex_matrix, regulons, num_workers=4) # export the product back to R for analysis
if __name__ == '__main__':
    # Step 1 -- inference of co-expression modules -- is disabled here; its
    # result is expected to already exist as `{fd_out}/adj.csv`.
    # print('Inference...')
    # df_adj=grnboost2(df_cnt, tf_names=tf_name, verbose=True)
    # df_adj.to_csv(f'{fd_out}/adj.csv', index=False)

    # Step 2 -- prune.
    # The adjacencies are deliberately reloaded from disk: per the original
    # author's note, without this reload the run always gets stuck at 98%.
    df_adj = pd.read_csv(f'{fd_out}/adj.csv')
    print('Prune...')
    l_mod = list(modules_from_adjacencies(df_adj, df_cnt))
    with ProgressBar():
        df_prune = prune2df(l_db, l_mod, f_motif)
    df_prune.to_csv(f'{fd_out}/prune.csv')

    # Step 3 -- build regulons from the pruned motif table.
    print('Regulon...')
    regulon = df2regulons(df_prune)

    # Step 4 -- persist the discovered regulons.
    with open(f'{fd_out}/regulon.pkl', "wb") as pkl_out:
        pickle.dump(regulon, pkl_out)

    # Step 5 -- AUCell scoring.
    print('AUC...')
    # The regulons are read back from the pickle on purpose: per the original
    # author's note, skipping this reload makes the AUC step get stuck.
    with open(f'{fd_out}/regulon.pkl', "rb") as pkl_in:
        regulon = pickle.load(pkl_in)
    df_auc = aucell(df_cnt, regulon, num_workers=10)
    df_auc.to_csv(f'{fd_out}/auc.csv')