def aucell_command(args):
    """
    Calculate regulon enrichment (as AUC values) for cells.

    The regulons are obtained from the file named by ``args.regulons_fname``:
    a name ending in one of the ``FILE_EXTENSION2SEPARATOR`` extensions is
    converted with ``_df2regulons``, a ``.gmt`` file is parsed with
    ``GeneSignature.from_gmt`` (tab-separated fields and genes), and any other
    file is passed to ``_load_modules``. The resulting AUC matrix is written
    as CSV to ``args.output``.
    """
    LOGGER.info("Loading expression matrix.")
    ex_mtx = _load_expression_matrix(args)

    regulons_path = args.regulons_fname.name
    if regulons_path.endswith(tuple(FILE_EXTENSION2SEPARATOR.keys())):
        LOGGER.info("Creating regulons.")
        regulons = _df2regulons(regulons_path, args.nomenclature)
    elif regulons_path.endswith('.gmt'):
        LOGGER.info("Loading regulons.")
        regulons = GeneSignature.from_gmt(
            regulons_path,
            args.nomenclature,
            field_separator='\t',
            gene_separator='\t',
        )
    else:
        LOGGER.info("Loading regulons.")
        regulons = _load_modules(regulons_path)

    LOGGER.info("Calculating enrichment.")
    # Gene weights are only used when the user explicitly asked for them.
    ignore_weights = args.weights != 'yes'
    auc_heatmap = aucell(
        ex_mtx,
        regulons,
        auc_threshold=args.auc_threshold,
        noweights=ignore_weights,
        num_cores=args.num_workers,
    )

    LOGGER.info("Writing results to file.")
    auc_heatmap.to_csv(args.output)
def test_aucell_mismatch(exp_matrix, gs):
    """Run AUCell on ``gs`` preceded by a signature built from FAKE0..FAKE99.

    The test only prints the head of the resulting matrix; it makes no
    assertions, so it passes as long as ``aucell`` does not raise.
    """
    percentiles = derive_auc_threshold(exp_matrix)

    # Signature whose genes are the synthetic names FAKE0 ... FAKE99.
    fake_genes = ["FAKE{}".format(idx) for idx in range(100)]
    fake_signature = GeneSignature(name="test", gene2weight=fake_genes)
    gss = [fake_signature] + gs

    aucs_mtx = aucell(
        exp_matrix,
        gss,
        auc_threshold=percentiles[0.01],
        num_workers=1,
    )
    print(aucs_mtx.head())
def aucell_command(args):
    """
    Calculate regulon enrichment (as AUC values) for cells.

    Steps:
      1. Load the expression matrix with ``load_exp_matrix`` and the gene
         signatures with ``load_signatures``. A ``ValueError`` from either
         loader is logged and the process exits with status 1.
      2. Run ``aucell`` on the matrix and signatures.
      3. Write the result, depending on ``args.output.name``:
         * a name with a ``.loom`` suffix: the input expression file is copied
           to the output path and the AUC matrix is appended to that copy; an
           ``OSError`` is logged and the process exits with status 1;
         * ``'<stdout>'``: the matrix is written as CSV to ``args.output``;
         * anything else: delegated to ``save_matrix``.
    """
    LOGGER.info("Loading expression matrix.")
    transpose = args.transpose == 'yes'
    try:
        ex_mtx = load_exp_matrix(
            args.expression_mtx_fname.name,
            transpose,
            False,  # sparse loading is disabled here for now
            args.cell_id_attribute,
            args.gene_attribute,
        )
    except ValueError as err:
        LOGGER.error(err)
        sys.exit(1)

    LOGGER.info("Loading gene signatures.")
    try:
        signatures = load_signatures(args.signatures_fname.name)
    except ValueError as err:
        LOGGER.error(err)
        sys.exit(1)

    LOGGER.info("Calculating cellular enrichment.")
    auc_mtx = aucell(
        ex_mtx,
        signatures,
        auc_threshold=args.auc_threshold,
        noweights=(args.weights != 'yes'),
        seed=args.seed,
        num_workers=args.num_workers,
    )

    LOGGER.info("Writing results to file.")
    output_name = args.output.name

    if '.loom' in PurePath(output_name).suffixes:
        # The loom output starts as a copy of the input expression file.
        try:
            copyfile(args.expression_mtx_fname.name, output_name)
            append_auc_mtx(output_name, auc_mtx, signatures, args.seed,
                           args.num_workers)
        except OSError:
            LOGGER.error(
                "Expression matrix should be provided in the loom file format."
            )
            sys.exit(1)
        return

    if output_name == '<stdout>':
        to_write = auc_mtx.T if transpose else auc_mtx
        to_write.to_csv(args.output)
        return

    save_matrix(auc_mtx, output_name, transpose)
def calcTFs(
        expr,
        tf_names,
        db,
        prefix,
        motif_path='../data/pySCENIC/ref/motifs-v9-nr.hgnc-m0.001-o0.0.tbl',
        out_path='../data/pySCENIC',
        ppn=8):
    """Computes motifs, regulons and transcriptional factor activation using pySCENIC.

    Arguments
    ---------
    expr: `pandas DataFrame`
        cell X gene raw counts; FPKM; not TPM as coexpression will be calculated
    tf_names: `list` (`str`)
        curated human transcriptional factor downloaded from github:
        pySCENIC/ref/hs_hgnc_curated_tfs.txt
    db: `list` (`FeatherRankingDatabase()`)
        feather files, ranking genome
        [FeatherRankingDatabase(name="hg38__refseq-r80__10kb_up_and_down_tss")]
    prefix: `str`
        Name used to build the output file names (eg, cell line names).
        Required; there is no default.
    motif_path: `str`
        Motif annotation table handed to `prune2df`.
    out_path: `str`
        Directory the two output files are written into.
    ppn: `int` (default: 8)
        Passed as `num_workers` to both `prune2df` and `aucell`.

    Returns
    -------
    Do not return but write files (the calc takes too long...):
    `{out_path}/{prefix}_motifs.csv` (pickled regulons) and
    `{out_path}/{prefix}_auc_mtx.csv` (AUC matrix as CSV).
    """
    # Inference of co-expression modules
    adjacencies = grnboost2(expr, tf_names=tf_names, verbose=True)
    modules = list(modules_from_adjacencies(adjacencies, expr))

    # Calculate a list of enriched motifs and the corresponding target genes
    # for all modules.
    with ProgressBar():
        df = prune2df(db, modules, motif_path, num_workers=ppn)

    # Create regulons from this table of enriched motifs.
    regulons = df2regulons(df)

    # Save the discovered regulons to disk.
    # NOTE(review): despite the ".csv" name this file holds a pickle of the
    # regulons, not a CSV table. The name is kept so existing readers of the
    # file keep working.
    with open('{}/{}_motifs.csv'.format(out_path, prefix), "wb") as f:
        pickle.dump(regulons, f)

    auc_mtx = aucell(expr, regulons, num_workers=ppn)
    auc_mtx.to_csv('{}/{}_auc_mtx.csv'.format(out_path, prefix))
    print('finished calculation for %s' % (prefix))
def calculate_regulon_enrichment(self):
    """Calculate regulon enrichment per cell using AUCell.

    Each regulon in ``self.regulons`` is turned into a ``GeneSignature`` that
    keeps the regulon's name and takes its gene weights from
    ``self.get_regulon_gene_data`` for the key ``self.auc_regulon_weights_key``.

    Returns the AUCell result with its rows selected and ordered by
    ``self.ex_mtx.index``.
    """
    print("Using {} to weight the genes when running AUCell.".format(
        self.auc_regulon_weights_key))

    # Create regulons with weight based on given key
    regulon_signatures = [
        GeneSignature(
            name=regulon.name,
            gene2weight=self.get_regulon_gene_data(
                regulon, self.auc_regulon_weights_key),
        )
        for regulon in self.regulons
    ]

    # (n_cells x n_regulons), per the original author's annotation
    enrichment = aucell(
        self.ex_mtx,
        regulon_signatures,
        num_workers=self.num_workers,
    )
    return enrichment.loc[self.ex_mtx.index]
### Create regulons from this table of enriched motifs.
# The regulons are cached on disk: built from `df` and pickled when the file
# does not exist yet, otherwise reloaded from the pickle.
if not os.path.isfile(regulons_fname):
    regulons = df2regulons(df)
    # Context managers make sure the pickle file handle is closed (and the
    # data flushed) instead of being left to the garbage collector.
    with open(regulons_fname, 'wb') as regulons_file:
        pickle.dump(regulons, regulons_file)
else:
    with open(regulons_fname, 'rb') as regulons_file:
        regulons = pickle.load(regulons_file)
del df

## Cellular regulon enrichment matrices
# Same caching scheme for the train and test AUC matrices: computed with
# AUCell and written as gzip-compressed CSV on first run, read back afterwards.
if not os.path.isfile(aucell_train_fname):
    auc_train = aucell(data_train, regulons, num_workers=n_cores)
    auc_train.to_csv(aucell_train_fname,
                     sep=',',
                     header=True,
                     index=True,
                     compression='gzip')
else:
    # NOTE(review): the read relies on pandas inferring gzip compression from
    # the file name - confirm the cache file names end in ".gz".
    auc_train = pd.read_csv(aucell_train_fname, index_col=0)

if not os.path.isfile(aucell_test_fname):
    auc_test = aucell(data_test, regulons, num_workers=n_cores)
    auc_test.to_csv(aucell_test_fname,
                    sep=',',
                    header=True,
                    index=True,
                    compression='gzip')
else:
    auc_test = pd.read_csv(aucell_test_fname, index_col=0)

## Filter regulons with low correlation between train and test
# Attach the grouping column from the metadata to both matrices.
auc_train[grouping_variable] = metadata[grouping_variable]
auc_test[grouping_variable] = metadata[grouping_variable]
adjacencies, ex_matrix)) # identifies modules from GENIE3 # save GRNBoost2 product so we don't have to repeat again adjacencies.to_csv("grnboost_output.csv") # load product in case something goes wrong adjacencies = pd.read_csv("grnboost_output.csv", index_col=0) # cisTarget process: IDs cis-regulatory footprints from motifs around the TSS with ProgressBar( ): # calculate a list of enriched motifs and the corresponding target genes for all modules df = prune2df(dbs, modules, "motifs-v9-nr-mgi.txt") regulons = df2regulons( df) # create regulons from this table of enriched motifs # save the discovered motifs and regulons df.to_csv(motifs_filename) with open(regulons_filename, "wb") as f: pickle.dump(regulons, f) # load the discovered motifs and regulons if saved previously df = load_motifs(motifs_filename) with open(regulons_filename, "rb") as f: regulons = pickle.load(f) # AUCell process: finds enrichment of each discovered regulon auc_matrix = aucell(ex_matrix, regulons, num_workers=4) # export the product back to R for analysis auc_matrix.to_csv("SCENIC_export.csv")
#-------------Phase II: Prune modules for targets with cis regulatory footprints (aka RcisTarget)----
# In this fragment the regulons are only loaded from a YAML file.
print("STARTING PHASE II")
regulons = load_from_yaml(REGULONS_BIN_FNAME)
print("LOADED regulons, type:", type(regulons), sep="\n")
#print(regulons)

#-------------Phase III: Cellular regulon enrichment matrix (aka AUCell)----------------
print("STARTING PHASE III")
# Author's note: don't transpose ex_matrix again as it was already transposed above.
# NOTE(review): an earlier comment here said the keyword "should be num_cores",
# with this commented-out variant kept next to it:
#auc_mtx = aucell(ex_matrix, regulons, num_cores=nCores)
# The live call passes num_workers - confirm against the installed pySCENIC.
auc_mtx = aucell(ex_matrix, regulons, num_workers=nCores)
auc_mtx.to_csv(path_or_buf=AUC_FNAME, sep='\t')
print("DEFINED auc_mtx")

#auc_mtx = pd.read_csv(AUC_FNAME, sep='\t', header=0, index_col=0)
#clustermap = sns.clustermap(auc_mtx, figsize=(8,8))
#clustermap.savefig(CLUSTERMAP_FNAME)

#-------------Phase IV: BINARIZATION
# binarize() yields two objects; each one is written to its own TSV file.
auc_binary, auc_thresholds = binarize(auc_mtx)
print(auc_binary)
auc_binary.to_csv(BINARYAUC_FNAME, sep='\t')
auc_thresholds.to_csv(BINARYTHR_FNAME, sep='\t')
def test_aucell_w2():
    """Run AUCell with four workers on the data from ``exp_matrix()`` and ``gs()``.

    There are no assertions: the test passes when ``aucell`` completes without
    raising.
    """
    expression = exp_matrix()
    auc_percentiles = derive_auc_threshold(expression)
    signatures = gs()
    aucell(
        expression,
        signatures,
        auc_threshold=auc_percentiles[0.01],
        num_workers=4,
    )
## Make a meta matrix for the regulon
# One pass over the regulons collecting (transcription factor, target genes).
regulon_rows = [
    (regulon.transcription_factor, list(regulon.gene2weight.keys()))
    for regulon in regulons
]
reg_tf = [tf for tf, _ in regulon_rows]
reg_target = [targets for _, targets in regulon_rows]
reg_num = [len(targets) for targets in reg_target]

# Rows are indexed by transcription factor; the columns hold the number of
# targets and the target list.
reg_meta = pd.DataFrame([reg_num, reg_target]).T
reg_meta.index = reg_tf
reg_meta.columns = ['n_targets', 'targets']

## Calculate the AUCell scores
auc_mtx = aucell(exp_matrix, regulons, num_workers=8)
# Drop the last three characters of every column name
# (presumably the "(+)" regulon suffix - TODO confirm).
auc_mtx.columns = auc_mtx.columns.str[:-3]

## Analysis the result with scanpy
sc_auc_mtx = sc.AnnData(X=auc_mtx)
# Variable names become "<column>(<n_targets>)".
sc_auc_mtx.var_names = [
    '{!s}({!s})'.format(column, n_targets)
    for column, n_targets in zip(auc_mtx.columns, reg_meta.n_targets)
]
sc_auc_mtx.obs = exp_meta

## Differential analysis for AUCell scores
# NOTE(review): use_raw=True is requested but nothing in the visible code sets
# sc_auc_mtx.raw - confirm this is intended.
sc.tl.rank_genes_groups(
    sc_auc_mtx,
    'leiden_cluster',
    method='wilcoxon',
    use_raw=True,
)
#from dask.diagnostics import ProgressBar
#from arboreto.utils import load_tf_names
#from arboreto.algo import grnboost2
from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies, load_motifs
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell
from pyscenic.binarization import binarize

# Input 0: pickled regulons.
with open(snakemake.input[0], "rb") as regulons_file:
    regulons = pickle.load(regulons_file)

# Input 1: tab-separated matrix with a header row and an index column; it is
# transposed after loading.
ex_matrix = pd.read_csv(
    snakemake.input[1],
    sep='\t',
    header=0,
    index_col=0,
).transpose()
print("mtx print")

auc_mtx = aucell(ex_matrix, regulons)

# NOTE(review): pySCENIC documents binarize() as returning
# (binarized matrix, thresholds); if so the two names below are swapped and
# the file written at the end holds the binarized matrix - confirm.
thresholds, mat = binarize(auc_mtx)
print("binarize done")

print("binarise save")
thresholds.to_csv(snakemake.output[0])
if __name__ == '__main__':
    # Files produced / consumed below, all inside fd_out.
    f_adj = f'{fd_out}/adj.csv'
    f_prune = f'{fd_out}/prune.csv'
    f_regulon = f'{fd_out}/regulon.pkl'

    # #1. Inference of co-expression modules
    # print('Inference...')
    # df_adj=grnboost2(df_cnt, tf_names=tf_name, verbose=True)
    # df_adj.to_csv(f'{fd_out}/adj.csv', index=False)

    #2. prune
    # Adjacencies are read back from disk (author's note: if this reload is
    # missing, the run always gets stuck at 98%).
    df_adj = pd.read_csv(f_adj)
    print('Prune...')
    l_mod = list(modules_from_adjacencies(df_adj, df_cnt))
    with ProgressBar():
        df_prune = prune2df(l_db, l_mod, f_motif)
    df_prune.to_csv(f_prune)

    #3. create regulon
    print('Regulon...')
    regulon = df2regulons(df_prune)

    #4. Save the discovered regulons
    with open(f_regulon, "wb") as f:
        pickle.dump(regulon, f)

    #5. auc
    print('AUC...')
    # Regulons are reloaded from the pickle before AUCell (author's note: if
    # this reload is missing, the run always gets stuck).
    with open(f_regulon, "rb") as f:
        regulon = pickle.load(f)
    df_auc = aucell(df_cnt, regulon, num_workers=10)
    df_auc.to_csv(f'{fd_out}/auc.csv')
args = parser_grn.parse_args()

# Load the expression matrix from the loom file given on the command line.
ex_matrix_df = utils.get_matrix(
    loom_file_path=args.expression_mtx_fname.name,
    gene_attribute=args.gene_attribute,
    cell_id_attribute=args.cell_id_attribute)

# Read the signatures from a directory of TSV files, filtered by
# args.min_regulon_gene_occurrence and args.min_genes.
signatures = utils.read_signatures_from_tsv_dir(
    dpath=args.signatures_fname,
    noweights=False,
    weight_threshold=args.min_regulon_gene_occurrence,
    min_genes=args.min_genes)

if len(signatures) == 0:
    # Report the value that was actually used for filtering (args.min_genes).
    # The message previously read args.min_genes_regulon, an attribute that is
    # not used anywhere else here; a missing attribute would raise
    # AttributeError and hide this error.
    raise Exception(
        "No signature passing filtering. Please consider to adapt "
        f"'min_genes_regulon = {args.min_genes}' and "
        f"'min_regulon_gene_occurrence = {args.min_regulon_gene_occurrence}' "
        "(see params.sc.scenic.aucell). Make sure these settings are smaller "
        "than numRuns (params.sc.scenic)."
    )

# A percentile threshold, when given, overrides the fixed AUC threshold.
auc_threshold = args.auc_threshold
if args.percentile_threshold is not None:
    percentiles = derive_auc_threshold(ex_matrix_df)
    auc_threshold = percentiles[args.percentile_threshold]

aucs_mtx = aucell(ex_matrix_df,
                  signatures,
                  auc_threshold=auc_threshold,
                  num_workers=args.num_workers)
# Tab-separated output including the index.
aucs_mtx.to_csv(path_or_buf=args.output, index=True, sep='\t')
def test_aucell_w2(exp_matrix, gs):
    """Run AUCell with four workers on the ``exp_matrix`` and ``gs`` arguments.

    There are no assertions: the test passes when ``aucell`` completes without
    raising.
    """
    auc_percentiles = derive_auc_threshold(exp_matrix)
    aucell(
        exp_matrix,
        gs,
        auc_threshold=auc_percentiles[0.01],
        num_workers=4,
    )
import pandas as pd
import numpy as np
#from dask.diagnostics import ProgressBar
#from arboreto.utils import load_tf_names
#from arboreto.algo import grnboost2
from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase
from pyscenic.utils import modules_from_adjacencies, load_motifs
from pyscenic.prune import prune2df, df2regulons
from pyscenic.aucell import aucell
from pyscenic.binarization import binarize

# Input 0: pickled regulons.
with open(snakemake.input[0], "rb") as f:
    regulons = pickle.load(f)

# Input 1: tab-separated matrix with a header row and an index column; it is
# transposed after loading.
ex_matrix = pd.read_csv(snakemake.input[1], sep='\t', header=0, index_col=0).T
print("mtx print")

# aucell() takes its degree of parallelism through `num_workers`; the former
# `cores` keyword is not a parameter of aucell and raises TypeError.
auc_mtx = aucell(ex_matrix, regulons, num_workers=6)

# NOTE(review): pySCENIC documents binarize() as returning
# (binarized matrix, thresholds); if so the two names below are swapped and
# the file written at the end holds the binarized matrix - confirm.
thresholds, mat = binarize(auc_mtx)
print("binarize done")

print("binarise save")
thresholds.to_csv(snakemake.output[0])