コード例 #1
0
def aucell_command(args):
    """
    Score cells for regulon activity with AUCell and write the AUC matrix as CSV.

    The regulons are obtained from ``args.regulons_fname`` based on its file
    name: names ending in one of the ``FILE_EXTENSION2SEPARATOR`` extensions
    are converted with ``_df2regulons``, ``.gmt`` files are parsed with
    ``GeneSignature.from_gmt`` (tab-separated fields and genes), and anything
    else is handed to ``_load_modules``.

    :param args: parsed command-line arguments; this function reads
        ``regulons_fname``, ``nomenclature``, ``auc_threshold``, ``weights``,
        ``num_workers`` and ``output`` (plus whatever
        ``_load_expression_matrix`` needs).
    """
    LOGGER.info("Loading expression matrix.")
    ex_mtx = _load_expression_matrix(args)

    regulons_path = args.regulons_fname.name
    # str.endswith accepts a tuple, so all known table extensions are tested at once.
    if regulons_path.endswith(tuple(FILE_EXTENSION2SEPARATOR.keys())):
        LOGGER.info("Creating regulons.")
        regulons = _df2regulons(regulons_path, args.nomenclature)
    elif regulons_path.endswith('.gmt'):
        LOGGER.info("Loading regulons.")
        regulons = GeneSignature.from_gmt(
            regulons_path,
            args.nomenclature,
            field_separator='\t',
            gene_separator='\t',
        )
    else:
        LOGGER.info("Loading regulons.")
        regulons = _load_modules(regulons_path)

    LOGGER.info("Calculating enrichment.")
    # Gene weights are only used when the caller explicitly asked for them.
    use_weights = args.weights == 'yes'
    auc_heatmap = aucell(
        ex_mtx,
        regulons,
        auc_threshold=args.auc_threshold,
        noweights=not use_weights,
        num_cores=args.num_workers,
    )

    LOGGER.info("Writing results to file.")
    auc_heatmap.to_csv(args.output)
コード例 #2
0
ファイル: test_aucell.py プロジェクト: dpaysan/pySCENIC
def test_aucell_mismatch(exp_matrix, gs):
    """
    Run AUCell with an extra signature made of the gene names FAKE0..FAKE99.

    The synthetic signature is placed in front of the ``gs`` fixture
    signatures; presumably none of its genes occur in ``exp_matrix``
    (confirm against the fixture). The test only prints the head of the
    resulting matrix and makes no assertions.
    """
    percentiles = derive_auc_threshold(exp_matrix)

    fake_genes = ["FAKE{}".format(idx) for idx in range(100)]
    fake_signature = GeneSignature(name="test", gene2weight=fake_genes)
    gss = [fake_signature] + gs

    aucs_mtx = aucell(
        exp_matrix,
        gss,
        auc_threshold=percentiles[0.01],
        num_workers=1,
    )
    print(aucs_mtx.head())
コード例 #3
0
def aucell_command(args):
    """
    Calculate regulon enrichment (as AUC values) for cells.

    Steps: load the expression matrix with ``load_exp_matrix``, load the gene
    signatures with ``load_signatures``, run ``aucell`` and write the result.

    Where the result goes depends on ``args.output.name``:

    * a name with a ``.loom`` suffix: the input expression file is copied to
      the output path and the AUC matrix is added with ``append_auc_mtx``;
    * ``'<stdout>'``: the matrix is written with ``DataFrame.to_csv`` to
      ``args.output`` (transposed when ``args.transpose == 'yes'``);
    * anything else: the matrix is handed to ``save_matrix``.

    :param args: parsed command-line arguments.

    The process exits with status 1 when loading raises ``ValueError`` or when
    producing the loom output raises ``OSError``.
    """
    # The same flag controls how the input is read and how the output is written.
    transpose = args.transpose == 'yes'

    LOGGER.info("Loading expression matrix.")
    try:
        ex_mtx = load_exp_matrix(
            args.expression_mtx_fname.name,
            transpose,
            False,  # sparse loading is disabled here for now
            args.cell_id_attribute,
            args.gene_attribute)
    except ValueError as e:
        LOGGER.error(e)
        sys.exit(1)

    LOGGER.info("Loading gene signatures.")
    try:
        signatures = load_signatures(args.signatures_fname.name)
    except ValueError as e:
        LOGGER.error(e)
        sys.exit(1)

    LOGGER.info("Calculating cellular enrichment.")
    auc_mtx = aucell(ex_mtx,
                     signatures,
                     auc_threshold=args.auc_threshold,
                     noweights=(args.weights != 'yes'),
                     seed=args.seed,
                     num_workers=args.num_workers)

    LOGGER.info("Writing results to file.")
    output_name = args.output.name
    if '.loom' in PurePath(output_name).suffixes:
        try:
            copyfile(args.expression_mtx_fname.name, output_name)
            append_auc_mtx(output_name, auc_mtx, signatures, args.seed,
                           args.num_workers)
        except OSError as e:
            LOGGER.error(
                "Expression matrix should be provided in the loom file format."
            )
            # Also report the underlying error: an OSError here can come from
            # the copy itself (missing file, permissions, same source and
            # destination), not only from a non-loom input.
            LOGGER.error(e)
            sys.exit(1)
    elif output_name == '<stdout>':
        (auc_mtx.T if transpose else auc_mtx).to_csv(args.output)
    else:
        save_matrix(auc_mtx, output_name, transpose)
コード例 #4
0
def calcTFs(
        expr,
        tf_names,
        db,
        prefix,
        motif_path='../data/pySCENIC/ref/motifs-v9-nr.hgnc-m0.001-o0.0.tbl',
        out_path='../data/pySCENIC',
        ppn=8):
    """Computes motifs, regulons and transcription factor activity using pySCENIC.

    Arguments
    ---------
    expr: `pandas DataFrame`
        cell X gene raw counts; FPKM; not TPM as coexpression will be calculated
    tf_names: `list` (`str`)
        curated human transcription factors downloaded from github: pySCENIC/ref/hs_hgnc_curated_tfs.txt
    db: `list` (`FeatherRankingDatabase()`)
        feather files, ranking genome [FeatherRankingDatabase(name="hg38__refseq-r80__10kb_up_and_down_tss")]
    prefix: `str`
        Name used to build the output file names (eg, cell line names)
    motif_path: `str`
        Motif annotation table passed to `prune2df`
    out_path: `str`
        Directory the output files are written to
    ppn: `int` (default: `8`)
        Number of workers passed to `prune2df` and `aucell`

    Returns
    -------
    Do not return but write files (the calc takes too long...):
    `<out_path>/<prefix>_motifs.csv` and `<out_path>/<prefix>_auc_mtx.csv`
    """

    # Inference of co-expression modules
    adjacencies = grnboost2(expr, tf_names=tf_names, verbose=True)
    modules = list(modules_from_adjacencies(adjacencies, expr))

    # Calculate a list of enriched motifs and the corresponding target genes for all modules.
    with ProgressBar():
        df = prune2df(db, modules, motif_path, num_workers=ppn)

    # Create regulons from this table of enriched motifs.
    regulons = df2regulons(df)

    # Save the discovered regulons to disk.
    # NOTE(review): despite the "_motifs.csv" name this file holds a pickle of
    # the regulons, and the enriched-motif table `df` is never written. The
    # path and format are kept as they were so existing readers keep working.
    with open('{}/{}_motifs.csv'.format(out_path, prefix), "wb") as f:
        pickle.dump(regulons, f)

    # Cellular enrichment (AUC) of each regulon, written as a CSV table.
    auc_mtx = aucell(expr, regulons, num_workers=ppn)
    auc_mtx.to_csv('{}/{}_auc_mtx.csv'.format(out_path, prefix))

    print('finished calculation for %s' % (prefix))
コード例 #5
0
 def calculate_regulon_enrichment(self):
     """Calculate regulon enrichment per cell using AUCell.

     Every regulon in ``self.regulons`` is turned into a ``GeneSignature``
     with the same name, whose gene weights come from
     ``self.get_regulon_gene_data`` for the key
     ``self.auc_regulon_weights_key``.

     Returns the AUC matrix (n_cells x n_regulons) with its rows selected by
     the index of ``self.ex_mtx``.
     """
     print("Using {} to weight the genes when running AUCell.".format(
         self.auc_regulon_weights_key))

     # One weighted signature per regulon.
     regulon_signatures = [
         GeneSignature(
             name=regulon.name,
             gene2weight=self.get_regulon_gene_data(
                 regulon, self.auc_regulon_weights_key),
         )
         for regulon in self.regulons
     ]

     enrichment = aucell(self.ex_mtx,
                         regulon_signatures,
                         num_workers=self.num_workers)
     return enrichment.loc[self.ex_mtx.index]

		### Create regulons from this table of enriched motifs, or reuse the pickled copy.
		if not os.path.isfile(regulons_fname):
			regulons = df2regulons(df)
			# Context manager so the handle is flushed and closed as soon as the dump is done.
			with open(regulons_fname, 'wb') as regulons_file:
				pickle.dump(regulons, regulons_file)
		else:
			# NOTE: unpickling runs arbitrary code; only load cache files written by this pipeline.
			with open(regulons_fname, 'rb') as regulons_file:
				regulons = pickle.load(regulons_file)

		del df



		## Cellular regulon enrichment matrices (computed once, then re-read from the cached CSV)
		# NOTE(review): the caches are written gzip-compressed but read back with pandas'
		# default compression inference, which relies on the file name extension; confirm
		# that aucell_train_fname / aucell_test_fname end in '.gz'.
		if not os.path.isfile(aucell_train_fname):
			auc_train = aucell(data_train, regulons, num_workers=n_cores)
			auc_train.to_csv(aucell_train_fname, sep=',', header=True, index=True, compression='gzip')
		else:
			auc_train = pd.read_csv(aucell_train_fname, index_col=0)

		if not os.path.isfile(aucell_test_fname):
			auc_test = aucell(data_test, regulons, num_workers=n_cores)
			auc_test.to_csv(aucell_test_fname, sep=',', header=True, index=True, compression='gzip')
		else:
			auc_test = pd.read_csv(aucell_test_fname, index_col=0)



		## Filter regulons with low correlation between train and test
		# Attach the grouping column from the metadata to both AUC tables.
		auc_train[grouping_variable] = metadata[grouping_variable]
		auc_test[grouping_variable] = metadata[grouping_variable]
コード例 #7
0
        adjacencies, ex_matrix))  # identifies modules from GENIE3

    # Checkpoint the GRNBoost2 adjacencies so the inference does not have to be repeated.
    adjacencies.to_csv("grnboost_output.csv")

    # Read the checkpoint straight back; the first CSV column is used as the index.
    adjacencies = pd.read_csv("grnboost_output.csv", index_col=0)

    # cisTarget process: IDs cis-regulatory footprints from motifs around the TSS.
    # prune2df calculates a list of enriched motifs and the corresponding target
    # genes for all modules; it runs inside a ProgressBar context.
    with ProgressBar(
    ):
        df = prune2df(dbs, modules, "motifs-v9-nr-mgi.txt")
    # Create regulons from this table of enriched motifs.
    regulons = df2regulons(
        df)

    # Save the discovered motifs (CSV) and regulons (pickle).
    df.to_csv(motifs_filename)
    with open(regulons_filename, "wb") as f:
        pickle.dump(regulons, f)

    # Reload both from disk, so the rest of the script works on the saved copies.
    df = load_motifs(motifs_filename)
    with open(regulons_filename, "rb") as f:
        regulons = pickle.load(f)

    # AUCell process: finds enrichment of each discovered regulon (4 workers).
    auc_matrix = aucell(ex_matrix, regulons, num_workers=4)

    # Export the AUC matrix as CSV so it can be analysed in R.
    auc_matrix.to_csv("SCENIC_export.csv")
コード例 #8
0
    #-------------Phase II: Prune modules for targets with cis regulatory footprints (aka RcisTarget)----
    # In this fragment the phase only loads previously stored regulons from YAML.

    print("STARTING PHASE II")

    regulons = load_from_yaml(REGULONS_BIN_FNAME)
    print("LOADED regulons, type:")
    print(type(regulons))
    #print(regulons)

    #-------------Phase III: Cellular regulon enrichment matrix (aka AUCell)----------------

    print("STARTING PHASE III")

    # ex_matrix is not transposed here; per the original author it was already
    # transposed earlier (outside this fragment).
    # NOTE(review): the worker-count keyword of aucell differs between pySCENIC
    # releases (num_cores vs num_workers); the earlier variant is kept below for
    # reference - confirm against the installed version.
    #auc_mtx = aucell(ex_matrix, regulons, num_cores=nCores)
    auc_mtx = aucell(
        ex_matrix, regulons, num_workers=nCores
    )
    # Store the AUC matrix as a tab-separated table.
    auc_mtx.to_csv(AUC_FNAME, sep='\t')
    print("DEFINED auc_mtx")

    # Disabled: reload the AUC matrix from disk and draw a clustermap.
    #auc_mtx = pd.read_csv(AUC_FNAME, sep='\t', header=0, index_col=0)

    #clustermap = sns.clustermap(auc_mtx, figsize=(8,8))
    #clustermap.savefig(CLUSTERMAP_FNAME)

    #-------------Phase IV: BINARIZATION
    # binarize returns two objects, unpacked here as the binarized matrix and
    # the thresholds; both are written as tab-separated tables.

    auc_binary, auc_thresholds = binarize(auc_mtx)
    print(auc_binary)
    auc_binary.to_csv(BINARYAUC_FNAME, sep='\t')
    auc_thresholds.to_csv(BINARYTHR_FNAME, sep='\t')
コード例 #9
0
ファイル: test_aucell.py プロジェクト: zorrodong/pySCENIC
def test_aucell_w2():
    """Run AUCell with four workers at the 0.01 percentile threshold.

    Only checks that the call completes; the result is not inspected.
    """
    ex_mtx = exp_matrix()
    thresholds = derive_auc_threshold(ex_mtx)
    signatures = gs()
    aucell(
        ex_mtx,
        signatures,
        auc_threshold=thresholds[0.01],
        num_workers=4,
    )
コード例 #10
0
## Build a per-regulon metadata table: one row per regulon, indexed by its
## transcription factor, holding the target genes and how many there are.
reg_tf = [regulon.transcription_factor for regulon in regulons]
reg_target = [list(regulon.gene2weight.keys()) for regulon in regulons]
reg_num = [len(targets) for targets in reg_target]

reg_meta = pd.DataFrame([reg_num, reg_target]).T
reg_meta.index = reg_tf
reg_meta.columns = ['n_targets', 'targets']

## Calculate the AUCell scores
auc_mtx = aucell(exp_matrix, regulons, num_workers=8)
# Drop the last three characters of every column name
# (presumably the "(+)" regulon suffix - confirm).
auc_mtx.columns = auc_mtx.columns.str[:-3]

## Analyse the result with scanpy
sc_auc_mtx = sc.AnnData(X=auc_mtx)
# Label every variable as "<regulon name>(<number of targets>)".
sc_auc_mtx.var_names = [
    '{}({})'.format(regulon_name, n_targets)
    for regulon_name, n_targets in zip(auc_mtx.columns, reg_meta.n_targets)
]
sc_auc_mtx.obs = exp_meta

## Differential analysis for AUCell scores
sc.tl.rank_genes_groups(
    sc_auc_mtx,
    'leiden_cluster',
    method='wilcoxon',
    use_raw=True,
)
コード例 #11
0
    #from dask.diagnostics import ProgressBar

    #from arboreto.utils import load_tf_names
    #from arboreto.algo import grnboost2

    from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase
    from pyscenic.utils import modules_from_adjacencies, load_motifs
    from pyscenic.prune import prune2df, df2regulons
    from pyscenic.aucell import aucell
    from pyscenic.binarization import binarize

    # First snakemake input: pickled regulons.
    # NOTE: unpickling runs arbitrary code; the input must come from a trusted step.
    with open(snakemake.input[0], "rb") as f:
        regulons = pickle.load(f)

    # Second snakemake input: tab-separated expression table, transposed after
    # loading (presumably stored genes x cells - confirm).
    ex_matrix = pd.read_csv(snakemake.input[1],
                            sep='\t',
                            header=0,
                            index_col=0).T

    print("mtx print")

    # Regulon enrichment per cell; no worker count is passed, so aucell's default applies.
    auc_mtx = aucell(ex_matrix, regulons)

    # NOTE(review): pySCENIC documents binarize as returning
    # (binarized matrix, thresholds), so these two names look swapped and the
    # object written below would be the binarized matrix - confirm the intent.
    thresholds, mat = binarize(auc_mtx)

    print("binarize done")

    print("binarise save")
    # Write the first value returned by binarize to the first snakemake output.
    thresholds.to_csv(snakemake.output[0])
コード例 #12
0
if __name__ == '__main__':
	# Step 1 (co-expression inference) is disabled; its result is read from adj.csv below.
	#	print('Inference...')
	#	df_adj=grnboost2(df_cnt, tf_names=tf_name, verbose=True)
	#	df_adj.to_csv(f'{fd_out}/adj.csv', index=False)

	# 2. Prune: derive modules from the stored adjacencies, then look for enriched motifs.
	# Per the original author, without re-reading the adjacencies from disk the run
	# always gets stuck at 98%.
	df_adj = pd.read_csv(f'{fd_out}/adj.csv')
	print('Prune...')
	l_mod = list(modules_from_adjacencies(df_adj, df_cnt))

	with ProgressBar():
		df_prune = prune2df(l_db, l_mod, f_motif)
	df_prune.to_csv(f'{fd_out}/prune.csv')

	# 3. Create the regulons from the pruned motif table.
	print('Regulon...')
	regulon = df2regulons(df_prune)

	# 4. Save the discovered regulons.
	with open(f'{fd_out}/regulon.pkl', 'wb') as f:
		pickle.dump(regulon, f)

	# 5. AUC: the regulons are reloaded from the pickle first; per the original
	# author the run always gets stuck without this round trip.
	print('AUC...')
	with open(f'{fd_out}/regulon.pkl', 'rb') as f:
		regulon = pickle.load(f)

	df_auc = aucell(df_cnt, regulon, num_workers=10)
	df_auc.to_csv(f'{fd_out}/auc.csv')
コード例 #13
0
args = parser_grn.parse_args()

# Load the expression matrix from the loom file given on the command line.
ex_matrix_df = utils.get_matrix(loom_file_path=args.expression_mtx_fname.name,
                                gene_attribute=args.gene_attribute,
                                cell_id_attribute=args.cell_id_attribute)

# Read the signatures, filtered by gene occurrence and by minimum number of genes.
signatures = utils.read_signatures_from_tsv_dir(
    dpath=args.signatures_fname,
    noweights=False,
    weight_threshold=args.min_regulon_gene_occurrence,
    min_genes=args.min_genes)

if len(signatures) == 0:
    # Report the values that were actually passed to the filter above: the
    # minimum gene count is read from args.min_genes ('min_genes_regulon' is
    # only the label shown to the user).
    raise Exception(
        f"No signature passing filtering. Please consider to adapt 'min_genes_regulon = {args.min_genes}' and 'min_regulon_gene_occurrence = {args.min_regulon_gene_occurrence}' (see params.sc.scenic.aucell). Make sure these settings are smaller than numRuns (params.sc.scenic)."
    )

# Use the fixed AUC threshold unless a percentile was requested, in which case
# the threshold is derived from the expression matrix.
auc_threshold = args.auc_threshold

if args.percentile_threshold is not None:
    percentiles = derive_auc_threshold(ex_matrix_df)
    auc_threshold = percentiles[args.percentile_threshold]

aucs_mtx = aucell(ex_matrix_df,
                  signatures,
                  auc_threshold=auc_threshold,
                  num_workers=args.num_workers)
# Write the AUC matrix as a tab-separated table, including the index.
aucs_mtx.to_csv(path_or_buf=args.output, index=True, sep='\t')
コード例 #14
0
ファイル: test_aucell.py プロジェクト: dpaysan/pySCENIC
def test_aucell_w2(exp_matrix, gs):
    """Run AUCell on the fixtures with four workers at the 0.01 percentile threshold.

    Only checks that the call completes; the result is not inspected.
    """
    thresholds = derive_auc_threshold(exp_matrix)
    aucell(
        exp_matrix,
        gs,
        auc_threshold=thresholds[0.01],
        num_workers=4,
    )
コード例 #15
0
    import pandas as pd
    import numpy as np

    #from dask.diagnostics import ProgressBar

    #from arboreto.utils import load_tf_names
    #from arboreto.algo import grnboost2

    from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase
    from pyscenic.utils import modules_from_adjacencies, load_motifs
    from pyscenic.prune import prune2df, df2regulons
    from pyscenic.aucell import aucell
    from pyscenic.binarization import binarize


    # First snakemake input: pickled regulons.
    # NOTE: unpickling runs arbitrary code; the input must come from a trusted step.
    with open(snakemake.input[0], "rb") as f:
        regulons = pickle.load(f)

    # Second snakemake input: tab-separated expression table, transposed after
    # loading (presumably stored genes x cells - confirm).
    ex_matrix = pd.read_csv(snakemake.input[1], sep='\t', header=0, index_col=0).T

    print("mtx print")

    # aucell has no `cores` keyword; the worker count is passed as `num_workers`.
    auc_mtx = aucell(ex_matrix, regulons, num_workers=6)

    # NOTE(review): pySCENIC documents binarize as returning
    # (binarized matrix, thresholds), so these two names look swapped and the
    # object written below would be the binarized matrix - confirm the intent.
    thresholds, mat = binarize(auc_mtx)

    print("binarize done")

    print("binarise save")
    # Write the first value returned by binarize to the first snakemake output.
    thresholds.to_csv(snakemake.output[0])