예제 #1
0
    def binarize(self):
        """Binarize the AUCell matrix stored on this instance.

        Passes ``self.auc_mtx`` to ``pyscenic.binarization.binarize`` and
        returns the first element of its result. The second element (the
        AUC thresholds) is not needed here and is discarded.

        :return: The binarized matrix produced from ``self.auc_mtx``.
        """
        # Imported inside the method so pySCENIC is only required when the
        # method is actually called. Aliased because the bare name would
        # otherwise read like a recursive call to this method.
        from pyscenic.binarization import binarize as _binarize

        binarized, _ = _binarize(self.auc_mtx)
        return binarized
	auc_mtx = aucell(data, cortest_passed_regulons, num_workers=n_cores)
	auc_mtx.to_csv(final_regulons_aucell_fname, sep=',', header=True, index=True, compression='gzip')



	## Calculate mean score per regulon from multiple iterations
	auc_mtx = auc_mtx.T
	auc_mtx['regulon'] = [re.sub('[.].*', '', i) for i in auc_mtx.index]
	auc_mtx_mean = auc_mtx.groupby('regulon').mean().T
	auc_mtx_mean.to_csv(final_regulons_aucell_means_fname, sep=',', header=True, index=True, compression='gzip')



	## Binarize mean scores
	bin_mtx, _ = binarize(auc_mtx_mean, num_workers=n_cores)
	bin_mtx.to_csv(final_regulons_aucell_means_bin_fname, sep=',', header=True, index=True, compression='gzip')



	## Calculate Regulon Specificity Scores (RSS) (doi: 10.1016/j.celrep.2018.10.045)
	rss = regulon_specificity_scores(auc_mtx_mean, metadata['celltype'])
	rss.to_csv(final_regulons_aucell_means_rss_fname, sep=',', header=True, index=True, compression='gzip')



	## Plot RSSs
	cts = rss.index.sort_values()
	fig, axs = plt.subplots(4, 4, figsize=[15, 15])
	for i in range(0, 4):
		for j in range(0, 4):
예제 #3
0
def append_auc_mtx(
    fname: str,
    ex_mtx: pd.DataFrame,
    auc_mtx: pd.DataFrame,
    regulons: Sequence[Type[GeneSignature]],
    seed=None,
    num_workers=1,
) -> None:
    """
    Append AUC matrix to loom file.

    Three things are written to the loom file: the AUC matrix as a column
    attribute, a gene-by-regulon membership matrix as a row attribute, and the
    per-regulon binarization thresholds inside the file's metadata attribute.

    :param fname: The name of loom file to be append to.
    :param ex_mtx: The expression matrix; its columns are used as the gene
        names (rows) of the regulon membership matrix.
    :param auc_mtx: The matrix that contains the AUC values.
    :param regulons: Collection of regulons that were used for calculation of the AUC values.
    :param seed: Forwarded unchanged to ``binarize``.
    :param num_workers: Forwarded unchanged to ``binarize``.
    """

    # Fetch sequence logo from regulon's context.
    def fetch_logo(context):
        for elem in context:
            if elem.endswith('.png'):
                return elem
        return ""

    # Regulons without a usable ``context`` simply get no logo; the
    # thresholds are still written with an empty "motifData" entry.
    try:
        name2logo = {reg.name: fetch_logo(reg.context) for reg in regulons}
    except AttributeError:
        name2logo = {}

    # Binarize matrix for AUC thresholds.
    _, auc_thresholds = binarize(auc_mtx, seed=seed, num_workers=num_workers)

    # ``Series.items()`` is used instead of ``iteritems()``: the latter was
    # removed in pandas 2.0, while ``items()`` is available in old and new
    # pandas alike.
    regulon_thresholds = []
    for name, threshold in auc_thresholds.items():
        # A threshold is either a plain float or an indexable holding the
        # value in its first position.
        default_threshold = (threshold
                             if isinstance(threshold, float) else threshold[0])
        regulon_thresholds.append({
            "regulon": name,
            "defaultThresholdValue": default_threshold,
            "defaultThresholdName": "gaussian_mixture_split",
            "allThresholds": {
                "gaussian_mixture_split": default_threshold
            },
            "motifData": name2logo.get(name, ""),
        })

    # Encode genes in regulons as "binary" membership matrix
    # (rows: genes, columns: regulons; 1 when the gene is in the regulon).
    genes = np.array(ex_mtx.columns)
    n_genes = len(genes)
    n_regulons = len(regulons)
    data = np.zeros(shape=(n_genes, n_regulons), dtype=int)
    for idx, regulon in enumerate(regulons):
        data[:, idx] = np.isin(genes, regulon.genes).astype(int)
    regulon_assignment = pd.DataFrame(data=data,
                                      index=ex_mtx.columns,
                                      columns=list(
                                          map(attrgetter('name'), regulons)))

    # Create meta-data structure.
    def create_structure_array(df):
        # Create a numpy structured array: one record per DataFrame row, one
        # named field per DataFrame column (field dtype = column dtype).
        return np.array([tuple(row) for row in df.values],
                        dtype=np.dtype(list(zip(df.columns, df.dtypes))))

    with lp.connect(fname, validate=False) as ds:
        # The orientation of the loom file is always:
        #   - Columns represent cells or aggregates of cells
        #   - Rows represent genes
        ds.ca[ATTRIBUTE_NAME_REGULONS_AUC] = create_structure_array(auc_mtx)
        ds.ra[ATTRIBUTE_NAME_REGULONS] = create_structure_array(
            regulon_assignment)
        if ATTRIBUTE_NAME_METADATA in ds.attrs:
            # Existing metadata is tried as plain JSON first; if that fails
            # to parse it is handed to decompress_meta instead.
            try:
                meta_data = json.loads(ds.attrs[ATTRIBUTE_NAME_METADATA])
            except json.decoder.JSONDecodeError:
                meta_data = decompress_meta(ds.attrs[ATTRIBUTE_NAME_METADATA])
        else:
            meta_data = {}
        meta_data["regulonThresholds"] = regulon_thresholds
        ds.attrs[ATTRIBUTE_NAME_METADATA] = compress_meta(meta_data)
예제 #4
0
    # Load the previously saved regulons and report what type came back.
    regulons = load_from_yaml(REGULONS_BIN_FNAME)
    print("LOADED regulons, type:")
    print(type(regulons))
    #print(regulons)

    #-------------Phase III: Cellular regulon enrichment matrix (aka AUCell)----------------

    print("STARTING PHASE III")

    # Score the regulons per cell with AUCell and save the result as a
    # tab-separated file. ex_matrix is passed as-is: according to the original
    # note it was already transposed earlier (that code is not visible here),
    # so it must not be transposed again.
    # NOTE(review): an earlier note claimed the keyword "should be num_cores",
    # but the live call uses num_workers, as do the other aucell calls in this
    # listing. Confirm against the installed pySCENIC version.
    auc_mtx = aucell(
        ex_matrix, regulons, num_workers=nCores
    )
    auc_mtx.to_csv(AUC_FNAME, sep='\t')
    print("DEFINED auc_mtx")

    # Optional: reload a previously saved AUC matrix instead of recomputing it.
    #auc_mtx = pd.read_csv(AUC_FNAME, sep='\t', header=0, index_col=0)

    # Optional: cluster map of the AUC matrix.
    #clustermap = sns.clustermap(auc_mtx, figsize=(8,8))
    #clustermap.savefig(CLUSTERMAP_FNAME)

    #-------------Phase IV: BINARIZATION

    # binarize() yields two objects: the binarized matrix and the thresholds.
    # Both are written out as tab-separated files.
    auc_binary, auc_thresholds = binarize(auc_mtx)
    print(auc_binary)
    auc_binary.to_csv(BINARYAUC_FNAME, sep='\t')
    auc_thresholds.to_csv(BINARYTHR_FNAME, sep='\t')

    print("FINISHED!")
예제 #5
0
    #from dask.diagnostics import ProgressBar

    #from arboreto.utils import load_tf_names
    #from arboreto.algo import grnboost2

    from pyscenic.rnkdb import FeatherRankingDatabase as RankingDatabase
    from pyscenic.utils import modules_from_adjacencies, load_motifs
    from pyscenic.prune import prune2df, df2regulons
    from pyscenic.aucell import aucell
    from pyscenic.binarization import binarize

    # NOTE(security): pickle.load can execute arbitrary code from the file it
    # reads. Only feed this step regulon pickles produced by a trusted
    # upstream rule.
    with open(snakemake.input[0], "rb") as f:
        regulons = pickle.load(f)

    # The expression table is read tab-separated with the first row as header
    # and the first column as index, then transposed before scoring.
    ex_matrix = pd.read_csv(snakemake.input[1],
                            sep='\t',
                            header=0,
                            index_col=0).T

    print("mtx print")

    auc_mtx = aucell(ex_matrix, regulons)

    # binarize() returns (binarized matrix, thresholds). The original code
    # unpacked these as "thresholds, mat", so the object it saved under the
    # name "thresholds" was in fact the binarized matrix. The names now match
    # the contents; what gets written to the output file is unchanged.
    binary_mtx, _ = binarize(auc_mtx)

    print("binarize done")

    print("binarise save")
    binary_mtx.to_csv(snakemake.output[0])
예제 #6
0
 def binarize_regulon_enrichment(self):
     """Return the AUC thresholds obtained by binarizing ``self.auc_mtx``.

     ``binarize`` yields a pair; only its second element (the thresholds) is
     returned, the first element is ignored.
     """
     _binarized, thresholds = binarize(self.auc_mtx)
     return thresholds