Esempio n. 1
0
def load_signatures(fname: str) -> Sequence[Type[GeneSignature]]:
    """
    Load genes signatures from disk.

    Supported file formats are GMT, DAT (pickled), YAML or CSV/TSV (enriched motifs).
    The format is chosen from the suffixes of the file name, which are compared
    case-insensitively (``signatures.GMT`` is handled like ``signatures.gmt``).

    :param fname: The name of the file that contains the signatures.
    :return: A list of gene signatures.
    :raises ValueError: If none of the file name's suffixes maps to a supported format.
    """
    # Lower-case every suffix so upper/mixed-case extensions are recognised too.
    # A list is kept on purpose: that is the type PurePath.suffixes returns and
    # therefore what the suffix helpers below were being handed before.
    extension = [suffix.lower() for suffix in PurePath(fname).suffixes]

    if is_valid_suffix(extension, 'ctx'):
        # csv/tsv table of enriched motifs; the separator follows from the suffix.
        return df2regulons(
            load_motifs(fname, sep=suffixes_to_separator(extension)))
    elif is_valid_suffix(extension, 'ctx_yaml'):
        return load_from_yaml(fname)
    elif '.gmt' in extension:
        # The same guessed separator is used between fields and between genes.
        sep = guess_separator(fname)
        return GeneSignature.from_gmt(fname,
                                      field_separator=sep,
                                      gene_separator=sep)
    elif '.dat' in extension:
        # SECURITY: unpickling can execute arbitrary code. Only load .dat files
        # that come from a trusted source.
        with openfile(fname, 'rb') as f:
            return pickle.load(f)
    else:
        raise ValueError('Unknown file format "{}".'.format(fname))
Esempio n. 2
0
def load_signatures(fname: str) -> Sequence[Type[GeneSignature]]:
    """
    Read gene signatures from a file, picking the parser from its extension.

    The (lower-cased) extension of the file name selects the format: a delimited
    table of enriched motifs (extensions listed in FILE_EXTENSION2SEPARATOR),
    YAML, GMT or DAT (pickled).

    :param fname: The name of the file that contains the signatures.
    :return: A list of gene signatures.
    :raises ValueError: If the extension matches none of the supported formats.
    """
    _, raw_ext = os.path.splitext(fname)
    ext = raw_ext.lower()

    # Delimited motif table: the mapping supplies the column separator.
    if ext in FILE_EXTENSION2SEPARATOR:
        motifs = load_motifs(fname, sep=FILE_EXTENSION2SEPARATOR[ext])
        return df2regulons(motifs)

    if ext in ('.yaml', '.yml'):
        return load_from_yaml(fname)

    if ext.endswith('.gmt'):
        # One guessed separator serves both the fields and the gene list.
        separator = guess_separator(fname)
        return GeneSignature.from_gmt(
            fname, field_separator=separator, gene_separator=separator)

    if ext == '.dat':
        with open(fname, 'rb') as handle:
            return pickle.load(handle)

    raise ValueError('Unknown file format "{}".'.format(fname))
Esempio n. 3
0
        adjacencies, ex_matrix))  # identifies modules from GENIE3

    # Checkpoint the adjacencies table to CSV so the network-inference step
    # does not have to be repeated.
    adjacencies.to_csv("grnboost_output.csv")

    # Read the checkpoint straight back (first CSV column becomes the index).
    # NOTE(review): in the code visible here the reloaded frame is not used
    # again; this line appears meant for resuming a run from the saved file.
    adjacencies = pd.read_csv("grnboost_output.csv", index_col=0)

    # cisTarget step: build a table of enriched motifs for the modules using
    # the ranking databases in `dbs` and the motif annotation file named below.
    with ProgressBar(
    ):  # progress reporting while the enriched-motif table is computed for all modules
        df = prune2df(dbs, modules, "motifs-v9-nr-mgi.txt")
    regulons = df2regulons(
        df)  # derive regulons from the enriched-motif table

    # Persist both results: motifs as CSV, regulons as a pickle.
    df.to_csv(motifs_filename)
    with open(regulons_filename, "wb") as f:
        pickle.dump(regulons, f)

    # Reload both results from the files just written (lets a later run start here).
    # SECURITY: pickle.load can execute arbitrary code; only read files this
    # pipeline produced itself.
    df = load_motifs(motifs_filename)
    with open(regulons_filename, "rb") as f:
        regulons = pickle.load(f)

    # AUCell step: score the regulons against the expression matrix using 4 workers.
    auc_matrix = aucell(ex_matrix, regulons, num_workers=4)

    # Write the AUCell result to CSV for downstream analysis (e.g. in R).
    auc_matrix.to_csv("SCENIC_export.csv")
Esempio n. 4
0
import os
# Plotting / logging configuration for the demo.
sc.settings.set_figure_params(frameon=True, color_map='Spectral_r')
sc.settings.verbosity = 3
plt.rcParams["axes.grid"] = False

# Run time for demo data: several minutes.

# Input tables: expression matrix (CSV) and metadata (tab-separated), both
# indexed by their first column.
DATA_FOLDER = './Data/'
exp_matrix = pd.read_csv(os.path.join(DATA_FOLDER, 'DC_exp.csv'), index_col=0)
exp_meta = pd.read_csv(os.path.join(DATA_FOLDER, 'DC_meta.txt'),
                       sep='\t',
                       index_col=0)

## Generate the regulons
Motifs_NAME = 'exp_matrix'
motifs = load_motifs(
    os.path.join(DATA_FOLDER, '{}.motifs.csv'.format(Motifs_NAME)))
regulons = df2regulons(motifs)

## Make a meta matrix for the regulon
# One entry per regulon: its transcription factor, its target genes and the
# number of targets.
reg_tf = [regulon.transcription_factor for regulon in regulons]
reg_target = [list(regulon.gene2weight.keys()) for regulon in regulons]
reg_num = [len(targets) for targets in reg_target]

# Build the frame column-wise so that 'n_targets' gets an integer dtype.
# Stacking the two lists as rows and transposing would make every column
# object-typed, which breaks numeric operations such as nlargest().
reg_meta = pd.DataFrame({'n_targets': reg_num, 'targets': reg_target},
                        index=reg_tf)