Exemple #1
0
def gmt2regions(gmt_fname, db_fname, delineation_code, fraction):
    """
    Convert every gene signature of a GMT file and print the result as CSV.

    For each signature one line is written to standard output: the signature
    name followed by the ``genes`` of the converted signature, all separated
    by commas.

    :param gmt_fname: Name of the GMT file the signatures are read from.
    :param db_fname: Name of the region ranking database file. Its base name
        is used as the name of the database.
    :param delineation_code: Key used to look up the delineation in
        ``CODE2DELINEATION``.
    :param fraction: Passed through unchanged to ``convert``.
    """
    db = RegionRankingDatabase(fname=db_fname, name=os.path.basename(db_fname))
    signatures = GeneSignature.from_gmt(gmt_fname,
                                        nomenclature=db.nomenclature)
    delineation = CODE2DELINEATION[delineation_code]
    for signature in signatures:
        converted = convert(signature, db, delineation, fraction)
        # sys.stdout is a file object and cannot be called; print() writes the
        # record to standard output and terminates it with a newline.
        print(signature.name + ',' + ','.join(converted.genes))
Exemple #2
0
def load_modules(fname: str) -> Sequence[Type[GeneSignature]]:
    """
    Load gene signatures from a file, choosing the reader from its suffixes.

    The suffixes of ``fname`` are inspected in this order:

    1. suffixes accepted by ``is_valid_suffix(..., 'ctx_yaml')`` are handed to
       ``load_from_yaml``;
    2. a ``.dat`` suffix means the file is unpickled;
    3. a ``.gmt`` suffix means the file is parsed by ``GeneSignature.from_gmt``.

    :param fname: Name of the file to load.
    :return: Whatever the selected reader returns.
    :raises ValueError: If none of the suffixes is recognised.
    """
    # Why pickling is supported at all: loading from YAML is extremely slow.
    # Switching to JSON or using a CLoader were considered
    # (https://stackoverflow.com/questions/27743711/can-i-speedup-yaml), but
    # binary pickling is the alternative that was chosen in the end.
    suffixes = PurePath(fname).suffixes

    if is_valid_suffix(suffixes, 'ctx_yaml'):
        return load_from_yaml(fname)

    if '.dat' in suffixes:
        # NOTE(review): unpickling can execute arbitrary code; only load
        # files from a trusted origin.
        with openfile(fname, 'rb') as handle:
            return pickle.load(handle)

    if '.gmt' in suffixes:
        return GeneSignature.from_gmt(fname)

    raise ValueError("Unknown file format for \"{}\".".format(fname))
Exemple #3
0
def load_modules(fname: str) -> Sequence[Type[GeneSignature]]:
    """
    Load gene signatures from a file, choosing the reader from its extension.

    * ``.yaml`` / ``.yml``: delegated to ``load_from_yaml``.
    * ``.dat``: the file is opened in binary mode and unpickled.
    * ``.gmt``: parsed by ``GeneSignature.from_gmt``; the separator returned
      by ``guess_separator`` is used for both the fields and the genes.

    :param fname: Name of the file to load.
    :return: Whatever the selected reader returns.
    :raises ValueError: If ``fname`` ends with none of the extensions above.
    """
    # Why pickling is supported at all: loading from YAML is extremely slow.
    # Switching to JSON or using a CLoader were considered
    # (https://stackoverflow.com/questions/27743711/can-i-speedup-yaml), but
    # binary pickling is the alternative that was chosen in the end.
    if fname.endswith(('.yaml', '.yml')):
        return load_from_yaml(fname)

    if fname.endswith('.dat'):
        # NOTE(review): unpickling can execute arbitrary code; only load
        # files from a trusted origin.
        with open(fname, 'rb') as handle:
            return pickle.load(handle)

    if fname.endswith('.gmt'):
        separator = guess_separator(fname)
        return GeneSignature.from_gmt(fname,
                                      field_separator=separator,
                                      gene_separator=separator)

    raise ValueError("Unknown file format for \"{}\".".format(fname))
Exemple #4
0
def aucell_command(args):
    """
    Compute regulon enrichment (AUC values) for cells and write it as CSV.

    How the regulons are obtained depends on the name of
    ``args.regulons_fname``: a name ending with one of the keys of
    ``FILE_EXTENSION2SEPARATOR`` is converted by ``_df2regulons``, a ``.gmt``
    name is parsed as a tab-separated GMT file, and anything else is handed to
    ``_load_modules``.

    :param args: Namespace-like object. This function reads its
        ``regulons_fname``, ``auc_threshold``, ``weights``, ``num_workers``
        and ``output`` attributes and passes the whole object to
        ``_load_expression_matrix``.
    """
    LOGGER.info("Loading expression matrix.")
    expression_matrix = _load_expression_matrix(args)

    regulons_path = args.regulons_fname.name
    if any(map(regulons_path.endswith, FILE_EXTENSION2SEPARATOR)):
        LOGGER.info("Creating regulons.")
        regulons = _df2regulons(regulons_path)
    else:
        LOGGER.info("Loading regulons.")
        if regulons_path.endswith('.gmt'):
            regulons = GeneSignature.from_gmt(regulons_path,
                                              field_separator='\t',
                                              gene_separator='\t')
        else:
            regulons = _load_modules(regulons_path)

    LOGGER.info("Calculating enrichment.")
    # Weights are ignored unless they were explicitly requested with 'yes'.
    ignore_weights = args.weights != 'yes'
    enrichment = aucell(expression_matrix,
                        regulons,
                        auc_threshold=args.auc_threshold,
                        noweights=ignore_weights,
                        num_workers=args.num_workers)

    LOGGER.info("Writing results to file.")
    enrichment.to_csv(args.output)
Exemple #5
0
def test_add():
    """Adding a gene to the first test signature yields a result containing that gene."""
    signatures = GeneSignature.from_gmt(
        fname=TEST_SIGNATURE_FNAME,
        field_separator='\t',
        gene_separator='\t',
    )
    first_signature = signatures[0]
    extended = first_signature.add("MEF2")
    assert "MEF2" in extended
Exemple #6
0
def gs():
    """Return the first signature parsed from the tab-separated test GMT file."""
    signatures = GeneSignature.from_gmt(
        TEST_SIGNATURE_FNAME,
        field_separator="\t",
        gene_separator="\t",
    )
    return signatures[0]