Example #1
def test_assc_stats(prt=sys.stdout):
    """Print GO DAG statistics, then association statistics for each organism.

    Args:
        prt: Writable stream that receives the report (defaults to stdout).
    """
    # Load the GO DAG once and report on it first
    godag = get_godag(os.path.join(REPO, "go-basic.obo"), loading_bar=None)
    describe_go2obj(godag, prt)

    # One shared describer so all organisms are printed under a single header
    stats = StatsDescribe('Assc', "{:6,}")
    stats.prt_hdr(prt, "Assc.")

    # (organism code, association file name inside REPO)
    org_gaf_pairs = (
        ('hsa', 'goa_human.gaf'),  # human
        ('mus', 'mgi.gaf'),        # mouse
        ('dme', 'fb.gaf'),         # fly
    )
    for organism, gaf_name in org_gaf_pairs:
        describe_assc(organism, os.path.join(REPO, gaf_name), godag, stats, prt)
Example #3
def test_statsdescribe():
    """Use StatsDescribe to create a markdown table.

    Example of the table layout (header row and one data row):

name     | # fdr_bh | range of fdr_bh      | 25th perc|   median | 75th perc|     mean | stddev
GOATOOLS |       59 | 1.87e-07 to 4.94e-02 | 2.72e-04 | 1.03e-02 | 3.04e-02 | 1.56e-02 | 1.82e-02

    """
    #pylint: disable=no-member
    # NOTE(review): `nts` is not defined inside this function; presumably it is
    # a module-level collection of result records with a `p_fdr_bh` field -- confirm.
    # Keep only the records whose FDR-corrected p-value is below 0.05
    nts_goids = [nt for nt in nts if nt.p_fdr_bh < 0.05]
    fdr_vals = [nt.p_fdr_bh for nt in nts_goids]
    # Scientific notation, right-aligned in 8 columns
    statsobj = StatsDescribe("fdr_bh", fmtstr="{:>8.2e}")
    statsobj.prt_data("GOATOOLS", fdr_vals)
def test_statsdescribe():
    """Use StatsDescribe to create a markdown table.

    Example of the table layout (header row and one data row):

name     | # fdr_bh | range of fdr_bh      | 25th perc|   median | 75th perc|     mean | stddev
GOATOOLS |       59 | 1.87e-07 to 4.94e-02 | 2.72e-04 | 1.03e-02 | 3.04e-02 | 1.56e-02 | 1.82e-02

    """
    #pylint: disable=no-member
    # NOTE(review): `goea_results` is not defined inside this function;
    # presumably it is a module-level collection of result records -- confirm.
    # Somehow goea_results contains fields of empty string, which we can check with:
    # print([(nt.GO, nt.p_fdr_bh) for nt in goea_results])
    # Skip the empty-string entries first so the numeric comparison is never
    # attempted on a str, then keep records with FDR-corrected p-value < 0.05
    nts_goids = [nt for nt in goea_results if nt.p_fdr_bh != '' and nt.p_fdr_bh < 0.05]
    fdr_vals = [nt.p_fdr_bh for nt in nts_goids]
    # Scientific notation, right-aligned in 8 columns
    statsobj = StatsDescribe("fdr_bh", fmtstr="{:>8.2e}")
    statsobj.prt_data("GOATOOLS", fdr_vals)
Example #6
def describe_go2obj(go2obj, prt):
    """Print statistics on how many parents and children the GO terms have.

    Args:
        go2obj: Mapping of GO ID to a term object exposing ``id``,
            ``children`` and ``parents``.
        prt: Writable stream that receives the table.
    """
    # Layout of the report:
    # Related GO | # GO  | range    | 25th | median | 75th | mean | stddev
    # -----------|-------|----------|------|--------|------|------|-------
    # Parents    | 44961 | 0 to   8 |    1 |      1 |    2 |    2 |      1
    # Children   | 17597 | 1 to 480 |    1 |      2 |    4 |    4 |     10

    # Only count an entry when its key is the term's own id
    # (presumably this skips alternate-ID keys pointing at the same term -- confirm)
    child_parent_counts = []
    for goid, term in go2obj.items():
        if goid == term.id:
            child_parent_counts.append((len(term.children), len(term.parents)))
    num_children, num_parents = zip(*child_parent_counts)

    # Zero counts are dropped: terms without parents (top-level) and
    # terms without children (leaf-level) are excluded from the stats
    parents_nonzero = [cnt for cnt in num_parents if cnt]
    children_nonzero = [cnt for cnt in num_children if cnt]

    stats = StatsDescribe('GO', "{:6,}")
    stats.prt_hdr(prt, "Related GO")
    stats.prt_data("Parents", parents_nonzero, prt)
    stats.prt_data("Children", children_nonzero, prt)
def test_statsdescribe():
    """Use StatsDescribe to create a markdown table.

    Example of the table layout (header row and one data row):

name     | # fdr_bh | range of fdr_bh      | 25th perc|   median | 75th perc|     mean | stddev
GOATOOLS |       59 | 1.87e-07 to 4.94e-02 | 2.72e-04 | 1.03e-02 | 3.04e-02 | 1.56e-02 | 1.82e-02

    """
    #pylint: disable=no-member
    # NOTE(review): `nts` is not defined inside this function; presumably it is
    # a module-level collection of result records with a `p_fdr_bh` field -- confirm.
    # Keep only the records whose FDR-corrected p-value is below 0.05
    nts_goids = [nt for nt in nts if nt.p_fdr_bh < 0.05]
    fdr_vals = [nt.p_fdr_bh for nt in nts_goids]
    # Scientific notation, right-aligned in 8 columns
    statsobj = StatsDescribe("fdr_bh", fmtstr="{:>8.2e}")
    statsobj.prt_data("GOATOOLS", fdr_vals)
Example #10
def main(prt=sys.stdout):
    """Statistics for the protein-coding mouse gene association.

    Writes three things to ``prt``: statistics on the number of genes per
    GO ID, statistics on the number of GO IDs per gene, and the percentage
    of population genes that have at least one GO annotation.

    Args:
        prt: Writable stream that receives the report (defaults to stdout).
    """
    godag = get_godag()
    params = {
        'association_file': os.path.join(REPO, 'gene_association.mgi'),
        'genes_population': ensm2nt.keys()
    }  # Population genes
    objassc = DataAssc(params, godag)
    # Statistics for number of genes per GO in the mouse association for protein-coding genes
    go2numgenes = {go: len(genes) for go, genes in objassc.go2genes.items()}
    objdesc = StatsDescribe("GOs", "{:>5.0f}")
    objdesc.prt_hdr(prt, name="\nname      ")
    objdesc.prt_data("# genes/GO", go2numgenes.values(), prt)
    # Statistics for number of GOs per gene in the mouse association for protein-coding genes
    gene2numgos = {
        gene: len(gos)
        for gene, gos in objassc.objassc_all.assc_geneid2gos.items()
    }
    objdesc = StatsDescribe("genes", "{:>5.0f}")
    objdesc.prt_hdr(prt, name="\nname      ")
    objdesc.prt_data("# GOs/gene", gene2numgos.values(), prt)
    # Percentage of Ensembl mouse genes covered by GO annotations
    num_pc = len(params['genes_population'])
    num_assc = len(objassc.objassc_all.assc_geneid2gos)
    prt.write(
        "{PERC:2.0f}% of {A} of {P} Mouse protein-coding genes are annotated by GO IDs.\n"
        .format(PERC=100.0 * num_assc / num_pc, P=num_pc, A=num_assc))