예제 #1
0
def motif_enrich(mgdict, genes, adj_p=True):
    """Test every motif for over-representation among ``genes``.

    For each motif a 2x2 contingency table is built and handed to Fisher's
    exact test. The background ("universe") is the set of all genes that
    have at least one motif mapped in ``mgdict``.

    Args:
        mgdict: Mapping of motif id -> iterable of genes the motif maps to.
        genes: Iterable of genes of interest. Genes that do not occur in any
            value of ``mgdict`` are ignored, because they are not part of the
            background that ``M`` counts.
        adj_p: If true, the raw p-values are replaced by the values returned
            by ``padjust.fdr``.

    Returns:
        A list of ``[motif_id, p, odds, table]`` entries sorted by ascending
        p-value, where ``table`` is the array ``[[a, b], [c, d]]``. Only
        motifs hit by at least two genes of interest and with an odds ratio
        above 1 are reported.
    """
    # Universe: every gene with at least one motif mapped.
    universe = set(itertools.chain.from_iterable(mgdict.values()))
    M = len(universe)

    # Restrict the query to the universe. Counting unmapped genes would
    # inflate ``c`` and shrink ``d`` (possibly below zero), which makes the
    # table inconsistent with a background of size M.
    mapped_genes = universe.intersection(genes)

    enrich = []
    # For each motif...
    for mid in mgdict:
        m = set(mgdict[mid])

        a = len(m & mapped_genes)      # genes of interest with the motif
        # Require at least two hits before testing.
        if a < 2:
            continue
        b = len(m - mapped_genes)      # other genes with the motif
        c = len(mapped_genes - m)      # genes of interest without the motif
        d = M - len(m | mapped_genes)  # remaining background genes

        fentry = np.array([[a, b], [c, d]])
        try:
            odds, p = fisher_exact(fentry)
        except ValueError:
            # Safety net: report the failure as HTML and abort. The
            # single-argument print form behaves the same on Python 2 and 3.
            print('<h3>ValueError</h3>')
            print('<p>An error in the motif enrichment function has occured</p>')
            print('<p>Please <a href="mailto:[email protected]">notify the administrator</a>.</p>')
            print('%s %s %s %s %s' % (M, len(m), len(mapped_genes), a, d))
            sys.exit(1)

        # TODO: Only overrepresentation at the moment
        if odds > 1:
            enrich.append([mid, p, odds, fentry])

    # Adjust p-values (nothing to adjust when no motif passed the filters).
    if adj_p and enrich:
        padj = padjust.fdr([entry[1] for entry in enrich])
        for entry, q in zip(enrich, padj):
            entry[1] = q

    return sorted(enrich, key=lambda entry: entry[1])
예제 #2
0
def calc_enrichment(genes, go, set_p, set_m, set_c, underrep=False, adj_p=True):
    """Compute GO term enrichment for a gene set with Fisher's exact test.

    Args:
        genes: Set of genes for which enrichment is wanted.
        go: Mapping of GO id -> sequence whose first item is the set of genes
            annotated to that term and whose second item is the namespace
            name ('biological process', 'molecular function' or
            'cellular component', compared case-insensitively).
        set_p: Background gene set used for 'biological process' terms.
        set_m: Background gene set used for 'molecular function' terms.
        set_c: Background gene set used for 'cellular component' terms.
        underrep: If true, report terms with an odds ratio below 1 instead of
            terms with an odds ratio above 1.
        adj_p: If true, the raw p-values are replaced by the values returned
            by ``padjust.fdr``.

    Returns:
        A list of ``[go_id, p, odds, table, namespace]`` entries sorted by
        ascending p-value, where ``table`` is the float array
        ``[[a, c], [b, d]]``. Terms hit by fewer than two of the GO-annotated
        genes are not tested.
    """
    # NOTE: this changes NumPy's floating-point error handling globally and
    # does not restore it; kept because later code may rely on that state.
    np.seterr(all='ignore')

    backgrounds = {
        'biological process': set_p,
        'molecular function': set_m,
        'cellular component': set_c,
    }

    goenrich = []
    for term, v in go.items():
        # Terms in an unrecognised namespace get an empty background and are
        # therefore dropped by the ``a < 2`` filter below.
        GOS = backgrounds.get(v[1].lower(), set())

        # The set of genes annotated to the specific GO term
        X = v[0]
        # The GO-annotated part of the gene set for which we want enrichment
        Sgo = genes & GOS

        a = len(Sgo & X)  # our genes in this term
        if a < 2:
            continue
        b = len(Sgo - X)  # our genes NOT in this term
        c = len(X - Sgo)  # genes in this term that are NOT our genes
        d = len(GOS - X - Sgo)  # background genes in neither set

        fentry = np.array([[a, c], [b, d]], dtype='float64')
        odds, p = stats.fisher_exact(fentry)

        # Keep depleted terms when underrep is set, enriched terms otherwise.
        if (underrep and odds < 1) or (not underrep and odds > 1):
            goenrich.append([term, p, odds, fentry, v[1]])

    # Adjust p-values (nothing to adjust when no term passed the filters).
    if adj_p and goenrich:
        padj = padjust.fdr([entry[1] for entry in goenrich])
        for entry, q in zip(goenrich, padj):
            entry[1] = q

    return sorted(goenrich, key=lambda entry: entry[1])