def motif_enrich(mgdict, genes, adj_p=True):
    """Test each motif for over-representation among ``genes``.

    For every motif a 2x2 contingency table is built and evaluated with
    Fisher's exact test.  The background (universe) is the set of all genes
    that have at least one motif mapped in ``mgdict``.

    Parameters:
        mgdict: mapping of motif id -> iterable of genes the motif maps to.
        genes: iterable of query genes.  Genes that do not occur anywhere in
            ``mgdict`` are outside the universe and are ignored.
        adj_p: when true, the p-values are replaced by the values returned
            from ``padjust.fdr`` (project helper, presumably an FDR
            correction -- confirm against that module).

    Returns:
        A list of ``[motif_id, p, odds_ratio, table]`` entries sorted by
        ascending ``p``, where ``table`` is the numpy array
        ``[[a, b], [c, d]]``.  Only motifs shared by at least two query genes
        and with an odds ratio above 1 are reported.

    Side effects:
        If ``fisher_exact`` raises ``ValueError`` an HTML error message is
        printed and the process exits with status 1.
    """
    # Universe: every gene with at least one motif mapped.
    universe = set(itertools.chain.from_iterable(mgdict.values()))
    M = len(universe)

    # Restrict the query to the universe.  Otherwise genes without any mapped
    # motif inflate ``c`` and can push ``d`` below zero, which makes
    # fisher_exact raise ValueError.
    query = universe.intersection(genes)

    enrich = []
    for mid in mgdict:
        m = set(mgdict[mid])

        a = len(m & query)          # query genes carrying the motif
        if a < 2:
            # Too little support to be worth testing.
            continue
        b = len(m - query)          # non-query genes carrying the motif
        c = len(query - m)          # query genes without the motif
        d = M - len(m | query)      # remaining genes of the universe

        fentry = np.array([[a, b], [c, d]])
        try:
            odds, p = fisher_exact(fentry)
        except ValueError:
            # Single-argument print(...) emits the same text on Python 2
            # and Python 3.
            print('<h3>ValueError</h3>')
            print('<p>An error in the motif enrichment function has occured</p>')
            print('<p>Please <a href="mailto:[email protected]">notify the administrator</a>.</p>')
            print(' '.join(str(v) for v in (M, len(m), len(query), a, d)))
            sys.exit(1)

        # TODO: Only overrepresentation at the moment
        if odds > 1:
            enrich.append([mid, p, odds, fentry])

    # Adjust p-values (nothing to adjust when no motif qualified).
    if adj_p and enrich:
        padj = padjust.fdr([entry[1] for entry in enrich])
        for entry, q in zip(enrich, padj):
            entry[1] = q

    return sorted(enrich, key=lambda entry: entry[1])
def calc_enrichment(genes, go, set_p, set_m, set_c, underrep=False, adj_p=True):
    """Test GO terms for enrichment (or depletion) among ``genes``.

    Each term is evaluated with Fisher's exact test against the background
    set that belongs to the term's namespace.

    Parameters:
        genes: set of query genes.
        go: mapping of GO id -> sequence whose first item is the set of genes
            annotated to the term and whose second item is the namespace name
            ('biological process', 'molecular function' or
            'cellular component', compared case-insensitively).
        set_p: background gene set used for 'biological process' terms.
        set_m: background gene set used for 'molecular function' terms.
        set_c: background gene set used for 'cellular component' terms.
        underrep: when true report terms with an odds ratio below 1, otherwise
            terms with an odds ratio above 1.
        adj_p: when true, the p-values are replaced by the values returned
            from ``padjust.fdr`` (project helper, presumably an FDR
            correction -- confirm against that module).

    Returns:
        A list of ``[go_id, p, odds_ratio, table, namespace]`` entries sorted
        by ascending ``p``; ``table`` is the float64 numpy array
        ``[[a, c], [b, d]]``.  Terms shared by fewer than two query genes are
        skipped, as are terms with an unrecognised namespace (their
        background is empty).

    Side effects:
        Calls ``np.seterr(all='ignore')``, which changes numpy's global
        floating point error handling and is not restored afterwards.
    """
    np.seterr(all='ignore')

    backgrounds = {
        'biological process': set_p,
        'molecular function': set_m,
        'cellular component': set_c,
    }

    goenrich = []
    # ``items()`` works on both Python 2 and 3; the loop variable no longer
    # shadows the ``go`` parameter.
    for term, v in go.items():
        namespace = v[1]
        GOS = backgrounds.get(namespace.lower(), set())

        # The set of genes annotated to the specific GO term
        X = v[0]
        # The GO-annotated part of the gene set for which we want enrichment
        Sgo = genes & GOS

        a = len(Sgo & X)          # our genes in this term
        if a < 2:
            continue
        b = len(Sgo - X)          # our genes NOT in this term
        c = len(X - Sgo)          # genes in the term that are not ours
        d = len(GOS - X - Sgo)    # background genes in neither set

        fentry = np.array([[a, c], [b, d]], dtype='float64')
        odds, p = stats.fisher_exact(fentry)

        wanted = odds < 1 if underrep else odds > 1
        if wanted:
            goenrich.append([term, p, odds, fentry, namespace])

    # Adjust p-values (nothing to adjust when no term qualified).
    if adj_p and goenrich:
        padj = padjust.fdr([entry[1] for entry in goenrich])
        for entry, q in zip(goenrich, padj):
            entry[1] = q

    return sorted(goenrich, key=lambda entry: entry[1])