def calc_enrichment(gene_list, GO_ID_list, total_unique_gene, GO_Term_list):
    '''
    Score every eligible GO term for enrichment against ``gene_list``.

    Only terms annotated with 20 to 2000 genes (inclusive) are tested, and
    only terms whose overlap (as returned by ``calc_pvalue``) has more than
    one element are reported.

    :param gene_list: genes of interest; forwarded to ``calc_pvalue`` and its length is reported as ``genes_from_list``
    :param GO_ID_list: mapping of GO id -> collection of genes annotated with that term
    :param total_unique_gene: background population size handed to ``calc_pvalue`` as ``M``
    :param GO_Term_list: mapping of GO id -> sequence whose items 0 and 1 are used as name and description
    :return: list of dicts with keys ``go_id``, ``name``, ``description``, ``pvalue``, ``overlap``,
             ``genes_from_list``, ``genes_from_go`` and ``qvalue``, sorted by descending ``qvalue``.
             ``qvalue`` holds ``-log10`` of the value produced by ``fdr`` (1000 when that value is 0).
    '''
    enriched_list = []
    for term in GO_ID_list:
        term_genes = GO_ID_list.get(term)
        if not 20 <= len(term_genes) <= 2000:
            continue
        pvalue, overlap = calc_pvalue(gene_list, term_genes, total_unique_gene)
        if len(overlap) <= 1:
            continue
        term_info = GO_Term_list.get(term)
        enriched_list.append({
            "go_id": term,
            "name": term_info[0],
            "description": term_info[1],
            "pvalue": pvalue,
            "overlap": overlap,
            "genes_from_list": len(gene_list),
            "genes_from_go": len(term_genes),
        })

    # Nothing passed the filters: there is nothing to correct, so skip fdr().
    if not enriched_list:
        return enriched_list

    # fdr() is told the p-values are presorted, so sort ascending first.
    enriched_list.sort(key=lambda it: it['pvalue'])
    qvalues = fdr([it['pvalue'] for it in enriched_list], presorted=True)
    # Builtin zip works on both Python 2 and 3 (itertools.izip is Python 2 only).
    for qvalue, it in zip(qvalues, enriched_list):
        if math.fabs(qvalue) == 0:
            # log(0) is undefined; 1000 is the sentinel for "most significant".
            it['qvalue'] = 1000
        else:
            it['qvalue'] = -math.log(qvalue, 10)

    enriched_list.sort(key=lambda it: it['qvalue'], reverse=True)
    return enriched_list
def gene_set_enrichment(gene_list, M=None):
    '''
    Test the GO terms stored in the local MongoDB ``go`` database for enrichment.

    :param gene_list: list of gene symbols
    :param M: total number of genes (derived from database, if None or otherwise falsy)
    :return: list of term documents (only terms with ``n_genes`` > 2 that share at least one
             gene with ``gene_list``), each extended with ``pvalue`` and ``overlap`` as returned
             by ``calc_pvalue`` and with ``qvalue`` from ``fdr``; sorted by ascending ``pvalue``
    '''
    client = pymongo.MongoClient()
    try:
        if not M:
            M = len(client.go.genes.distinct('gene'))
        go_ids = list(client.go.genes.find({'gene': {'$in': list(gene_list)}}).distinct('go'))
        terms = list(client.go.terms.find({'go': {'$in': go_ids}, 'n_genes': {'$gt': 2}}))
    finally:
        # Query results are fully materialised above, so the connection can be released.
        client.close()

    enriched = []
    for term in terms:
        pvalue, overlap = calc_pvalue(gene_list, term['genes'], M)
        # dict(term, ...) copies the document and adds the two fields; unlike
        # ``term.items() + zip(...)`` it also works on Python 3.
        enriched.append(dict(term, pvalue=pvalue, overlap=overlap))

    # No matching terms: nothing to correct, so skip fdr().
    if not enriched:
        return enriched

    # fdr() is told the p-values are presorted, so sort ascending first.
    enriched.sort(key=lambda it: it['pvalue'])
    qvalues = fdr([it['pvalue'] for it in enriched], presorted=True)
    # Builtin zip works on both Python 2 and 3 (itertools.izip is Python 2 only).
    for qvalue, it in zip(qvalues, enriched):
        it['qvalue'] = qvalue
    return enriched
def calc_enrichment(gene_list, GO_ID_list, total_unique_gene, GO_Term_list):
    '''
    Score every eligible GO term for enrichment against ``gene_list``.

    Only terms annotated with 20 to 2000 genes (inclusive) are tested, and
    only terms whose overlap (as returned by ``calc_pvalue``) has more than
    one element are reported.

    :param gene_list: genes of interest; forwarded to ``calc_pvalue`` and its length is reported as ``genes_from_list``
    :param GO_ID_list: mapping of GO id -> collection of genes annotated with that term
    :param total_unique_gene: background population size handed to ``calc_pvalue`` as ``M``
    :param GO_Term_list: mapping of GO id -> sequence whose items 0 and 1 are used as name and description
    :return: list of dicts with keys ``go_id``, ``name``, ``description``, ``pvalue``, ``overlap``,
             ``genes_from_list``, ``genes_from_go`` and ``qvalue``, sorted by descending ``qvalue``.
             ``qvalue`` holds ``-log10`` of the value produced by ``fdr`` (``inf`` when that value is 0).
    '''
    enriched_list = []
    for term in GO_ID_list:
        term_genes = GO_ID_list.get(term)
        if not 20 <= len(term_genes) <= 2000:
            continue
        pvalue, overlap = calc_pvalue(gene_list, term_genes, total_unique_gene)
        if len(overlap) <= 1:
            continue
        term_info = GO_Term_list.get(term)
        enriched_list.append({
            "go_id": term,
            "name": term_info[0],
            "description": term_info[1],
            "pvalue": pvalue,
            "overlap": overlap,
            "genes_from_list": len(gene_list),
            "genes_from_go": len(term_genes),
        })

    # Nothing passed the filters: there is nothing to correct, so skip fdr().
    if not enriched_list:
        return enriched_list

    # fdr() is told the p-values are presorted, so sort ascending first.
    enriched_list.sort(key=lambda it: it['pvalue'])
    qvalues = fdr([it['pvalue'] for it in enriched_list], presorted=True)
    # Builtin zip works on both Python 2 and 3 (itertools.izip is Python 2 only).
    for qvalue, it in zip(qvalues, enriched_list):
        if math.fabs(qvalue) == 0:
            # log(0) is undefined; treat a zero q-value as infinitely significant.
            it['qvalue'] = float("inf")
        else:
            it['qvalue'] = -math.log(qvalue, 10)

    enriched_list.sort(key=lambda it: it['qvalue'], reverse=True)
    return enriched_list
def gene_set_enrichment(gene_list, M=None):
    '''
    Test the term sets stored in the local MongoDB ``go`` database for enrichment.

    :param gene_list: list of gene symbols --> ENSG00000233636, ENSG00000129673, etc...
    :param M: total number of genes (derived from database, if None or otherwise falsy)
    :return: list of term documents (only terms with ``n_genes`` > 2 that share at least one
             gene with ``gene_list``), each extended with ``pvalue`` and ``overlap`` as returned
             by ``calc_pvalue`` and with ``qvalue`` from ``fdr``; sorted by ascending ``pvalue``
    '''
    client = pymongo.MongoClient()
    try:
        if not M:
            M = len(client.go.genes.distinct('gene'))

        # Ids of every term annotated to at least one input gene
        # (GO:0004059, GO:0006474, etc...).
        gene_query = {'gene': {'$in': list(gene_list)}}
        term_ids = list(client.go.genes.find(gene_query).distinct('go'))

        term_query = {'go': {'$in': term_ids}, 'n_genes': {'$gt': 2}}
        terms = list(client.go.terms.find(term_query))
    finally:
        # Query results are fully materialised above, so the connection can be released.
        client.close()

    enriched = []
    for term in terms:
        pvalue, overlap = calc_pvalue(gene_list, term['genes'], M)
        # dict(term, ...) copies the document and adds the two fields; unlike
        # ``term.items() + zip(...)`` it also works on Python 3.
        enriched.append(dict(term, pvalue=pvalue, overlap=overlap))

    # No matching terms: nothing to correct, so skip fdr().
    if not enriched:
        return enriched

    # fdr() is told the p-values are presorted, so sort ascending first.
    enriched.sort(key=lambda it: it['pvalue'])
    qvalues = fdr([it['pvalue'] for it in enriched], presorted=True)
    # Builtin zip works on both Python 2 and 3 (itertools.izip is Python 2 only).
    for qvalue, it in zip(qvalues, enriched):
        it['qvalue'] = qvalue
    return enriched
strDirection = 'PWS < PFS' #print('ROI1 = %s, ROI2 = %s: p = %f (%s)'%(b_rois[i1], b_rois[i2], p, strDirection)) #print('ROI1 = %s, ROI2 = %s: p = %f (%s)'%(roi1, roi2, p, strDirection)) t_showZScores['PFS'].append(z_PFS) t_showZScores['PWS'].append(z_PWS) t_sigROIs['roi1'].append(roi1) t_sigROIs['roi2'].append(roi2) k = k + 1 print('Between-group comparisons of correlation z-scores') if bFDR: signifThresh = fdr(all_ps, FDR) print('**FDR** (q = %f) --> signifThresh = %f\n'%(FDR, signifThresh)) else: print('Pre-specified signifThresh = %f\n'%signifThresh); ### Determine the significant differences ### keep_idx = [] for i1 in range(nTotComps): if all_ps[i1] < signifThresh: keep_idx.append(i1) nSigDiff = len(keep_idx) for idx in keep_idx: p = all_ps[idx] zScores_PFS = t_showZScores['PFS'][idx] zScores_PWS = t_showZScores['PWS'][idx]