예제 #1
0
def calc_enrichment(gene_list, GO_ID_list, total_unique_gene, GO_Term_list):
    '''
    Score GO terms for enrichment against a gene list.

    Only terms whose gene set has between 20 and 2000 members (inclusive) are
    tested, and only terms with more than one overlapping gene are reported.

    :param gene_list: query genes, passed through to calc_pvalue
    :param GO_ID_list: mapping of GO id -> collection of genes for that term
    :param total_unique_gene: population size, passed to calc_pvalue as M
    :param GO_Term_list: mapping of GO id -> sequence whose first two items
        are used as the term's name and description
    :return: list of dicts with keys go_id, name, description, pvalue,
        overlap, genes_from_list, genes_from_go and qvalue, sorted by qvalue
        descending. qvalue is -log10 of the value produced by fdr(), or the
        sentinel 1000 when that value is exactly zero.
    '''
    M = total_unique_gene

    enriched_list = []
    for term in GO_ID_list:
        # Look the gene set up once instead of on every use.
        term_genes = GO_ID_list.get(term)
        if not 20 <= len(term_genes) <= 2000:
            continue

        pvalue, overlap = calc_pvalue(gene_list, term_genes, M)
        if len(overlap) <= 1:
            continue

        # NOTE: a term missing from GO_Term_list makes .get() return None and
        # the indexing below raise TypeError (same as before this rewrite).
        term_info = GO_Term_list.get(term)
        enriched_list.append({
            "go_id": term,
            "name": term_info[0],
            "description": term_info[1],
            "pvalue": pvalue,
            "overlap": overlap,
            "genes_from_list": len(gene_list),
            "genes_from_go": len(term_genes),
        })

    # fdr() is told the p-values are already sorted, so sort first.
    enriched_list.sort(key=lambda it: it['pvalue'])

    qvalues = fdr([it['pvalue'] for it in enriched_list], presorted=True)
    # Built-in zip works on Python 2 and 3; itertools.izip is Python 2 only.
    for qvalue, it in zip(qvalues, enriched_list):
        if math.fabs(qvalue) == 0:
            # log(0) is undefined; use a large finite score instead.
            it['qvalue'] = 1000
        else:
            it['qvalue'] = -math.log(qvalue, 10)

    enriched_list.sort(key=lambda it: it['qvalue'], reverse=True)

    return enriched_list
예제 #2
0
파일: go.py 프로젝트: ndexbio/ndex-nav
def gene_set_enrichment(gene_list, M=None):
    '''
    Look up the GO terms annotated to the given genes in the ``go`` MongoDB
    database and attach enrichment statistics to each term document.

    :param gene_list: list of gene symbols
    :param M: total number of genes (derived from database, if None)
    :return: list of term documents (only terms with n_genes > 2), each
        extended with 'pvalue' and 'overlap' from calc_pvalue and 'qvalue'
        from fdr, sorted by ascending p-value
    '''
    client = pymongo.MongoClient()
    try:
        if not M:
            M = len(client.go.genes.distinct('gene'))
        gene_query = {'gene': {'$in': list(gene_list)}}
        go_ids = list(client.go.genes.find(gene_query).distinct('go'))
        term_query = {'go': {'$in': go_ids}, 'n_genes': {'$gt': 2}}
        # Materialize the cursor so the client can be closed before scoring.
        terms = list(client.go.terms.find(term_query))
    finally:
        # Release the connection even if a query fails.
        client.close()

    enriched = []
    for term in terms:
        # Copy the document, then add the statistics; unlike
        # ``term.items() + zip(...)`` this also works on Python 3.
        record = dict(term)
        record.update(zip(('pvalue', 'overlap'),
                          calc_pvalue(gene_list, term['genes'], M)))
        enriched.append(record)

    # fdr() is told the p-values are already sorted, so sort first.
    enriched.sort(key=lambda it: it['pvalue'])

    qvalues = fdr([it['pvalue'] for it in enriched], presorted=True)
    # Built-in zip works on Python 2 and 3; itertools.izip is Python 2 only.
    for qvalue, it in zip(qvalues, enriched):
        it['qvalue'] = qvalue

    return enriched
def calc_enrichment(gene_list, GO_ID_list, total_unique_gene, GO_Term_list):
    '''
    Score GO terms for enrichment against a gene list.

    Only terms whose gene set has between 20 and 2000 members (inclusive) are
    tested, and only terms with more than one overlapping gene are reported.

    :param gene_list: query genes, passed through to calc_pvalue
    :param GO_ID_list: mapping of GO id -> collection of genes for that term
    :param total_unique_gene: population size, passed to calc_pvalue as M
    :param GO_Term_list: mapping of GO id -> sequence whose first two items
        are used as the term's name and description
    :return: list of dicts with keys go_id, name, description, pvalue,
        overlap, genes_from_list, genes_from_go and qvalue, sorted by qvalue
        descending. qvalue is -log10 of the value produced by fdr(), or
        float("inf") when that value is exactly zero.
    '''
    M = total_unique_gene

    enriched_list = []
    for term in GO_ID_list:
        # Look the gene set up once instead of on every use.
        term_genes = GO_ID_list.get(term)
        if not 20 <= len(term_genes) <= 2000:
            continue

        pvalue, overlap = calc_pvalue(gene_list, term_genes, M)
        if len(overlap) <= 1:
            continue

        # NOTE: a term missing from GO_Term_list makes .get() return None and
        # the indexing below raise TypeError (same as before this rewrite).
        term_info = GO_Term_list.get(term)
        enriched_list.append({
            "go_id": term,
            "name": term_info[0],
            "description": term_info[1],
            "pvalue": pvalue,
            "overlap": overlap,
            "genes_from_list": len(gene_list),
            "genes_from_go": len(term_genes),
        })

    # fdr() is told the p-values are already sorted, so sort first.
    enriched_list.sort(key=lambda it: it['pvalue'])

    qvalues = fdr([it['pvalue'] for it in enriched_list], presorted=True)
    # Built-in zip works on Python 2 and 3; itertools.izip is Python 2 only.
    for qvalue, it in zip(qvalues, enriched_list):
        if math.fabs(qvalue) == 0:
            # log(0) is undefined; rank such terms above every finite score.
            it['qvalue'] = float("inf")
        else:
            it['qvalue'] = -math.log(qvalue, 10)

    enriched_list.sort(key=lambda it: it['qvalue'], reverse=True)

    return enriched_list
예제 #4
0
def gene_set_enrichment(gene_list, M=None):
    '''
    Look up the set terms annotated to the given genes in the ``go`` MongoDB
    database and attach enrichment statistics to each term document.

    :param gene_list: list of gene symbols --> ENSG00000233636, ENSG00000129673, etc...
    :param M: total number of genes (derived from database, if None)
    :return: list of term documents (only terms with n_genes > 2), each
        extended with 'pvalue' and 'overlap' from calc_pvalue and 'qvalue'
        from fdr, sorted by ascending p-value
    '''
    client = pymongo.MongoClient()
    try:
        if not M:
            M = len(client.go.genes.distinct('gene'))
        gene_query = {'gene': {'$in': list(gene_list)}}
        # Term ids look like GO:0004059, GO:0006474, etc...
        term_ids = list(client.go.genes.find(gene_query).distinct('go'))
        term_query = {'go': {'$in': term_ids}, 'n_genes': {'$gt': 2}}
        # Materialize the cursor so the client can be closed before scoring.
        terms = list(client.go.terms.find(term_query))
    finally:
        # Release the connection even if a query fails.
        client.close()

    enriched = []
    for term in terms:
        # Copy the document, then add the statistics; unlike
        # ``term.items() + zip(...)`` this also works on Python 3.
        record = dict(term)
        record.update(zip(('pvalue', 'overlap'),
                          calc_pvalue(gene_list, term['genes'], M)))
        enriched.append(record)

    # fdr() is told the p-values are already sorted, so sort first.
    enriched.sort(key=lambda it: it['pvalue'])

    qvalues = fdr([it['pvalue'] for it in enriched], presorted=True)
    # Built-in zip works on Python 2 and 3; itertools.izip is Python 2 only.
    for qvalue, it in zip(qvalues, enriched):
        it['qvalue'] = qvalue

    return enriched
                strDirection = 'PWS < PFS'
            #print('ROI1 = %s, ROI2 = %s: p = %f (%s)'%(b_rois[i1], b_rois[i2], p, strDirection))
            #print('ROI1 = %s, ROI2 = %s: p = %f (%s)'%(roi1, roi2, p, strDirection))
            t_showZScores['PFS'].append(z_PFS)
            t_showZScores['PWS'].append(z_PWS)
            t_sigROIs['roi1'].append(roi1)
            t_sigROIs['roi2'].append(roi2)

            k = k + 1

    
    

    print('Between-group comparisons of correlation z-scores')
    if bFDR:
        signifThresh = fdr(all_ps, FDR)
        print('**FDR** (q = %f) --> signifThresh = %f\n'%(FDR, signifThresh))
    else:
        print('Pre-specified signifThresh = %f\n'%signifThresh);

    ### Determine the significant differences ###
    keep_idx = []
    for i1 in range(nTotComps):
        if all_ps[i1] < signifThresh:
            keep_idx.append(i1)
    nSigDiff = len(keep_idx)

    for idx in keep_idx:
        p = all_ps[idx]
        zScores_PFS = t_showZScores['PFS'][idx]
        zScores_PWS = t_showZScores['PWS'][idx]