Ejemplo n.º 1
0
def orforflistmatch(s_orf,orflist):
    """
    Score each annotation of s_orf for over-representation within orflist.

    For every annotation description attached to s_orf, the ORFs carrying
    that description (annotation2orfs) are intersected with orflist and the
    counts are handed to Arith.hypgeomsummore.

    Returns: Sorted list of tuples,
             [(significance, category, fracall, fracsub, matching_orfs), ...]
             where fracall is the fraction of all known ORFs in the category
             and fracsub is the fraction of orflist in the category (0 when
             orflist is empty).  No threshold is applied.
    """
    # Populate the module-level GO tables on first use.
    if not _orfs_by_go:
        load_GO()

    n_listed = len(orflist)
    n_known  = len(_gos_by_orf.keys())

    # One entry per annotation of s_orf; repeated descriptions are kept.
    descriptions = [anno.desc for anno in annotations(s_orf)]

    scored = []
    for desc in descriptions:
        members   = annotation2orfs(desc)
        n_members = len(members)
        frac_known = float(n_members) / float(n_known)

        # Members of orflist carrying this annotation, in orflist order.
        hits   = [orf for orf in orflist if orf in members]
        n_hits = len(hits)

        # Guard the empty-list case so the fraction is simply reported as 0.
        if n_listed != 0:
            frac_listed = float(n_hits) / float(n_listed)
        else:
            frac_listed = 0

        pvalue = Arith.hypgeomsummore(n_members, n_known, n_listed, n_hits)
        scored.append((pvalue, desc, frac_known, frac_listed, hits))

    # Tuples sort by significance first, so the smallest values lead.
    scored.sort()
    return scored
Ejemplo n.º 2
0
def orflist2categories_long(orflist,thresh=0.05):
    """
    Report the annotation categories that are overrepresented among orflist.

    Every category found on at least one ORF of orflist is scored with
    Arith.hypgeomsummore; the score is multiplied by the number of categories
    tested (Bonferroni correction) before thresh is applied.

    Returns: Sorted list of tuples,
             [(significance, category, fracall, fracsub, orflist), ...]
             restricted to entries whose corrected significance does not
             exceed thresh.  The last element holds the members of the input
             orflist that carry the category.
    """
    # Populate the module-level GO tables on first use.
    if not _orfs_by_go:
        load_GO()

    n_listed = len(orflist)
    n_known  = len(_gos_by_orf.keys())

    # Collect the distinct category descriptions seen on the listed ORFs,
    # keeping first-seen order.
    descriptions = []
    for orf in orflist:
        for anno in annotations(orf):
            if anno.desc in descriptions:
                continue
            descriptions.append(anno.desc)

    n_tests = float(len(descriptions))

    scored = []
    for desc in descriptions:
        members    = annotation2orfs(desc)
        n_members  = len(members)
        frac_known = float(n_members) / float(n_known)

        hits        = [orf for orf in orflist if orf in members]
        n_hits      = len(hits)
        frac_listed = float(n_hits) / float(n_listed)

        # Bonferroni: scale the raw score by the number of categories tested.
        corrected = Arith.hypgeomsummore(n_members, n_known, n_listed, n_hits) * n_tests
        scored.append((corrected, desc, frac_known, frac_listed, hits))

    scored.sort()

    # Keep only the entries that pass the corrected threshold.
    return [entry for entry in scored if not (entry[0] > thresh)]
Ejemplo n.º 3
0
def probOvlp(A,B,thresh=0.7,verbose=None):
    if A.width >= B.width:
        Wide, Narrow = A, B
    else:
        Wide, Narrow = B, A

    RC = MotifTools.revcomplement
    if 1:
        newWide  = Wide[-1,Wide.width+1]
        if Wide.__dict__.has_key('bestWide'):
            bestWide = Wide.bestWide
        else:
            bestWideD = {}
            for x in newWide.bestseqs(thresh*newWide.maxscore):
                bestWideD[x] = 1
            for x in bestWideD.keys():
                bestWideD[RC(x)] = 1
            Wide.bestWide = bestWideD.keys()
            bestWide = Wide.bestWide
        Wide = newWide
    
        if Narrow.__dict__.has_key('bestNarrow'):
            bestNarrow = Narrow.bestNarrow
        else:
            bestNarrowD = {}
            for x in Narrow.bestseqs(thresh*Narrow.maxscore):
                bestNarrowD[x] = 1
            for x in bestNarrowD.keys():
                bestNarrowD[RC(x)] = 1
            bestNarrow = bestNarrowD.keys()
            Narrow.bestNarrow = bestNarrow
        
    #bestWide   = [x[1] for x in Wide.bestseqs  (thresh*Wide.maxscore)  ]
    #bestNarrow = [x[1] for x in Narrow.bestseqs(thresh*Narrow.maxscore)]

    countNarrow = len(bestNarrow)
    countWide   = len(bestWide)

    numtotal    = math.pow(4,Wide.width)
    fudgefactor = math.pow(4,Wide.width - Narrow.width)

    bestWideTups = [(x,MotifTools.revcomplement(x)) for x in bestWide]

    countBoth = 0
    for i in range(len(bestNarrow)):
        m_narrow = bestNarrow[i]
        delj = []

        for j in range(len(bestWideTups)):
            if (bestWideTups[j][0].find(m_narrow) >= 0) or (bestWideTups[j][1].find(m_narrow) >= 0):
                countBoth += 1
                delj.append(j)

        delj.reverse()  #Chew in from the back
        for j in delj:
            del(bestWideTups[j])


    if verbose: print '%10d %10d %10d %10d | %10d  %5d '%(
        countWide, numtotal, countNarrow *fudgefactor , countBoth , countNarrow, Wide.width - Narrow.width),
    
    p = Arith.hypgeomsummore(countWide,                 #Num Interesting
                             numtotal,                  #All k-mers
                             countNarrow * fudgefactor, #Number picked
                             countBoth                ) #Number found
    return p
Ejemplo n.º 4
0
def probOvlp(A, B, thresh=0.7, verbose=None):
    if A.width >= B.width:
        Wide, Narrow = A, B
    else:
        Wide, Narrow = B, A

    RC = MotifTools.revcomplement
    if 1:
        newWide = Wide[-1, Wide.width + 1]
        if Wide.__dict__.has_key('bestWide'):
            bestWide = Wide.bestWide
        else:
            bestWideD = {}
            for x in newWide.bestseqs(thresh * newWide.maxscore):
                bestWideD[x] = 1
            for x in bestWideD.keys():
                bestWideD[RC(x)] = 1
            Wide.bestWide = bestWideD.keys()
            bestWide = Wide.bestWide
        Wide = newWide

        if Narrow.__dict__.has_key('bestNarrow'):
            bestNarrow = Narrow.bestNarrow
        else:
            bestNarrowD = {}
            for x in Narrow.bestseqs(thresh * Narrow.maxscore):
                bestNarrowD[x] = 1
            for x in bestNarrowD.keys():
                bestNarrowD[RC(x)] = 1
            bestNarrow = bestNarrowD.keys()
            Narrow.bestNarrow = bestNarrow

    #bestWide   = [x[1] for x in Wide.bestseqs  (thresh*Wide.maxscore)  ]
    #bestNarrow = [x[1] for x in Narrow.bestseqs(thresh*Narrow.maxscore)]

    countNarrow = len(bestNarrow)
    countWide = len(bestWide)

    numtotal = math.pow(4, Wide.width)
    fudgefactor = math.pow(4, Wide.width - Narrow.width)

    bestWideTups = [(x, MotifTools.revcomplement(x)) for x in bestWide]

    countBoth = 0
    for i in range(len(bestNarrow)):
        m_narrow = bestNarrow[i]
        delj = []

        for j in range(len(bestWideTups)):
            if (bestWideTups[j][0].find(m_narrow) >=
                    0) or (bestWideTups[j][1].find(m_narrow) >= 0):
                countBoth += 1
                delj.append(j)

        delj.reverse()  #Chew in from the back
        for j in delj:
            del (bestWideTups[j])

    if verbose:
        print '%10d %10d %10d %10d | %10d  %5d ' % (
            countWide, numtotal, countNarrow * fudgefactor, countBoth,
            countNarrow, Wide.width - Narrow.width),

    p = Arith.hypgeomsummore(
        countWide,  #Num Interesting
        numtotal,  #All k-mers
        countNarrow * fudgefactor,  #Number picked
        countBoth)  #Number found
    return p