Beispiel #1
0
def estimate_frequency(motif, k, samples=100000, thresh=0.7):
    """Estimate how often a random k-mer is counted as matching `motif`.

    Each round builds `samples` probes of length `k` by slicing a shuffled
    pool of equal numbers of A, C, G and T, wraps them in a ProbeSet and asks
    ProbeSet.count_matching_probes() how many match.  Counts are pooled over
    rounds and the running ratio matches/probes is the estimate.

    Sampling stops as soon as one of these holds:
      * the estimate changed by less than 1e-4 (relative) since the
        previous round,
      * at least four rounds have run and more than 100 matches were seen,
      * 40 rounds have run.

    Args:
        motif:   motif object, passed unchanged to count_matching_probes().
        k:       probe length (int).
        samples: number of probes generated per round (int).
        thresh:  passed through as the `thresh` keyword of
                 count_matching_probes().

    Returns:
        float: pooled matches divided by pooled probes.
    """
    max_rounds = 40
    estimate = None          # no estimate exists before the first round
    total = 0.0
    totalcount = 0.0

    # Round up so the pool always holds at least samples*k characters;
    # rounding down would leave the last probe short (or empty).
    repeats = (samples * k + 3) // 4
    base = 'ACGT' * repeats

    for round_idx in range(max_rounds):
        # A single shuffle already yields a uniformly random permutation.
        pool = list(base)
        random.shuffle(pool)
        long_string = ''.join(pool)

        # Cut the shuffled pool into consecutive, non-overlapping k-mers.
        # The index is deliberately distinct from the round counter so the
        # minimum-rounds test below really sees the round number.
        seqD = {}
        for j in range(samples):
            offset = k * j
            seqD[j] = long_string[offset:offset + k]

        P = ProbeSet(genome=seqD)
        count = P.count_matching_probes(motif, thresh=thresh)
        total += float(samples)
        totalcount += float(count)

        previous = estimate
        estimate = totalcount / total

        # Convergence is only meaningful from the second round on; comparing
        # against a dummy starting value would end the loop immediately.
        if previous is not None:
            # The small constant guards against division by zero when no
            # match has been seen yet.
            d = math.fabs(estimate - previous) / (previous + 0.00000001)
            if d < 1e-4:
                break
        if round_idx > 2 and totalcount > 100:
            break
    return estimate
Beispiel #2
0
def estimate_frequency(motif, k, samples=100000, thresh=0.7):
    """Estimate how often a random k-mer is counted as matching `motif`.

    Each round builds `samples` probes of length `k` by slicing a shuffled
    pool of equal numbers of A, C, G and T, wraps them in a ProbeSet and asks
    ProbeSet.count_matching_probes() how many match.  Counts are pooled over
    rounds and the running ratio matches/probes is the estimate.

    Sampling stops as soon as one of these holds:
      * the estimate changed by less than 1e-4 (relative) since the
        previous round,
      * at least four rounds have run and more than 100 matches were seen,
      * 40 rounds have run.

    Args:
        motif:   motif object, passed unchanged to count_matching_probes().
        k:       probe length (int).
        samples: number of probes generated per round (int).
        thresh:  passed through as the `thresh` keyword of
                 count_matching_probes().

    Returns:
        float: pooled matches divided by pooled probes.
    """
    max_rounds = 40
    estimate = None          # no estimate exists before the first round
    total = 0.0
    totalcount = 0.0

    # Round up so the pool always holds at least samples*k characters;
    # rounding down would leave the last probe short (or empty).
    repeats = (samples * k + 3) // 4
    base = 'ACGT' * repeats

    for round_idx in range(max_rounds):
        # A single shuffle already yields a uniformly random permutation.
        pool = list(base)
        random.shuffle(pool)
        long_string = ''.join(pool)

        # Cut the shuffled pool into consecutive, non-overlapping k-mers.
        # The index is deliberately distinct from the round counter so the
        # minimum-rounds test below really sees the round number.
        seqD = {}
        for j in range(samples):
            offset = k * j
            seqD[j] = long_string[offset:offset + k]

        P = ProbeSet(genome=seqD)
        count = P.count_matching_probes(motif, thresh=thresh)
        total += float(samples)
        totalcount += float(count)

        previous = estimate
        estimate = totalcount / total

        # Convergence is only meaningful from the second round on; comparing
        # against a dummy starting value would end the loop immediately.
        if previous is not None:
            # The small constant guards against division by zero when no
            # match has been seen yet.
            d = math.fabs(estimate - previous) / (previous + 0.00000001)
            if d < 1e-4:
                break
        if round_idx > 2 and totalcount > 100:
            break
    return estimate
Beispiel #3
0
def probOvlpBinomial(A, B, thresh=0.7, verbose=None):
    """Score the overlap between two motifs with a binomial tail probability.

    The wider of the two motifs is padded via Wide[-1, Wide.width + 1], its
    sequences scoring at least thresh * maxscore (plus their reverse
    complements) are collected, and the narrower motif is matched against
    that set through ProbeSet.count_matching_probes().  The match count is
    then compared with the background rate from estimate_frequency() by
    calling Arith.binomialsumtail(rate, n_sequences, n_matches).

    Two results are cached as attributes on the motif objects:
      * Wide.bestWide            -- list of best sequences of the wide motif
      * Narrow.probNarrow[width] -- background rate for that padded width
    NOTE(review): neither cache key includes `thresh`, so a later call with a
    different threshold reuses values computed for the first one -- confirm
    this is intended.

    Args:
        A, B:    motif objects; the one with the larger `width` is treated as
                 the wide motif (A wins ties).
        thresh:  fraction of the maximum score used to select sequences and
                 passed on to the matching / frequency helpers.
        verbose: accepted but not used; the diagnostic line is always printed.

    Returns:
        1.0 if the narrow motif matches none of the wide motif's sequences,
        otherwise the value returned by Arith.binomialsumtail().
    """
    if A.width >= B.width:
        Wide, Narrow = A, B
    else:
        Wide, Narrow = B, A

    RC = MotifTools.revcomplement
    newWide = Wide[-1, Wide.width + 1]
    # Look in the instance dict only, so a class-level attribute of the same
    # name is never mistaken for a cached result.
    if 'bestWide' in Wide.__dict__:
        bestWide = Wide.bestWide
    else:
        bestWideD = {}
        for x in newWide.bestseqs(thresh * newWide.maxscore):
            bestWideD[x[1]] = 1
        # Iterate over a snapshot: the dict grows while reverse complements
        # are being added.
        for x in list(bestWideD):
            bestWideD[RC(x)] = 1
        # Store a real list so it can be indexed and measured later.
        Wide.bestWide = list(bestWideD)
        bestWide = Wide.bestWide
    Wide = newWide

    # ProbeSet is given a mapping of integer id -> sequence.
    D = dict(enumerate(bestWide))
    P = ProbeSet(genome=D)
    matchNarrow = P.count_matching_probes(Narrow, thresh=thresh)

    if matchNarrow == 0:
        return 1.0

    if 'probNarrow' not in Narrow.__dict__:
        Narrow.probNarrow = {}
    if Wide.width in Narrow.probNarrow:
        probNarrow = Narrow.probNarrow[Wide.width]
    else:
        probNarrow = estimate_frequency(Narrow, Wide.width, thresh=thresh)
        Narrow.probNarrow[Wide.width] = probNarrow

    p = Arith.binomialsumtail(probNarrow, len(bestWide), matchNarrow)
    # Single parenthesised argument: prints the same text whether `print` is
    # a statement or a function.
    print('\nD= %7.3f %9.4e %8d %7d %-14s %-20s %-14s %-20s' % (
        Arith.nlog10(p), probNarrow, len(bestWide), matchNarrow,
        A.family, A, B.family, B))

    return p
Beispiel #4
0
def probOvlpBinomial(A, B, thresh=0.7, verbose=None):
    """Score the overlap between two motifs with a binomial tail probability.

    The wider of the two motifs is padded via Wide[-1, Wide.width + 1], its
    sequences scoring at least thresh * maxscore (plus their reverse
    complements) are collected, and the narrower motif is matched against
    that set through ProbeSet.count_matching_probes().  The match count is
    then compared with the background rate from estimate_frequency() by
    calling Arith.binomialsumtail(rate, n_sequences, n_matches).

    Two results are cached as attributes on the motif objects:
      * Wide.bestWide            -- list of best sequences of the wide motif
      * Narrow.probNarrow[width] -- background rate for that padded width
    NOTE(review): neither cache key includes `thresh`, so a later call with a
    different threshold reuses values computed for the first one -- confirm
    this is intended.

    Args:
        A, B:    motif objects; the one with the larger `width` is treated as
                 the wide motif (A wins ties).
        thresh:  fraction of the maximum score used to select sequences and
                 passed on to the matching / frequency helpers.
        verbose: accepted but not used; the diagnostic line is always printed.

    Returns:
        1.0 if the narrow motif matches none of the wide motif's sequences,
        otherwise the value returned by Arith.binomialsumtail().
    """
    if A.width >= B.width:
        Wide, Narrow = A, B
    else:
        Wide, Narrow = B, A

    RC = MotifTools.revcomplement
    newWide = Wide[-1, Wide.width + 1]
    # Look in the instance dict only, so a class-level attribute of the same
    # name is never mistaken for a cached result.
    if 'bestWide' in Wide.__dict__:
        bestWide = Wide.bestWide
    else:
        bestWideD = {}
        for x in newWide.bestseqs(thresh * newWide.maxscore):
            bestWideD[x[1]] = 1
        # Iterate over a snapshot: the dict grows while reverse complements
        # are being added.
        for x in list(bestWideD):
            bestWideD[RC(x)] = 1
        # Store a real list so it can be indexed and measured later.
        Wide.bestWide = list(bestWideD)
        bestWide = Wide.bestWide
    Wide = newWide

    # ProbeSet is given a mapping of integer id -> sequence.
    D = dict(enumerate(bestWide))
    P = ProbeSet(genome=D)
    matchNarrow = P.count_matching_probes(Narrow, thresh=thresh)

    if matchNarrow == 0:
        return 1.0

    if 'probNarrow' not in Narrow.__dict__:
        Narrow.probNarrow = {}
    if Wide.width in Narrow.probNarrow:
        probNarrow = Narrow.probNarrow[Wide.width]
    else:
        probNarrow = estimate_frequency(Narrow, Wide.width, thresh=thresh)
        Narrow.probNarrow[Wide.width] = probNarrow

    p = Arith.binomialsumtail(probNarrow, len(bestWide), matchNarrow)
    # Single parenthesised argument: prints the same text whether `print` is
    # a statement or a function.
    print('\nD= %7.3f %9.4e %8d %7d %-14s %-20s %-14s %-20s' % (
        Arith.nlog10(p), probNarrow, len(bestWide), matchNarrow,
        A.family, A, B.family, B))

    return p