def pairHmmRun2(QuerySeq, trSeq, tProb, eProb_M, eProb_X, qName):
    """Run the log-space pair-HMM forward pass and the old lower-bound method.

    The repeat element ``trSeq`` is tiled into a reference that is more than
    ``mltFactor`` times as long as the query, both sequences are converted
    to arrays of character codes, and the three forward matrices are filled
    by ``pHf2.forwardProbLog``.  The repeat count is taken from
    ``pHf2.numTRlog`` and the LLR values from ``getLowerBound_OldMethod``.

    Args:
        QuerySeq: Putative tandem-repeat region of the query (from the 3DP
            step), as a string.
        trSeq: The repeat element, as a non-empty string.
        tProb: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the transition probabilities -- confirm against pHf2).
        eProb_M: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the M-state emission values -- confirm against pHf2).
        eProb_X: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the X-state emission values -- confirm against pHf2).
        qName: Forwarded unchanged to ``getLowerBound_OldMethod``.

    Returns:
        Tuple ``(num_TR, diffLLR, LLR)``: ``num_TR`` is the first value
        returned by ``pHf2.numTRlog``; ``diffLLR`` and ``LLR`` are the two
        values returned by ``getLowerBound_OldMethod``.

    Raises:
        ZeroDivisionError: If ``trSeq`` is empty.
    """
    InitLookupTable()

    # Y-state emission vector: four identical entries
    # (presumably one per nucleotide -- confirm against pHf2).
    pen = 1.0
    eProb_Y = np.array([pen, pen, pen, pen])

    nq = len(QuerySeq)
    ntr = len(trSeq)

    # Coarse, fractional estimate of how many repeat copies fit in the query.
    estTRinQ = nq / float(ntr)
    # Over-provisioning factor so the tiled reference outgrows the estimate.
    mltFactor = 1.5

    # Character codes of the query.  A list comprehension is used instead of
    # map() so the conversion also yields a proper array on Python 3.
    q = np.asarray([ord(base) for base in QuerySeq])

    # Tile the repeat element and convert it the same way.
    repeatSeq = trSeq * (int(mltFactor * estTRinQ) + 1)
    nr = len(repeatSeq)
    r = np.asarray([ord(base) for base in repeatSeq])

    # Forward matrices for the M, X and Y states, initialised to -inf.
    shape = (nq + 1, nr + 1)
    Lf_M = np.full(shape, -np.inf)
    Lf_X = np.full(shape, -np.inf)
    Lf_Y = np.full(shape, -np.inf)
    Lf_M, Lf_X, Lf_Y = pHf2.forwardProbLog(q, nq, r, nr, Lf_M, Lf_X, Lf_Y,
                                           eProb_M, eProb_X, eProb_Y, tProb)
    print("##done_FWD_Calc##\n")

    # Only the repeat count is needed here; the other five outputs are unused.
    num_TR, _, _, _, _, _ = pHf2.numTRlog(Lf_M, Lf_X, Lf_Y, ntr, nr, nq)

    LLRcutOff = 0.01
    LallQR, index = pHf2.getAllFwdProb(Lf_M, Lf_X, Lf_Y, ntr, nr, nq)
    print("##AllFwdProb##\n")

    # Old method.  The newer alternative noted in the original source was
    #   lowerBound, devList, diffLLR, LLR = getLowerBound3(
    #       LallQR, index, nq, ntr, LLRcutOff, qName)
    # which would also add lowerBound and devList to the return value.
    diffLLR, LLR = getLowerBound_OldMethod(LallQR, index, nq, ntr, LLRcutOff,
                                           qName)
    print("##LowerBound##\n")

    return num_TR, diffLLR, LLR
# Example no. 2
# 0
def pairHmmRun2(QuerySeq, trSeq, tProb, eProb_M, eProb_X, qName):
    """Run the log-space pair-HMM forward pass and the old lower-bound method.

    The repeat element ``trSeq`` is tiled into a reference that is more than
    ``mltFactor`` times as long as the query, both sequences are converted
    to arrays of character codes, and the three forward matrices are filled
    by ``pHf2.forwardProbLog``.  The repeat count is taken from
    ``pHf2.numTRlog`` and the LLR values from ``getLowerBound_OldMethod``.

    Args:
        QuerySeq: Putative tandem-repeat region of the query (from the 3DP
            step), as a string.
        trSeq: The repeat element, as a non-empty string.
        tProb: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the transition probabilities -- confirm against pHf2).
        eProb_M: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the M-state emission values -- confirm against pHf2).
        eProb_X: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the X-state emission values -- confirm against pHf2).
        qName: Forwarded unchanged to ``getLowerBound_OldMethod``.

    Returns:
        Tuple ``(num_TR, diffLLR, LLR)``: ``num_TR`` is the first value
        returned by ``pHf2.numTRlog``; ``diffLLR`` and ``LLR`` are the two
        values returned by ``getLowerBound_OldMethod``.

    Raises:
        ZeroDivisionError: If ``trSeq`` is empty.
    """
    InitLookupTable()

    # Y-state emission vector: four identical entries
    # (presumably one per nucleotide -- confirm against pHf2).
    pen = 1.0
    eProb_Y = np.array([pen, pen, pen, pen])

    nq = len(QuerySeq)
    ntr = len(trSeq)

    # Coarse, fractional estimate of how many repeat copies fit in the query.
    estTRinQ = nq / float(ntr)
    # Over-provisioning factor so the tiled reference outgrows the estimate.
    mltFactor = 1.5

    # Character codes of the query.  A list comprehension is used instead of
    # map() so the conversion also yields a proper array on Python 3.
    q = np.asarray([ord(base) for base in QuerySeq])

    # Tile the repeat element and convert it the same way.
    repeatSeq = trSeq * (int(mltFactor * estTRinQ) + 1)
    nr = len(repeatSeq)
    r = np.asarray([ord(base) for base in repeatSeq])

    # Forward matrices for the M, X and Y states, initialised to -inf.
    shape = (nq + 1, nr + 1)
    Lf_M = np.full(shape, -np.inf)
    Lf_X = np.full(shape, -np.inf)
    Lf_Y = np.full(shape, -np.inf)
    Lf_M, Lf_X, Lf_Y = pHf2.forwardProbLog(q, nq, r, nr, Lf_M, Lf_X, Lf_Y,
                                           eProb_M, eProb_X, eProb_Y, tProb)
    print("##done_FWD_Calc##\n")

    # Only the repeat count is needed here; the other five outputs are unused.
    num_TR, _, _, _, _, _ = pHf2.numTRlog(Lf_M, Lf_X, Lf_Y, ntr, nr, nq)

    LLRcutOff = 0.01
    LallQR, index = pHf2.getAllFwdProb(Lf_M, Lf_X, Lf_Y, ntr, nr, nq)
    print("##AllFwdProb##\n")

    # Old method.  The newer alternative noted in the original source was
    #   lowerBound, devList, diffLLR, LLR = getLowerBound3(
    #       LallQR, index, nq, ntr, LLRcutOff, qName)
    # which would also add lowerBound and devList to the return value.
    diffLLR, LLR = getLowerBound_OldMethod(LallQR, index, nq, ntr, LLRcutOff,
                                           qName)
    print("##LowerBound##\n")

    return num_TR, diffLLR, LLR
# Example no. 3
# 0
def pairHmmRun(QuerySeq, trSeq, tProb, eProb_M, eProb_X):
    """Run the log-space pair-HMM forward pass, sampling and divergence scan.

    The repeat element ``trSeq`` is tiled into a reference that is more than
    ``mltFactor`` times as long as the query, both sequences are converted
    to arrays of character codes, and the three forward matrices are filled
    by ``pHf2.forwardProbLog``.  The results of ``pHf2.numTRlog`` feed
    ``generateSamples``, and the output of ``pHf2.getAllFwdProb`` feeds
    ``getLowerBound2``.

    Args:
        QuerySeq: Putative tandem-repeat region of the query (from the 3DP
            step), as a string.
        trSeq: The repeat element, as a non-empty string.
        tProb: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the transition probabilities -- confirm against pHf2).
        eProb_M: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the M-state emission values -- confirm against pHf2).
        eProb_X: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the X-state emission values -- confirm against pHf2).

    Returns:
        Tuple ``(num_TR, llr, ProbMax, samplesNumTR, highDivLen,
        highDivRegion)``: the first three come from ``pHf2.numTRlog``,
        ``samplesNumTR`` from ``generateSamples`` and the last two from
        ``getLowerBound2``.

    Raises:
        ZeroDivisionError: If ``trSeq`` is empty.
    """
    InitLookupTable()

    # Y-state emission vector: four identical entries
    # (presumably one per nucleotide -- confirm against pHf2).
    pen = 1.0
    eProb_Y = np.array([pen, pen, pen, pen])

    nq = len(QuerySeq)
    ntr = len(trSeq)

    # Coarse, fractional estimate of how many repeat copies fit in the query.
    estTRinQ = nq / float(ntr)
    # Over-provisioning factor so the tiled reference outgrows the estimate.
    mltFactor = 1.5

    # Character codes of the query.  A list comprehension is used instead of
    # map() so the conversion also yields a proper array on Python 3.
    q = np.asarray([ord(base) for base in QuerySeq])

    # Tile the repeat element and convert it the same way.
    repeatSeq = trSeq * (int(mltFactor * estTRinQ) + 1)
    nr = len(repeatSeq)
    r = np.asarray([ord(base) for base in repeatSeq])

    # Forward matrices for the M, X and Y states, initialised to -inf.
    shape = (nq + 1, nr + 1)
    Lf_M = np.full(shape, -np.inf)
    Lf_X = np.full(shape, -np.inf)
    Lf_Y = np.full(shape, -np.inf)
    Lf_M, Lf_X, Lf_Y = pHf2.forwardProbLog(q, nq, r, nr, Lf_M, Lf_X, Lf_Y,
                                           eProb_M, eProb_X, eProb_Y, tProb)
    print("##done_FWD_Calc##\n")

    # The last output (LendAt in the original) is not used by this function.
    num_TR, llr, index, ProbMax, edf, _ = pHf2.numTRlog(
        Lf_M, Lf_X, Lf_Y, ntr, nr, nq)
    # 50 is presumably the number of samples to draw -- confirm against
    # generateSamples.
    samplesNumTR = generateSamples(edf, index, ntr, 50)
    print("##sampling##\n")

    LLRcutOff = 0.01
    # Note: index is rebound here to the value returned by getAllFwdProb.
    LallQR, index = pHf2.getAllFwdProb(Lf_M, Lf_X, Lf_Y, ntr, nr, nq)
    print("##AllFwdProb##\n")
    highDivLen, highDivRegion = getLowerBound2(LallQR, index, nq, ntr,
                                               LLRcutOff)
    print("##LowerBound##\n")

    return num_TR, llr, ProbMax, samplesNumTR, highDivLen, highDivRegion
def pairHmmRun(QuerySeq, trSeq, tProb, eProb_M, eProb_X):
    """Run the log-space pair-HMM forward pass, sampling and divergence scan.

    The repeat element ``trSeq`` is tiled into a reference that is more than
    ``mltFactor`` times as long as the query, both sequences are converted
    to arrays of character codes, and the three forward matrices are filled
    by ``pHf2.forwardProbLog``.  The results of ``pHf2.numTRlog`` feed
    ``generateSamples``, and the output of ``pHf2.getAllFwdProb`` feeds
    ``getLowerBound2``.

    Args:
        QuerySeq: Putative tandem-repeat region of the query (from the 3DP
            step), as a string.
        trSeq: The repeat element, as a non-empty string.
        tProb: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the transition probabilities -- confirm against pHf2).
        eProb_M: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the M-state emission values -- confirm against pHf2).
        eProb_X: Forwarded unchanged to ``pHf2.forwardProbLog`` (presumably
            the X-state emission values -- confirm against pHf2).

    Returns:
        Tuple ``(num_TR, llr, ProbMax, samplesNumTR, highDivLen,
        highDivRegion)``: the first three come from ``pHf2.numTRlog``,
        ``samplesNumTR`` from ``generateSamples`` and the last two from
        ``getLowerBound2``.

    Raises:
        ZeroDivisionError: If ``trSeq`` is empty.
    """
    InitLookupTable()

    # Y-state emission vector: four identical entries
    # (presumably one per nucleotide -- confirm against pHf2).
    pen = 1.0
    eProb_Y = np.array([pen, pen, pen, pen])

    nq = len(QuerySeq)
    ntr = len(trSeq)

    # Coarse, fractional estimate of how many repeat copies fit in the query.
    estTRinQ = nq / float(ntr)
    # Over-provisioning factor so the tiled reference outgrows the estimate.
    mltFactor = 1.5

    # Character codes of the query.  A list comprehension is used instead of
    # map() so the conversion also yields a proper array on Python 3.
    q = np.asarray([ord(base) for base in QuerySeq])

    # Tile the repeat element and convert it the same way.
    repeatSeq = trSeq * (int(mltFactor * estTRinQ) + 1)
    nr = len(repeatSeq)
    r = np.asarray([ord(base) for base in repeatSeq])

    # Forward matrices for the M, X and Y states, initialised to -inf.
    shape = (nq + 1, nr + 1)
    Lf_M = np.full(shape, -np.inf)
    Lf_X = np.full(shape, -np.inf)
    Lf_Y = np.full(shape, -np.inf)
    Lf_M, Lf_X, Lf_Y = pHf2.forwardProbLog(q, nq, r, nr, Lf_M, Lf_X, Lf_Y,
                                           eProb_M, eProb_X, eProb_Y, tProb)
    print("##done_FWD_Calc##\n")

    # The last output (LendAt in the original) is not used by this function.
    num_TR, llr, index, ProbMax, edf, _ = pHf2.numTRlog(
        Lf_M, Lf_X, Lf_Y, ntr, nr, nq)
    # 50 is presumably the number of samples to draw -- confirm against
    # generateSamples.
    samplesNumTR = generateSamples(edf, index, ntr, 50)
    print("##sampling##\n")

    LLRcutOff = 0.01
    # Note: index is rebound here to the value returned by getAllFwdProb.
    LallQR, index = pHf2.getAllFwdProb(Lf_M, Lf_X, Lf_Y, ntr, nr, nq)
    print("##AllFwdProb##\n")
    highDivLen, highDivRegion = getLowerBound2(LallQR, index, nq, ntr,
                                               LLRcutOff)
    print("##LowerBound##\n")

    return num_TR, llr, ProbMax, samplesNumTR, highDivLen, highDivRegion