def findVoteScore(in1List, leftSeg, parameterRobot): H0Score, H1Score = 0, 0 threshold = 10 perr = parameterRobot.p print " len(in1List) ", len(in1List) for i in range(len(in1List)): # Determine F/B read = in1List[i].longread score, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cleaner.SWAlignment( read, leftSeg[0], parameterRobot) reverseread = cleaner.reverseStrand(read) scoreRev, returnalignedSeq1Rev, returnalignedSeq2Rev, startiRev, startjRev, endiRev, endjRev = cleaner.SWAlignment( reverseread, leftSeg[0], parameterRobot) if score > scoreRev: forwardCk = True scoreLeft0 = score else: forwardCk = False scoreLeft0 = scoreRev if forwardCk: score, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cleaner.SWAlignment( read, leftSeg[1], parameterRobot) scoreLeft1 = score else: score, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cleaner.SWAlignment( reverseread, leftSeg[1], parameterRobot) scoreLeft1 = score print "scoreLeft0, scoreLeft1 ", scoreLeft0, scoreLeft1 skipCounting = False MAPScore = [0, 0] # Filering reads if startj > threshold or endj < len(leftSeg[0]) - threshold: skipCounting = True # MAP decision rule if forwardCk: for i in range(2): #tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMP(leftSeg[i],read, parameterRobot) #lengthSeed = len(leftSeg[i]) #countConfirm, countDel, countIns, countSub = cleaner.countEdits(returnalignedSeq2, returnalignedSeq1) # care for the 1,2 and its fcn #confirmNoDelete,confirmNoInsert = countConfirm , lengthSeed - countIns #MAPScore[i] = math.log(1-perr)*confirmNoDelete + math.log(1-perr)*confirmNoInsert + math.log(perr)*countDel + math.log(perr*perr/3)*countSub + math.log(perr/3)*countIns + math.log(numPath) tempScore, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj, numPath = cleaner.SWAlignmentFixRefMPQS( leftSeg[i], read, parameterRobot) MAPScore[i] = tempScore else: for i in range(2): #tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMP(leftSeg[i],reverseread, parameterRobot) #lengthSeed = len(leftSeg[i]) #countConfirm, countDel, countIns, countSub = cleaner.countEdits(returnalignedSeq2, returnalignedSeq1) # care for the 1,2 and its fcn #confirmNoDelete,confirmNoInsert = countConfirm , lengthSeed - countIns #MAPScore[i] = math.log(1-perr)*confirmNoDelete + math.log(1-perr)*confirmNoInsert + math.log(perr)*countDel + math.log(perr*perr/3)*countSub + math.log(perr/3)*countIns + math.log(numPath) tempScore, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj, numPath = cleaner.SWAlignmentFixRefMPQS( leftSeg[i], reverseread, parameterRobot) MAPScore[i] = tempScore if not skipCounting: H0Score += MAPScore[0] H1Score += MAPScore[1] return H0Score, H1Score
def linkSegments(inSeg, middleSeg, outSeg, parameterRobot): print "\nNew Contig" middleRead = middleSeg.longread revmiddleRead = cleaner.reverseStrand(middleRead) extendedRead = [] score, scoreRev = -1, -1 inIndex = 0 startjTmp = -1 while ((max(score, scoreRev) < 30 or startjTmp > 10) and inIndex < len(inSeg)): inRead = inSeg[inIndex].longread revinRead = cleaner.reverseStrand(inRead) score, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cleaner.SWAlignment( inRead, middleRead, parameterRobot) scoreRev, returnalignedSeq1Rev, returnalignedSeq2Rev, startiRev, startjRev, endiRev, endjRev = cleaner.SWAlignment( revinRead, middleRead, parameterRobot) if score > scoreRev: startjTmp = startj print "starti, startj , endi, endj", starti, startj, endi, endj else: startjTmp = startjRev print "startiRev, startjRev , endiRev, endjRev", startiRev, startjRev, endiRev, endjRev print "startjTmp", startjTmp print "score, scoreRev,inIndex ", score, scoreRev, inIndex inIndex += 1 if score > scoreRev: for eachbase in inRead[0:starti]: extendedRead.append(eachbase) print "len(extendedRead)", len(extendedRead) for eachbase in middleRead: extendedRead.append(eachbase) print "len(extendedRead)", len(extendedRead) else: for eachbase in revinRead[0:startiRev]: extendedRead.append(eachbase) print "len(extendedRead)", len(extendedRead) for eachbase in middleRead: extendedRead.append(eachbase) print "len(extendedRead)", len(extendedRead) score, scoreRev = -1, -1 # hack outIndex = 0 endiTmp = 1000 outRead, revoutRead = [], [] while ((max(score, scoreRev) < 30 or endiTmp < len(middleRead) - 10) and outIndex < len(outSeg)): outRead = outSeg[outIndex].longread revoutRead = cleaner.reverseStrand(outRead) score, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cleaner.SWAlignment( middleRead, outRead, parameterRobot) scoreRev, returnalignedSeq1Rev, returnalignedSeq2Rev, startiRev, startjRev, endiRev, endjRev = cleaner.SWAlignment( middleRead, revoutRead, parameterRobot) if score > scoreRev: endiTmp = endi print "starti, startj , endi, endj", starti, startj, endi, endj else: endiTmp = endiRev print " startiRev, startjRev , endiRev, endjRev ", startiRev, startjRev, endiRev, endjRev print "score, scoreRev, outIndex", score, scoreRev, outIndex outIndex += 1 correctionTerm = len(middleRead) - endiTmp if score > scoreRev: for eachbase in outRead[endj + correctionTerm:len(outRead)]: extendedRead.append(eachbase) print "len(extendedRead) forw", len(extendedRead) else: for eachbase in revoutRead[endjRev + correctionTerm:len(revoutRead)]: extendedRead.append(eachbase) print "len(extendedRead) rev", len(extendedRead) return extendedRead
def decideMiddlePiece(commonList, leftSeg, rightSeg, parameterRobot): middleMap = [] threshold = 10 perr = parameterRobot.p H0, H1 = 0, 0 for i in range(len(commonList)): read = commonList[i].longread score, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cleaner.SWAlignment( read, leftSeg[0], parameterRobot) readRev = cleaner.reverseStrand(read) scoreRev, returnalignedSeq1Rev, returnalignedSeq2Rev, startiRev, startjRev, endiRev, endjRev = cleaner.SWAlignment( readRev, leftSeg[0], parameterRobot) if score > scoreRev: forwardCk = True scoreL0 = score else: forwardCk = False scoreL0 = scoreRev if forwardCk: tmpread = read else: tmpread = readRev scoreL1, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cleaner.SWAlignment( tmpread, leftSeg[1], parameterRobot) scoreR0, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cleaner.SWAlignment( tmpread, rightSeg[0], parameterRobot) scoreR1, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj = cleaner.SWAlignment( tmpread, rightSeg[1], parameterRobot) skipCounting = False # Filering reads if startj > threshold or endj < len(leftSeg[0]) - threshold: skipCounting = True # MAP decision rule if not skipCounting: MAPScoreL = [0, 0] for i in range(2): #tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMP(leftSeg[i],tmpread, parameterRobot) #lengthSeed = len(leftSeg[i]) #countConfirm, countDel, countIns, countSub = cleaner.countEdits(returnalignedSeq2, returnalignedSeq1) # care for the 1,2 and its fcn #confirmNoDelete,confirmNoInsert = countConfirm , lengthSeed - countIns #MAPScoreL[i] = math.log(1-perr)*confirmNoDelete + math.log(1-perr)*confirmNoInsert + math.log(perr)*countDel + math.log(perr*perr/3)*countSub + math.log(perr/3)*countIns + math.log(numPath) tempScore, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj, numPath = cleaner.SWAlignmentFixRefMPQS( leftSeg[i], tmpread, parameterRobot) MAPScoreL[i] = tempScore MAPScoreR = [0, 0] for i in range(2): #tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMP(rightSeg[i],tmpread, parameterRobot) #lengthSeed = len(leftSeg[i]) #countConfirm, countDel, countIns, countSub = cleaner.countEdits(returnalignedSeq2, returnalignedSeq1) # care for the 1,2 and its fcn #confirmNoDelete,confirmNoInsert = countConfirm , lengthSeed - countIns #MAPScoreR[i] = math.log(1-perr)*confirmNoDelete + math.log(1-perr)*confirmNoInsert + math.log(perr)*countDel + math.log(perr*perr/3)*countSub + math.log(perr/3)*countIns + math.log(numPath) tempScore, returnalignedSeq1, returnalignedSeq2, starti, startj, endi, endj, numPath = cleaner.SWAlignmentFixRefMPQS( rightSeg[i], tmpread, parameterRobot) MAPScoreR[i] = tempScore H0Score = max(MAPScoreL[0] + MAPScoreR[0], MAPScoreL[1] + MAPScoreR[1]) H1Score = max(MAPScoreL[1] + MAPScoreR[0], MAPScoreL[0] + MAPScoreR[1]) H0 = H0 + H0Score H1 = H1 + H1Score if H0 > H1: middleMap = [0, 1] else: middleMap = [1, 0] print "Middle : H0, H1", H0, H1 return middleMap, H0, H1
def decideMiddlePiece(commonList, leftSeg, rightSeg, parameterRobot): middleMap = [] threshold = 10 perr = parameterRobot.p H0 , H1 = 0 ,0 for i in range(len(commonList)): read = commonList[i].longread score, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj = cleaner.SWAlignment(read, leftSeg[0], parameterRobot) readRev = cleaner.reverseStrand(read) scoreRev, returnalignedSeq1Rev, returnalignedSeq2Rev , startiRev, startjRev , endiRev, endjRev = cleaner.SWAlignment(readRev, leftSeg[0], parameterRobot) if score > scoreRev : forwardCk = True scoreL0 = score else: forwardCk = False scoreL0 = scoreRev if forwardCk : tmpread = read else: tmpread = readRev scoreL1, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj = cleaner.SWAlignment(tmpread, leftSeg[1], parameterRobot) scoreR0 , returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj = cleaner.SWAlignment(tmpread, rightSeg[0], parameterRobot) scoreR1 , returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj = cleaner.SWAlignment(tmpread, rightSeg[1], parameterRobot) skipCounting = False # Filering reads if startj > threshold or endj < len(leftSeg[0]) - threshold: skipCounting = True # MAP decision rule if not skipCounting: MAPScoreL= [0,0] for i in range(2): #tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMP(leftSeg[i],tmpread, parameterRobot) #lengthSeed = len(leftSeg[i]) #countConfirm, countDel, countIns, countSub = cleaner.countEdits(returnalignedSeq2, returnalignedSeq1) # care for the 1,2 and its fcn #confirmNoDelete,confirmNoInsert = countConfirm , lengthSeed - countIns #MAPScoreL[i] = math.log(1-perr)*confirmNoDelete + math.log(1-perr)*confirmNoInsert + math.log(perr)*countDel + math.log(perr*perr/3)*countSub + math.log(perr/3)*countIns + math.log(numPath) tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMPQS(leftSeg[i],tmpread, parameterRobot) MAPScoreL[i] =tempScore MAPScoreR= [0,0] for i in range(2): #tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMP(rightSeg[i],tmpread, parameterRobot) #lengthSeed = len(leftSeg[i]) #countConfirm, countDel, countIns, countSub = cleaner.countEdits(returnalignedSeq2, returnalignedSeq1) # care for the 1,2 and its fcn #confirmNoDelete,confirmNoInsert = countConfirm , lengthSeed - countIns #MAPScoreR[i] = math.log(1-perr)*confirmNoDelete + math.log(1-perr)*confirmNoInsert + math.log(perr)*countDel + math.log(perr*perr/3)*countSub + math.log(perr/3)*countIns + math.log(numPath) tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMPQS(rightSeg[i],tmpread, parameterRobot) MAPScoreR[i] = tempScore H0Score = max( MAPScoreL[0] + MAPScoreR[0] , MAPScoreL[1] + MAPScoreR[1]) H1Score = max( MAPScoreL[1] + MAPScoreR[0], MAPScoreL[0] + MAPScoreR[1]) H0 = H0 + H0Score H1 = H1 + H1Score if H0 > H1: middleMap = [0,1] else: middleMap = [1,0] print "Middle : H0, H1", H0, H1 return middleMap , H0, H1
def findVoteScore(in1List, leftSeg, parameterRobot): H0Score, H1Score = 0,0 threshold = 10 perr = parameterRobot.p print " len(in1List) ", len(in1List) for i in range(len(in1List)): # Determine F/B read = in1List[i].longread score, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj = cleaner.SWAlignment(read, leftSeg[0], parameterRobot) reverseread = cleaner.reverseStrand(read) scoreRev, returnalignedSeq1Rev, returnalignedSeq2Rev , startiRev, startjRev , endiRev, endjRev = cleaner.SWAlignment(reverseread, leftSeg[0], parameterRobot) if score > scoreRev: forwardCk = True scoreLeft0 = score else: forwardCk =False scoreLeft0 = scoreRev if forwardCk : score, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj = cleaner.SWAlignment(read, leftSeg[1], parameterRobot) scoreLeft1 = score else: score, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj = cleaner.SWAlignment(reverseread, leftSeg[1], parameterRobot) scoreLeft1 = score print "scoreLeft0, scoreLeft1 " , scoreLeft0, scoreLeft1 skipCounting = False MAPScore= [0,0] # Filering reads if startj > threshold or endj < len(leftSeg[0]) - threshold: skipCounting = True # MAP decision rule if forwardCk : for i in range(2): #tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMP(leftSeg[i],read, parameterRobot) #lengthSeed = len(leftSeg[i]) #countConfirm, countDel, countIns, countSub = cleaner.countEdits(returnalignedSeq2, returnalignedSeq1) # care for the 1,2 and its fcn #confirmNoDelete,confirmNoInsert = countConfirm , lengthSeed - countIns #MAPScore[i] = math.log(1-perr)*confirmNoDelete + math.log(1-perr)*confirmNoInsert + math.log(perr)*countDel + math.log(perr*perr/3)*countSub + math.log(perr/3)*countIns + math.log(numPath) tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMPQS(leftSeg[i],read, parameterRobot) MAPScore[i] =tempScore else: for i in range(2): #tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMP(leftSeg[i],reverseread, parameterRobot) #lengthSeed = len(leftSeg[i]) #countConfirm, countDel, countIns, countSub = cleaner.countEdits(returnalignedSeq2, returnalignedSeq1) # care for the 1,2 and its fcn #confirmNoDelete,confirmNoInsert = countConfirm , lengthSeed - countIns #MAPScore[i] = math.log(1-perr)*confirmNoDelete + math.log(1-perr)*confirmNoInsert + math.log(perr)*countDel + math.log(perr*perr/3)*countSub + math.log(perr/3)*countIns + math.log(numPath) tempScore, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj, numPath= cleaner.SWAlignmentFixRefMPQS(leftSeg[i],reverseread, parameterRobot) MAPScore[i] = tempScore if not skipCounting: H0Score += MAPScore[0] H1Score += MAPScore[1] return H0Score , H1Score
def linkSegments(inSeg, middleSeg, outSeg, parameterRobot): print "\nNew Contig" middleRead = middleSeg.longread revmiddleRead = cleaner.reverseStrand(middleRead) extendedRead = [] score, scoreRev = -1, -1 inIndex = 0 startjTmp = -1 while ((max(score, scoreRev)< 30 or startjTmp> 10) and inIndex < len(inSeg)): inRead = inSeg[inIndex].longread revinRead = cleaner.reverseStrand(inRead) score, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj = cleaner.SWAlignment(inRead, middleRead, parameterRobot) scoreRev, returnalignedSeq1Rev, returnalignedSeq2Rev , startiRev, startjRev , endiRev, endjRev = cleaner.SWAlignment(revinRead, middleRead, parameterRobot) if score > scoreRev: startjTmp = startj print "starti, startj , endi, endj", starti, startj , endi, endj else: startjTmp = startjRev print "startiRev, startjRev , endiRev, endjRev", startiRev, startjRev , endiRev, endjRev print "startjTmp", startjTmp print "score, scoreRev,inIndex " ,score, scoreRev ,inIndex inIndex += 1 if score > scoreRev : for eachbase in inRead[0:starti]: extendedRead.append(eachbase) print "len(extendedRead)", len(extendedRead) for eachbase in middleRead: extendedRead.append(eachbase) print "len(extendedRead)", len(extendedRead) else: for eachbase in revinRead[0:startiRev]: extendedRead.append(eachbase) print "len(extendedRead)", len(extendedRead) for eachbase in middleRead: extendedRead.append(eachbase) print "len(extendedRead)", len(extendedRead) score, scoreRev = -1, -1 # hack outIndex = 0 endiTmp = 1000 outRead, revoutRead = [] , [] while ((max(score, scoreRev)< 30 or endiTmp < len(middleRead)-10)and outIndex < len(outSeg)): outRead = outSeg[outIndex].longread revoutRead = cleaner.reverseStrand(outRead) score, returnalignedSeq1, returnalignedSeq2 , starti, startj , endi, endj = cleaner.SWAlignment(middleRead,outRead , parameterRobot) scoreRev, returnalignedSeq1Rev, returnalignedSeq2Rev , startiRev, startjRev , endiRev, endjRev = cleaner.SWAlignment(middleRead, revoutRead, parameterRobot) if score > scoreRev : endiTmp = endi print "starti, startj , endi, endj", starti, startj , endi, endj else: endiTmp = endiRev print " startiRev, startjRev , endiRev, endjRev ", startiRev, startjRev , endiRev, endjRev print "score, scoreRev, outIndex" ,score, scoreRev, outIndex outIndex += 1 correctionTerm = len(middleRead) - endiTmp if score > scoreRev : for eachbase in outRead[endj+correctionTerm:len(outRead)]: extendedRead.append(eachbase) print "len(extendedRead) forw", len(extendedRead) else: for eachbase in revoutRead[endjRev+correctionTerm:len(revoutRead)]: extendedRead.append(eachbase) print "len(extendedRead) rev", len(extendedRead) return extendedRead