def performPhasing(folderName, mummerLink):
    """Run the clean / vote / extend pipeline on every stored repeat record.

    Steps performed by the visible code:

    1. Parse ``folderName + 'repeatSpecification.txt'`` as JSON.  Each record
       is indexed as ``[flankingList, repeatList, repeatPathway,
       flankingPathsList]``; only the first two entries are used here.
    2. Load the graph ``phaseStringGraph1`` into ``commonLib.seqGraph(0)``.
    3. Obtain length dictionaries for ``phasingSeedName_Double.fasta`` and
       ``improved3_Double.fasta`` and merge them (entries from the former
       win on duplicate keys).  ``N1`` is the number of entries obtained for
       ``improved3_Double.fasta``.
    4. For each record: reformatNoisyReads -> reformatToProcessList ->
       formShortToLongMapping -> createIndelRobot -> cleaner.cleaning
       ("init", then "vote") -> extender.readExtender.

    The original author's notes name ``improved4.fasta`` as the intended
    output; nothing in this function writes it.

    Args:
        folderName: Prefix prepended verbatim to the specification file name
            and handed to every helper (presumably a directory path ending
            in a separator -- confirm with callers).
        mummerLink: Not used by this function.

    Returns:
        None.

    Raises:
        AssertionError: deliberate development halt after a record whose
            ``extendResult`` is not -1 (see the note at the bottom).
    """
    print("performPhasing")

    # Read the specification inside a context manager so the file handle is
    # released as soon as parsing finishes (it was previously never closed).
    with open(folderName + 'repeatSpecification.txt', 'r') as json_data:
        loadData = json.load(json_data)

    G = commonLib.seqGraph(0)
    G.loadFromFile(folderName, "phaseStringGraph1")

    lenDicRR = commonLib.obtainLength(folderName, "phasingSeedName_Double.fasta")
    lenDicCC = commonLib.obtainLength(folderName, "improved3_Double.fasta")
    N1 = len(lenDicCC)

    # Concatenate the item lists explicitly: `.items() + .items()` only works
    # where items() returns a list.  Later pairs (lenDicRR) override earlier.
    lenDicCR = dict(list(lenDicCC.items()) + list(lenDicRR.items()))

    for eachitem in loadData:
        print(eachitem)

        # Indexing (rather than tuple-unpacking) tolerates records that carry
        # extra trailing fields.  repeatPathway / flankingPathsList are read
        # but not consumed below.
        flankingList, repeatList = eachitem[0], eachitem[1]
        repeatPathway, flankingPathsList = eachitem[2], eachitem[3]

        noisyReads, dicToOriginal, dicFromOriginal = reformatNoisyReads(
            folderName, flankingList, repeatList, N1)

        toProcessList = reformatToProcessList(
            folderName, flankingList, repeatList, dicFromOriginal, N1)

        shortToLongMap = formShortToLongMapping(
            folderName, G, toProcessList, dicFromOriginal, dicToOriginal,
            lenDicCR, N1)

        indelRobot = createIndelRobot(folderName)

        # The "init" pass is run only for its side effects; its return value
        # is discarded, exactly as before.
        cleaner.cleaning([noisyReads, noisyReads], shortToLongMap,
                         toProcessList, indelRobot, "init")
        in1List, in2List, out1List, out2List, commonList, longReadToUse = \
            cleaner.cleaning([noisyReads, noisyReads], shortToLongMap,
                             toProcessList, indelRobot, "vote")

        extendResult = extender.readExtender(
            in1List, in2List, out1List, out2List, commonList, indelRobot,
            longReadToUse, True)

        if extendResult != -1:
            # Emits the label, a separating space, then the value -- the same
            # text a two-item print statement produces.
            print("extendResult: " + " " + str(extendResult))
            # NOTE(review): development halt kept from the original.  The
            # flattened source does not show whether it sat under this `if`,
            # at loop level, or at function level -- confirm the placement.
            assert(1==2)
def indelMSABridging(f2, currentNode, noisyReads, p, snpRate, flankinglen, parameterRobot):
    """Decide how the two incoming and two outgoing neighbours of
    ``currentNode`` pair up, using the cleaner / extender pipeline.

    A fresh ``common.parameterRobot()`` is configured from ``parameterRobot``
    (N, L, p, G, defaultFolder) plus fixed thresholds, its ``snprate`` is set
    to ``snpRate``, and it is handed to formToProcessList, formRelatedMap,
    cleaner.cleaning ("init", then "vote") and extender.readExtender.

    Layout notes carried over from the original comments:
        toProcessList  : in1IndexList, in2IndexList, out1IndexList,
                         out2IndexList, commonIndexList
        shortToLongMap : indexlong indexshort jstart jend istart iend

    Args:
        f2, noisyReads, flankinglen: forwarded unchanged to the helpers.
        currentNode: node exposing ``listOfPrevNodes`` / ``listOfNextNodes``;
            each entry is indexed as ``[node, edgeWt]``.
        p: not used by this function.
        snpRate: stored on the robot as ``snprate``.
        parameterRobot: source of N, L, p, G and defaultFolder.

    Returns:
        ``(canResolve, kmerPairsList)``.  ``(False, [])`` is returned early,
        without printing, when ``toProcessList[4]`` is empty.  Otherwise the
        pair list is printed and returned: two ``[inKmerIndex, outKmerIndex]``
        pairs with ``canResolve`` True when the extender returns 0 (prev0 with
        next0, prev1 with next1) or 1 (prev0 with next1, prev1 with next0);
        any other result gives ``(False, [])``.
    """
    # Robot dedicated to this call; tunePara() is deliberately not invoked
    # (it was commented out in the original as well).
    indelRobot = common.parameterRobot()
    indelRobot.defaultFolder = parameterRobot.defaultFolder
    indelRobot.setReadStat(
        Nshort=parameterRobot.N,
        Nlong=parameterRobot.N,
        Lshort=parameterRobot.L,
        Llong=parameterRobot.L,
        p=parameterRobot.p,
        longOnly=True,
    )
    indelRobot.setGenomeStat(G=parameterRobot.G, lrep=500, lsnp=200, lint=50)
    indelRobot.setThresholdPara(
        liid=30,
        thresForRandom=0.5,
        thresForins=0.4,
        thresFordel=0.4,
        insMin=4,
        delMin=4,
        thresholdForSupport=0.15,
        subthreshold=9,
        editsub=-10,
        editins=-1,
        editdel=-1,
        editmatch=1,
        lookRange=15,
    )
    indelRobot.snprate = snpRate

    toProcessList = formToProcessList(f2, noisyReads, currentNode, indelRobot, flankinglen)
    if len(toProcessList[4]) == 0:
        # Fifth slot empty: bail out before any further work.
        return False, []

    shortToLongMap = formRelatedMap(f2, noisyReads, currentNode, indelRobot, toProcessList)

    # "init" pass first (result ignored), then the "vote" pass whose outputs
    # feed the extender.  A new two-element list is built for each call.
    cleaner.cleaning([noisyReads, noisyReads], shortToLongMap, toProcessList,
                     indelRobot, "init")
    voted = cleaner.cleaning([noisyReads, noisyReads], shortToLongMap,
                             toProcessList, indelRobot, "vote")
    in1List, in2List, out1List, out2List, commonList, longReadToUse = voted

    extendResult = extender.readExtender(in1List, in2List, out1List, out2List,
                                         commonList, indelRobot, longReadToUse, True)

    # (prev slot, next slot) combinations to emit for each extender verdict.
    if extendResult == 0:
        wiring = ((0, 0), (1, 1))
    elif extendResult == 1:
        wiring = ((0, 1), (1, 0))
    else:
        wiring = ()

    kmerPairsList = []
    for prevSlot, nextSlot in wiring:
        prevEntry = currentNode.listOfPrevNodes[prevSlot]
        inWt = prevEntry[1]
        # Incoming side is indexed from the end of the neighbour's list.
        inKmerIndex = prevEntry[0].nodeIndexList[-(inWt + 1)]

        nextEntry = currentNode.listOfNextNodes[nextSlot]
        outWt = nextEntry[1]
        # Outgoing side is indexed from the start.
        outKmerIndex = nextEntry[0].nodeIndexList[outWt]

        kmerPairsList.append([inKmerIndex, outKmerIndex])

    canResolve = len(wiring) > 0

    print(kmerPairsList)
    return canResolve, kmerPairsList
def indelMSABridging(f2, currentNode, noisyReads, p, snpRate, flankinglen, parameterRobot):
    """Attempt to bridge ``currentNode`` by matching its two predecessor
    entries with its two successor entries.

    The function builds its own ``common.parameterRobot()`` (read statistics
    and genome size copied from ``parameterRobot``, thresholds hard-coded,
    ``snprate`` taken from ``snpRate``), then runs formToProcessList,
    formRelatedMap, two cleaner.cleaning passes ("init" and "vote") and
    finally extender.readExtender.

    Original layout notes:
        toProcessList  : in1IndexList, in2IndexList, out1IndexList,
                         out2IndexList, commonIndexList
        shortToLongMap : indexlong indexshort jstart jend istart iend

    Args:
        f2, noisyReads, flankinglen: passed straight through to the helpers.
        currentNode: provides ``listOfPrevNodes`` and ``listOfNextNodes``,
            whose entries are read as ``entry[0]`` (a node with
            ``nodeIndexList``) and ``entry[1]`` (an edge weight).
        p: accepted but never read here.
        snpRate: assigned to the robot's ``snprate`` attribute.
        parameterRobot: supplies defaultFolder, N, L, p and G.

    Returns:
        Tuple ``(canResolve, kmerPairsList)``:
          * ``(False, [])`` immediately (nothing printed) if
            ``toProcessList[4]`` has length 0;
          * extender result 0 -> True with pairs prev0/next0 and prev1/next1;
          * extender result 1 -> True with pairs prev0/next1 and prev1/next0;
          * anything else -> ``(False, [])``.
        In the last three cases the pair list is printed before returning.
    """
    robot = common.parameterRobot()
    robot.defaultFolder = parameterRobot.defaultFolder
    robot.setReadStat(Nshort=parameterRobot.N, Nlong=parameterRobot.N,
                      Lshort=parameterRobot.L, Llong=parameterRobot.L,
                      p=parameterRobot.p, longOnly=True)
    robot.setGenomeStat(G=parameterRobot.G, lrep=500, lsnp=200, lint=50)
    robot.setThresholdPara(liid=30, thresForRandom=0.5, thresForins=0.4,
                           thresFordel=0.4, insMin=4, delMin=4,
                           thresholdForSupport=0.15, subthreshold=9,
                           editsub=-10, editins=-1, editdel=-1, editmatch=1,
                           lookRange=15)
    # tunePara() stays disabled, as in the original.
    robot.snprate = snpRate

    toProcessList = formToProcessList(f2, noisyReads, currentNode, robot, flankinglen)
    if not len(toProcessList[4]):
        return False, []

    shortToLongMap = formRelatedMap(f2, noisyReads, currentNode, robot, toProcessList)

    # First pass is executed for its effects only; the second pass yields the
    # six lists consumed by the extender.
    cleaner.cleaning([noisyReads, noisyReads], shortToLongMap, toProcessList, robot, "init")
    (in1List, in2List, out1List, out2List,
     commonList, longReadToUse) = cleaner.cleaning(
        [noisyReads, noisyReads], shortToLongMap, toProcessList, robot, "vote")

    extendResult = extender.readExtender(in1List, in2List, out1List, out2List,
                                         commonList, robot, longReadToUse, True)

    def kmerPair(prevSlot, nextSlot):
        """Return [inKmerIndex, outKmerIndex] for one prev/next combination."""
        weightIn = currentNode.listOfPrevNodes[prevSlot][1]
        kmerIn = currentNode.listOfPrevNodes[prevSlot][0].nodeIndexList[-(weightIn + 1)]
        weightOut = currentNode.listOfNextNodes[nextSlot][1]
        kmerOut = currentNode.listOfNextNodes[nextSlot][0].nodeIndexList[weightOut]
        return [kmerIn, kmerOut]

    if extendResult == 0:
        # Straight pairing.
        outcome = (True, [kmerPair(0, 0), kmerPair(1, 1)])
    elif extendResult == 1:
        # Crossed pairing.
        outcome = (True, [kmerPair(0, 1), kmerPair(1, 0)])
    else:
        # Covers -1 and every other value: nothing resolved.
        outcome = (False, [])

    canResolve, kmerPairsList = outcome
    print(kmerPairsList)
    return canResolve, kmerPairsList
def tweet_clean(self):
    """Populate ``self.clean_tweets`` from ``self.tweets``.

    ``self.tweets`` is passed to the ``cleaning`` callable, and the
    ``clean_tweets`` attribute of whatever it returns is stored on this
    instance.  Nothing is returned.
    """
    cleaner_result = cleaning(self.tweets)
    self.clean_tweets = cleaner_result.clean_tweets