Beispiel #1
0
def performPhasing(folderName, mummerLink):
    '''
    Run the clean / vote / extend pipeline on every repeat record stored in
    <folderName>repeatSpecification.txt.

    Parameters:
        folderName : path prefix (concatenated directly with file names, so it
                     is expected to end with a path separator) of the working
                     folder holding repeatSpecification.txt, the
                     "phaseStringGraph1" graph, phasingSeedName_Double.fasta
                     and improved3_Double.fasta.
        mummerLink : accepted for interface compatibility; not used here.

    Returns:
        None.

    Design notes carried over from the original author:

    1. Interface borrowed from alignmentBridge.py :
        shortToLongMap = formRelatedMap(f2, noisyReads, currentNode, indelRobot, toProcessList)
        cleaner.cleaning([noisyReads,noisyReads] ,shortToLongMap, toProcessList,indelRobot, "init")
        in1List, in2List, out1List, out2List, commonList, longReadToUse  = cleaner.cleaning([noisyReads, noisyReads],shortToLongMap, toProcessList,indelRobot, "vote")
        extendResult = extender.readExtender(in1List, in2List, out1List, out2List, commonList,indelRobot,longReadToUse, True)

    2. Format of each input record :
        [flankingList, repeatList, repeatPathway, flankingPathsList]

    3. IO :
        a) Input :
            repeatSpecification.txt, phasingSeedName_Double.fasta, graph G
        b) Output :
            improved4.fasta (planned according to the original notes; this
            function does not write it)

    4. Algorithm:
        a) reformatNoisyReads
        b) reformatToProcessList
        c) formShortToLongMapping
    '''
    print("performPhasing")

    # The context manager closes the specification file even when the JSON
    # parse raises; the original left the handle open.
    with open(folderName + 'repeatSpecification.txt', 'r') as json_data:
        loadData = json.load(json_data)

    G = commonLib.seqGraph(0)
    G.loadFromFile(folderName, "phaseStringGraph1")

    lenDicRR = commonLib.obtainLength(folderName, "phasingSeedName_Double.fasta")

    lenDicCC = commonLib.obtainLength(folderName, "improved3_Double.fasta")
    N1 = len(lenDicCC)

    # Merge both length tables; on a key clash the lenDicRR value wins, which
    # is what dict(lenDicCC.items() + lenDicRR.items()) produced.
    lenDicCR = dict(lenDicCC)
    lenDicCR.update(lenDicRR)

    for eachitem in loadData:
        print(eachitem)
        # repeatPathway and flankingPathsList are part of the record format
        # but are not consumed below.
        flankingList, repeatList, repeatPathway, flankingPathsList = eachitem[0], eachitem[1], eachitem[2], eachitem[3]

        noisyReads, dicToOriginal, dicFromOriginal = reformatNoisyReads(folderName, flankingList, repeatList, N1)

        toProcessList = reformatToProcessList(folderName, flankingList, repeatList, dicFromOriginal, N1)

        shortToLongMap = formShortToLongMapping(folderName, G, toProcessList, dicFromOriginal, dicToOriginal, lenDicCR, N1)

        indelRobot = createIndelRobot(folderName)

        # "init" pass first, then the "vote" pass that yields the read groups.
        cleaner.cleaning([noisyReads, noisyReads], shortToLongMap, toProcessList, indelRobot, "init")
        in1List, in2List, out1List, out2List, commonList, longReadToUse = cleaner.cleaning([noisyReads, noisyReads], shortToLongMap, toProcessList, indelRobot, "vote")
        extendResult = extender.readExtender(in1List, in2List, out1List, out2List, commonList, indelRobot, longReadToUse, True)

        if extendResult != -1:
            # Same text as the original `print "extendResult: ", extendResult`.
            print("%s %s" % ("extendResult: ", extendResult))
            # NOTE(review): deliberate hard stop once any repeat yields a
            # result other than -1 -- presumably a development breakpoint;
            # confirm before relying on it (it is stripped under `python -O`).
            assert(1==2)
def indelMSABridging(f2, currentNode, noisyReads, p, snpRate, flankinglen,
                     parameterRobot):
    '''
    Attempt to bridge currentNode by cleaning the noisy reads and extending.

    A new common.parameterRobot is configured from parameterRobot (its
    defaultFolder, N, L, p and G are copied; all thresholds are the fixed
    values below) and its snprate is set to snpRate.  The reads then go
    through formToProcessList, formRelatedMap, the cleaner "init" and "vote"
    passes and extender.readExtender.

    Parameters:
        f2, noisyReads, currentNode, flankinglen : forwarded to
            formToProcessList / formRelatedMap.
        p             : not used by this function.
        snpRate       : stored on the robot as snprate.
        parameterRobot: source of defaultFolder, N, L, p and G.

    Returns:
        (canResolve, kmerPairsList).
        * (False, []) when toProcessList[4] is empty, or when the extender
          result is neither 0 nor 1.
        * Extender result 0: in-node 0 is paired with out-node 0 and in-node 1
          with out-node 1.
        * Extender result 1: in-node 0 is paired with out-node 1 and in-node 1
          with out-node 0.
        Each pair is [inKmerIndex, outKmerIndex].  The pair list is printed
        before returning.
    '''
    # Using contig Creator
    robot = common.parameterRobot()
    robot.defaultFolder = parameterRobot.defaultFolder
    robot.setReadStat(Nshort=parameterRobot.N, Nlong=parameterRobot.N,
                      Lshort=parameterRobot.L, Llong=parameterRobot.L,
                      p=parameterRobot.p, longOnly=True)
    robot.setGenomeStat(G=parameterRobot.G, lrep=500, lsnp=200, lint=50)

    thresholdSettings = dict(
        liid=30,
        thresForRandom=0.5,
        thresForins=0.4,
        thresFordel=0.4,
        insMin=4,
        delMin=4,
        thresholdForSupport=0.15,
        subthreshold=9,
        editsub=-10,
        editins=-1,
        editdel=-1,
        editmatch=1,
        lookRange=15,
    )
    robot.setThresholdPara(**thresholdSettings)
    robot.snprate = snpRate

    # processList : in1IndexList, in2IndexList, out1IndexList, out2IndexList, commonIndexList
    # relatedMap  : indexlong    indexshort    jstart    jend    istart    iend
    processList = formToProcessList(f2, noisyReads, currentNode, robot,
                                    flankinglen)
    if len(processList[4]) == 0:
        # Nothing in the common index list: give up early.
        return False, []

    relatedMap = formRelatedMap(f2, noisyReads, currentNode, robot,
                                processList)

    cleaner.cleaning([noisyReads, noisyReads], relatedMap, processList, robot,
                     "init")
    voted = cleaner.cleaning([noisyReads, noisyReads], relatedMap,
                             processList, robot, "vote")
    in1List, in2List, out1List, out2List, commonList, longReadToUse = voted

    extendResult = extender.readExtender(in1List, in2List, out1List, out2List,
                                         commonList, robot, longReadToUse,
                                         True)

    # (in-node slot, out-node slot) combinations chosen by the extender result.
    if extendResult == 0:
        pairing = ((0, 0), (1, 1))
        canResolve = True
    elif extendResult == 1:
        pairing = ((0, 1), (1, 0))
        canResolve = True
    else:
        pairing = ()
        canResolve = False

    kmerPairsList = []
    for inSlot, outSlot in pairing:
        inWt = currentNode.listOfPrevNodes[inSlot][1]
        inKmerIndex = currentNode.listOfPrevNodes[inSlot][0].nodeIndexList[
            -(inWt + 1)]

        outWt = currentNode.listOfNextNodes[outSlot][1]
        outKmerIndex = currentNode.listOfNextNodes[outSlot][0].nodeIndexList[
            outWt]

        kmerPairsList.append([inKmerIndex, outKmerIndex])

    print(kmerPairsList)

    return canResolve, kmerPairsList
def indelMSABridging(f2, currentNode,noisyReads,p, snpRate,flankinglen, parameterRobot):
    '''
    Decide whether currentNode can be resolved and, if so, which incoming
    k-mer goes with which outgoing k-mer.

    Steps: build a common.parameterRobot from parameterRobot (fixed
    thresholds, snprate = snpRate), form the to-process list and the
    short-to-long map, run cleaner.cleaning in "init" then "vote" mode, and
    hand the voted read groups to extender.readExtender.

    Parameters:
        f2, noisyReads, currentNode, flankinglen : passed through to
            formToProcessList / formRelatedMap.
        p             : unused here.
        snpRate       : assigned to the robot's snprate attribute.
        parameterRobot: provides defaultFolder, N, L, p and G.

    Returns:
        (canResolve, kmerPairsList); kmerPairsList holds
        [inKmerIndex, outKmerIndex] pairs.
        * (False, []) is returned immediately when toProcessList[4] is empty.
        * Extender result 0 -> pairs (in 0, out 0) and (in 1, out 1).
        * Extender result 1 -> pairs (in 0, out 1) and (in 1, out 0).
        * Any other result  -> (False, []).
        kmerPairsList is printed just before the final return.
    '''
    # Using contig Creator
    indelRobot = common.parameterRobot()
    indelRobot.defaultFolder = parameterRobot.defaultFolder
    indelRobot.setReadStat(
        Nshort=parameterRobot.N,
        Nlong=parameterRobot.N,
        Lshort=parameterRobot.L,
        Llong=parameterRobot.L,
        p=parameterRobot.p,
        longOnly=True,
    )
    indelRobot.setGenomeStat(G=parameterRobot.G, lrep=500, lsnp=200, lint=50)
    indelRobot.setThresholdPara(
        liid=30, thresForRandom=0.5,
        thresForins=0.4, thresFordel=0.4,
        insMin=4, delMin=4,
        thresholdForSupport=0.15, subthreshold=9,
        editsub=-10, editins=-1, editdel=-1, editmatch=1,
        lookRange=15,
    )
    indelRobot.snprate = snpRate

    # toProcessList : in1IndexList, in2IndexList, out1IndexList, out2IndexList, commonIndexList
    # shortToLongMap : indexlong    indexshort    jstart    jend    istart    iend
    toProcessList = formToProcessList(f2, noisyReads, currentNode, indelRobot, flankinglen)
    if len(toProcessList[4]) == 0:
        return False, []

    shortToLongMap = formRelatedMap(f2, noisyReads, currentNode, indelRobot, toProcessList)

    cleaner.cleaning([noisyReads, noisyReads], shortToLongMap, toProcessList, indelRobot, "init")
    in1List, in2List, out1List, out2List, commonList, longReadToUse = cleaner.cleaning(
        [noisyReads, noisyReads], shortToLongMap, toProcessList, indelRobot, "vote")

    extendResult = extender.readExtender(
        in1List, in2List, out1List, out2List, commonList, indelRobot, longReadToUse, True)

    def inKmerAt(slot):
        # k-mer index taken edgeWt + 1 positions from the end of the
        # chosen previous node's index list.
        edgeWt = currentNode.listOfPrevNodes[slot][1]
        return currentNode.listOfPrevNodes[slot][0].nodeIndexList[-(edgeWt + 1)]

    def outKmerAt(slot):
        # k-mer index at position edgeWt of the chosen next node's index list.
        edgeWt = currentNode.listOfNextNodes[slot][1]
        return currentNode.listOfNextNodes[slot][0].nodeIndexList[edgeWt]

    if extendResult == 0:
        # Straight pairing: in 0 -> out 0, in 1 -> out 1.
        canResolve = True
        kmerPairsList = [[inKmerAt(0), outKmerAt(0)],
                         [inKmerAt(1), outKmerAt(1)]]
    elif extendResult == 1:
        # Crossed pairing: in 0 -> out 1, in 1 -> out 0.
        canResolve = True
        kmerPairsList = [[inKmerAt(0), outKmerAt(1)],
                         [inKmerAt(1), outKmerAt(0)]]
    else:
        canResolve, kmerPairsList = False, []

    print(kmerPairsList)

    return canResolve, kmerPairsList
 def tweet_clean(self):
     """Build a ``cleaning`` object from ``self.tweets`` and keep its
     ``clean_tweets`` attribute on this instance as ``self.clean_tweets``."""
     cleaned = cleaning(self.tweets)
     self.clean_tweets = cleaned.clean_tweets