Ejemplo n.º 1
0
 def mergeFile( inFile, outFile="" ):
     """
     Merge the alignments of 'inFile' per (query, subject) pair into 'outFile'.

     The input is first sorted into '<inFile>.sorted' with
     AlignUtils.sortAlignFile(). Consecutive lines sharing the same
     '<queryName>_<subjectName>' key are collected, passed to
     AlignUtils.mergeList(), and the result is appended to 'outFile' with
     AlignUtils.writeListInFile().

     @param inFile: name of the input file, one alignment per line
     @param outFile: name of the output file (default: '<inFile>.merged');
                     removed first if it already exists
     """
     if outFile == "":
         outFile = "%s.merged" % ( inFile )
     if os.path.exists( outFile ):
         os.remove( outFile )

     tmpFile = "%s.sorted" % ( inFile )
     AlignUtils.sortAlignFile( inFile, tmpFile )

     try:
         # Only the group of the pair currently being read is kept in memory.
         # NOTE(review): this presumes sortAlignFile() puts all lines of a
         # given (query, subject) pair next to each other -- confirm.
         prevPairQrySbj = None
         lAligns = []
         with open( tmpFile, "r" ) as tmpF:
             for line in tmpF:
                 iAlign = Align()
                 iAlign.setFromString( line )
                 pairQrySbj = "%s_%s" % ( iAlign.getQueryName(), iAlign.getSubjectName() )
                 if pairQrySbj != prevPairQrySbj:
                     # A new pair starts: flush the previous group, if any.
                     if prevPairQrySbj is not None:
                         lMerged = AlignUtils.mergeList( lAligns )
                         AlignUtils.writeListInFile( lMerged, outFile, "a" )
                     prevPairQrySbj = pairQrySbj
                     lAligns = []
                 lAligns.append( iAlign )

         # Flush the last group. The write is done even when the input was
         # empty, exactly as before (an empty list is then passed on).
         lMerged = []
         if lAligns:
             lMerged = AlignUtils.mergeList( lAligns )
         AlignUtils.writeListInFile( lMerged, outFile, "a" )
     finally:
         # Never leave the temporary sorted file behind, even on error.
         if os.path.exists( tmpFile ):
             os.remove( tmpFile )
Ejemplo n.º 2
0
def filterRedundantMatches( inFile, outFile ):
    """
    When a pairwise alignment is launched ~ all-by-all (i.e. one batch against all chunks),
    one filters the redundant matches. For instance we keep 'chunk3-1-100-chunk7-11-110-...'
    and we discard 'chunk7-11-110-chunk3-1-100-...'.
    Also we keep 'chunk5-1-100-chunk5-11-110-...' and we discard
    'chunk5-11-110-chunk5-1-100-...'.
    For this of course the results need to be sorted by query, on plus strand,
    and in ascending coordinates (always the case with Blaster).

    A match is kept when the query chunk number is lower than the subject
    chunk number, or when both are equal and the query minimum coordinate is
    lower than the subject minimum coordinate.

    @param inFile: name of the input file, one alignment per line
    @param outFile: name of the output file receiving the kept alignments

    Prints an error and exits with status 1 if a query or subject name does
    not contain 'chunk'.
    """
    tick = 100000
    with open( inFile, "r" ) as inFileHandler:
        with open( outFile, "w" ) as outFileHandler:
            iAlign = Align()
            for countMatches, line in enumerate( inFileHandler, 1 ):
                iAlign.setFromString( line )
                qryName = iAlign.range_query.seqname
                sbjName = iAlign.range_subject.seqname
                if "chunk" not in qryName or "chunk" not in sbjName:
                    # Parenthesised single-argument form works as a statement
                    # (Python 2) and as a function call (Python 3).
                    print( "ERROR: 'chunk' not in seqname" )
                    sys.exit(1)

                # Parse each chunk number once instead of once per comparison.
                qryChunk = int( qryName.split("chunk")[1] )
                sbjChunk = int( sbjName.split("chunk")[1] )
                if qryChunk < sbjChunk:
                    iAlign.write( outFileHandler )
                elif qryChunk == sbjChunk:
                    if iAlign.range_query.getMin() < iAlign.range_subject.getMin():
                        iAlign.write( outFileHandler )

                if countMatches % tick == 0:   # need to free buffer frequently as file can be big
                    outFileHandler.flush()
                    os.fsync( outFileHandler.fileno() )
Ejemplo n.º 3
0
 def getAlignListFromFile( inFile ):
     """
     Return a list with one Align instance per line of 'inFile'.

     Each line is parsed with Align.setFromString() into a fresh instance.

     @param inFile: name of the input file
     @return: list of Align instances, in file order
     """
     lAlignInstances = []
     # 'with' closes the file even if parsing a line raises.
     with open( inFile, "r" ) as inFileHandler:
         for line in inFileHandler:
             iAlign = Align()
             iAlign.setFromString( line )
             lAlignInstances.append( iAlign )
     return lAlignInstances
Ejemplo n.º 4
0
 def getScoreListFromFile( inFile ):
     """
     Return the list of the 'score' attribute of each alignment in 'inFile'.

     A single Align instance is reused: it is reset, then filled from each
     line with setFromString(), and its 'score' is recorded.

     @param inFile: name of the input file, one alignment per line
     @return: list of scores, in file order
     """
     lScores = []
     # 'with' closes the file even if parsing a line raises.
     with open( inFile, "r" ) as inFileHandler:
         iAlign = Align()
         for line in inFileHandler:
             iAlign.reset()
             iAlign.setFromString( line )
             lScores.append( iAlign.score )
     return lScores
Ejemplo n.º 5
0
 def convertAlignFileIntoMapFileWithSubjectsOnQueries( alignFile, mapFile ):
     """
     Convert each alignment of 'alignFile' into one entry of 'mapFile'.

     For every input line, the object returned by
     Align.getSubjectAsMapOfQuery() is written to 'mapFile' through its
     write() method.

     @param alignFile: name of the input file, one alignment per line
     @param mapFile: name of the output file (overwritten)
     """
     # Nested 'with' blocks close (and so flush) both files even on error.
     with open( alignFile, "r" ) as alignFileHandler:
         with open( mapFile, "w" ) as mapFileHandler:
             iAlign = Align()
             for line in alignFileHandler:
                 iAlign.setFromString( line )
                 iMapQ = iAlign.getSubjectAsMapOfQuery()
                 iMapQ.write( mapFileHandler )
Ejemplo n.º 6
0
 def convertAlignFileIntoPathFile( alignFile, pathFile ):
     """
     Convert 'alignFile' into 'pathFile' by prefixing each alignment with a number.

     Each input line is parsed with a tab separator, then written as
     '<n>\\t<iAlign.toString()>' where <n> is the 1-based rank of the line in
     the input file.

     @param alignFile: name of the input file, one alignment per line
     @param pathFile: name of the output file (overwritten)
     """
     # Nested 'with' blocks close (and so flush) both files even on error.
     with open( alignFile, "r" ) as alignFileHandler:
         with open( pathFile, "w" ) as pathFileHandler:
             iAlign = Align()
             for countAlign, line in enumerate( alignFileHandler, 1 ):
                 iAlign.setFromString( line, "\t" )
                 pathFileHandler.write( "%i\t%s\n" % ( countAlign, iAlign.toString() ) )
Ejemplo n.º 7
0
 def convertAlignFileIntoMapFileWithQueriesAndSubjects( alignFile, mapFile ):
     """
     Convert each alignment of 'alignFile' into two entries of 'mapFile'.

     For every input line, the two objects returned by
     Align.getMapsOfQueryAndSubject() are written to 'mapFile', the first one
     (query) before the second one (subject).

     @param alignFile: name of the input file, one alignment per line
     @param mapFile: name of the output file (overwritten)
     """
     # Nested 'with' blocks close (and so flush) both files even on error.
     with open( alignFile, "r" ) as alignFileHandler:
         with open( mapFile, "w" ) as mapFileHandler:
             iAlign = Align()
             for line in alignFileHandler:
                 iAlign.setFromString( line )
                 iMapQ, iMapS = iAlign.getMapsOfQueryAndSubject()
                 iMapQ.write( mapFileHandler )
                 iMapS.write( mapFileHandler )
Ejemplo n.º 8
0
 def updateScoresInFile( inFile, outFile ):
     """
     Copy the alignments of 'inFile' into 'outFile' after updating their score.

     Each line is parsed with a tab separator into a reused Align instance
     (reset before each line), updateScore() is called on it, and the
     instance is written to 'outFile'.

     @param inFile: name of the input file, one alignment per line
     @param outFile: name of the output file (overwritten)
     """
     # Nested 'with' blocks close (and so flush) both files even on error.
     with open( inFile, "r" ) as inHandler:
         with open( outFile, "w" ) as outHandler:
             iAlign = Align()
             for line in inHandler:
                 iAlign.reset()
                 iAlign.setFromString( line, "\t" )
                 iAlign.updateScore()
                 iAlign.write( outHandler )