예제 #1
0
def filterRedundantMatches( inFile, outFile ):
    """
    Remove the redundant matches produced by an all-by-all pairwise alignment.

    When each batch is aligned against all chunks, every match is reported
    twice, once in each direction. Only one of them is kept:
    - matches between two different chunks are kept when the chunk number
      of the query is lower than the chunk number of the subject, e.g.
      'chunk3-1-100-chunk7-11-110-...' is kept and
      'chunk7-11-110-chunk3-1-100-...' is discarded;
    - matches within a single chunk are kept when the query starts before
      the subject, e.g. 'chunk5-1-100-chunk5-11-110-...' is kept and
      'chunk5-11-110-chunk5-1-100-...' is discarded.

    The results need to be sorted by query, on the plus strand, and in
    ascending coordinates (always the case with Blaster).

    inFile: name of the input file, one alignment per line
    outFile: name of the output file (overwritten if it already exists)

    Exits with status 1 if a query or subject name does not contain 'chunk'.
    A ValueError is raised by int() if what follows 'chunk' is not an integer.
    Both files are closed whatever the way the function is left.
    """
    tick = 100000   # number of input lines between two forced flushes
    inFileHandler = open( inFile, "r" )
    try:
        outFileHandler = open( outFile, "w" )
        try:
            iAlign = Align()
            countMatches = 0
            for line in inFileHandler:
                countMatches += 1
                iAlign.setFromString( line )
                queryName = iAlign.range_query.seqname
                subjectName = iAlign.range_subject.seqname
                if "chunk" not in queryName or "chunk" not in subjectName:
                    print( "ERROR: 'chunk' not in seqname" )
                    sys.exit(1)

                # parse each chunk number only once per line
                queryChunk = int( queryName.split("chunk")[1] )
                subjectChunk = int( subjectName.split("chunk")[1] )

                if queryChunk < subjectChunk:
                    iAlign.write( outFileHandler )
                elif queryChunk == subjectChunk:
                    # same chunk: keep the match whose query comes first
                    if iAlign.range_query.getMin() < iAlign.range_subject.getMin():
                        iAlign.write( outFileHandler )

                if countMatches % tick == 0:   # need to free buffer frequently as file can be big
                    outFileHandler.flush()
                    os.fsync( outFileHandler.fileno() )
        finally:
            # also reached on sys.exit(), so that what was written is not lost
            outFileHandler.close()
    finally:
        inFileHandler.close()
예제 #2
0
 def updateScoresInFile( inFile, outFile ):
     """
     Update the score of every alignment of a file.

     Each line of 'inFile' is loaded into an Align instance (fields
     separated by tabulations), its score is updated with updateScore()
     and the alignment is written into 'outFile'.

     inFile: name of the input file, one alignment per line
     outFile: name of the output file (overwritten if it already exists)

     Both files are closed even if an alignment cannot be parsed or written.
     """
     inHandler = open( inFile, "r" )
     try:
         outHandler = open( outFile, "w" )
         try:
             iAlign = Align()
             for line in inHandler:
                 # the same instance is reused, hence the reset before each line
                 iAlign.reset()
                 iAlign.setFromString( line, "\t" )
                 iAlign.updateScore()
                 iAlign.write( outHandler )
         finally:
             outHandler.close()
     finally:
         inHandler.close()
 def retrieveInitialSequenceHeadersForAlignFile( self, dNew2Init ):
     """
     Replace the sequence names of an 'align' file by their initial headers.

     Each line of self._inFile is loaded into an Align instance. The query
     name and the subject name are replaced by the value recorded in
     'dNew2Init' when they are among its keys, and left unchanged otherwise.
     The alignment is then written into self._outFile (overwritten if it
     already exists).

     dNew2Init: dictionary whose keys are the names found in the input file
                and whose values are the names to write instead

     Both files are closed even if an alignment cannot be parsed or written.
     """
     inFileHandler = open( self._inFile, "r" )
     try:
         outFileHandler = open( self._outFile, "w" )
         try:
             a = Align()
             for line in inFileHandler:
                 # NOTE(review): the line is split as is, so the last field
                 # still ends with the newline -- presumably handled by
                 # setFromTuple(), to be confirmed
                 a.setFromTuple( line.split("\t") )

                 # 'in' instead of has_key(), which no longer exists in Python 3
                 nameToBeReplaced = a.range_query.seqname
                 if nameToBeReplaced in dNew2Init:
                     a.range_query.seqname = dNew2Init[ nameToBeReplaced ]

                 nameToBeReplaced = a.range_subject.seqname
                 if nameToBeReplaced in dNew2Init:
                     a.range_subject.seqname = dNew2Init[ nameToBeReplaced ]

                 a.write( outFileHandler )
         finally:
             outFileHandler.close()
     finally:
         inFileHandler.close()