Esempio n. 1
0
 def dbLongestSequences( num, inFileName, outFileName="", verbose=0, minThresh=0 ):
     """
     Save the 'num' longest sequences of a sequence file into another file.

     The input file is loaded through BioseqDB. The selected sequences are
     written in the same order as they appear in the input, via the 'write'
     method of each sequence object.

     @param num: maximum number of sequences to keep (converted with int());
                 a value <= 0 keeps nothing
     @param inFileName: name of the input file given to BioseqDB
     @param outFileName: name of the output file; when empty, defaults to
                         inFileName + ".best" + str(num)
     @param verbose: verbosity level (0: silent, 1: summary, 2: one line per sequence)
     @param minThresh: minimum length (inclusive) a sequence must have to be kept
     @return: always 0

     @note: the output file is always created (possibly empty), even when the
            input holds no sequence or when no sequence reaches 'minThresh'.
     @note: when several sequences share the shortest selected length, the
            first ones met in the input are kept, and they never take the
            place of a strictly longer sequence.
     """
     bsDB = BioseqDB( inFileName )
     if verbose > 0:
         print( "nb of input sequences: %i" % ( bsDB.getSize() ) )

     if outFileName == "":
         outFileName = inFileName + ".best" + str(num)

     if bsDB.getSize() == 0:
         # still create (or truncate) the output file, then close it right away
         open( outFileName, "w" ).close()
         return 0

     num = int(num)
     if verbose > 0:
         print( "keep the %i longest sequences" % ( num ) )
         if minThresh > 0:
             # NOTE: the comparison below is inclusive (>=) although the message says '>'
             print( "with length > %i bp" % ( minThresh ) )
         sys.stdout.flush()

     # retrieve the length of each input sequence
     lAllLengths = []
     for seqNum, bs in enumerate( bsDB.db, 1 ):
         seqLength = bs.getLength()
         lAllLengths.append( seqLength )
         if verbose > 1:
             print( "%d seq %s : %d bp" % ( seqNum, bs.header[0:40], seqLength ) )
             sys.stdout.flush()

     # keep the 'num' greatest lengths which also reach the threshold
     # (max() guards against a negative 'num' being read as a slice from the end)
     lAllLengths.sort( reverse=True )
     lSeqLgth = [ lgth for lgth in lAllLengths[:max( num, 0 )] if lgth >= minThresh ]
     if verbose > 0:
         if lSeqLgth:
             # the list is sorted in decreasing order
             print( "selected max length: %i" % ( lSeqLgth[0] ) )
             print( "selected min length: %i" % ( lSeqLgth[-1] ) )
         else:
             print( "no sequence selected" )
         sys.stdout.flush()

     # save the longest
     nbSave = 0
     with open( outFileName, "w" ) as outFile:
         if lSeqLgth:
             cutoff = lSeqLgth[-1]
             # number of selected slots available for sequences whose length is
             # exactly the cutoff: ties must not use up the slots of longer ones
             nbAtCutoff = lSeqLgth.count( cutoff )
             for seqNum, bs in enumerate( bsDB.db, 1 ):
                 seqLength = bs.getLength()
                 if seqLength < cutoff:
                     continue
                 if seqLength == cutoff:
                     if nbAtCutoff == 0:
                         continue
                     nbAtCutoff -= 1
                 bs.write( outFile )
                 nbSave += 1
                 if verbose > 1:
                     print( "%d seq %s : saved !" % ( seqNum, bs.header[0:40] ) )
                     sys.stdout.flush()
                 if nbSave == len( lSeqLgth ):
                     break

     if verbose > 0:
         # two spaces before the file name: same output as the former comma-separated print
         print( "%d saved sequences in  %s" % ( nbSave, outFileName ) )
         sys.stdout.flush()

     return 0