workingDir=os.getcwd() chkPointName = None outFile = file(os.path.join(workingDir,outFastaName),'a') qsubOptions='' # complete restart timer.mark('fastaSplit') if not RESUME: startName = originalFasta inFileName = startName seqSize = os.stat(originalFasta).st_size partCt = seqSize/seqSliceSize +1 seqSliceNames = fasta.splitFasta(originalFasta,partCt,tmpDir=workingDir) tmpf=file(faNameFile,'w') tmpf.write('\n'.join(seqSliceNames)) tmpf.close() # reuse split files if RESUME: seqSliceNames = [] tmpf = file(faNameFile) for l in tmpf: try: print seqSliceNames.append( os.path.join(os.getcwd(), os.path.split(l.rstrip())[1])) except:
def niceName(rootName, maxN):
    """Generate numbered output files derived from rootName.

    Each step of the generator opens a new file for writing and yields the
    pair (open file object, file name).  The name is built by inserting
    '_<index>' between the base name and the extension of rootName, e.g.
    'reads.fasta' -> 'reads_00.fasta', 'reads_01.fasta', ...

    rootName -- path used as the template for the generated names.
    maxN     -- expected number of files; it only determines the amount of
                zero padding.  Indices past maxN are still produced, they
                are simply wider than the padding.

    The generator never terminates on its own, and the caller owns (and
    must close) every file object it receives.
    """
    # Pad to the number of digits of the largest expected index (maxN - 1).
    # Counting digits on an integer gives the same width as the previous
    # ceil(log10(maxN)) for maxN >= 2, but does not raise ValueError for
    # maxN <= 0 and is not subject to floating point rounding.
    lastIndex = max(int(ceil(maxN)) - 1, 0)
    width = len(str(lastIndex))

    base, ext = os.path.splitext(rootName)
    n = 0
    while True:
        name = '%s_%0*d%s' % (base, width, n, ext)
        # open() rather than the file() constructor: same result on
        # Python 2, and file() does not exist on Python 3.
        yield open(name, 'w'), name
        n += 1
def submitBlast( self, thingToBlast, dbPath, N=12, parameters="-p blastn",
                 formatParameters="-m 8", prefix="GrBl",
                 qrshArgs="-l arch='fbsd-amd64'" ):
    """Initiates a BLAST job with the given parameters.

    thingToBlast can be:
      - a path to a FASTA file
      - anything or list of things that evaluates to a valid FASTA
        string(s) when the str() method is applied.
        (this includes fasta.Record objects)

    dbPath is handed to the BLAST executable as its -d argument.

    N is the maximum number of parallel BLASTs to execute.
      (the sequences will be split up into at most N files.)

    parameters can be either an explicit parameter string to append
      to the blast call (ie. '-p blastn -b 3') or a dictionary of
      { parameter:value } pairs, which will be translated to
      '-parameter1 value1 -parameter2 value2'

    formatParameters is a string inserted verbatim right after the
      BLAST executable path (default '-m 8').

    prefix is forwarded unchanged to self.submitThread.

    qrshArgs is forwarded unchanged to self.submitThread.  The default
      is the value that used to be hard-coded here; pass something else
      to target another architecture or to add further resource
      requests (e.g. "-l arch='fbsd-amd64' -l mf=1.0G").

    Side effects: sets self.parameters and self.dbPath, sets
    self.queryFile when thingToBlast is an existing path, extends
    self.tempFiles with every file created, and appends one
    ( thread, outfile ) pair per query slice to self.activeQueries.

    submitBlast returns the list of values returned by
    self.submitThread, one per query slice (documented originally as
    the names of the threads which are running this Blast).
    """

    # Store for database use
    self.parameters = parameters
    self.dbPath = dbPath

    ############################
    # Build the query
    if isinstance( thingToBlast, str ) and os.path.lexists( thingToBlast ):
        # path to a FASTA file
        queryPath = thingToBlast
        # NOTE(review): self.queryFile is only updated in this branch; for
        # in-memory queries it keeps whatever value it had before.  Left
        # as is -- confirm against the code that reads queryFile.
        self.queryFile = queryPath
    else:
        ( of, queryPath ) = fasta.mystemp( suffix='.fasta', dir=self.tmpDir )
        # Record the temp file before writing so that it is still tracked
        # in self.tempFiles if str() or write() raises below.
        self.tempFiles.append( queryPath )
        try:
            if isinstance( thingToBlast, (tuple, list) ):
                for elt in thingToBlast:
                    of.write(str(elt) + "\n")
            else:
                of.write(str(thingToBlast))
        finally:
            # Always release the handle, even on a failed write.
            of.close()

    ############################
    # Split the query
    queryFiles = fasta.splitFasta( queryPath, N, tmpDir=self.tmpDir )
    self.tempFiles = self.tempFiles + queryFiles

    ############################
    # Build the BLAST line
    # NOTE(review): paths are interpolated unquoted; a path containing
    # whitespace would presumably break the command -- confirm how
    # submitThread executes it before adding quoting.
    blastLine = "%s %s -d %s" % ( self.blastPath, formatParameters, dbPath )

    if isinstance( parameters, str ):
        parameterString = parameters
    else:
        parameterString = " ".join(
            [ "-%s %s" % ( key, parameters[key] ) for key in parameters.keys() ] )
    blastLine = blastLine + " " + parameterString

    threads = []
    for qf in queryFiles:
        # Derive the result file name from the slice's file name only, so
        # that a directory containing ".fasta" is left untouched.
        qfDir, qfName = os.path.split( qf )
        outName = qfName.replace( ".fasta", ".br" )
        if outName == qfName:
            # No ".fasta" in the slice name: a plain replace() would hand
            # BLAST the same path for -i and -o and clobber the query.
            outName = qfName + ".br"
        outfile = os.path.join( qfDir, outName )

        self.tempFiles.append( outfile )
        command = "%s -i %s -o %s" % ( blastLine, qf, outfile )

        thread = self.submitThread( command, qrshArgs=qrshArgs, prefix=prefix )
        self.activeQueries.append( ( thread, outfile ) )
        threads.append( thread )
    return threads