def dbShuffle(inData, outData, verbose=0):
    """
    Shuffle fasta data with the external program 'esl-shuffle', or with
    'shuffle' when 'esl-shuffle' is not found in the user's PATH.

    If inData is a file, the command output is redirected to the file outData.
    If inData is a directory, outData is removed if it already exists, then
    created as a directory, and each file '<inData>/*.fa' is shuffled into
    '<outData>/<basename without extension>_shuffle.fa'.
    If inData is neither a file nor a directory, nothing is done.

    The interpreter exits with status 1 as soon as a command returns a
    non-zero status.

    @param inData: path to the input file or directory
    @type inData: string
    @param outData: path to the output file or directory
    @type outData: string
    @param verbose: verbosity level (default=0)
    @type verbose: integer
    """
    if CheckerUtils.isExecutableInUserPath("esl-shuffle"):
        prg = "esl-shuffle"
    else:
        prg = "shuffle"

    def runShuffle(inFile, outFile, echo):
        # The command is formatted directly rather than by substituting
        # "INPUT"/"OUTPUT" placeholders in a template: chained replace()
        # calls would also rewrite an input path that happens to contain
        # the text "OUTPUT".
        # NOTE(review): paths are passed to the shell unquoted, as before.
        cmd = "%s -d %s > %s" % (prg, inFile, outFile)
        if echo:
            print(cmd)
        returnStatus = os.system(cmd)
        if returnStatus != 0:
            sys.stderr.write("ERROR: 'shuffle' returned '%i'\n" % returnStatus)
            sys.exit(1)

    if os.path.isfile(inData):
        if verbose > 0:
            print("shuffle input file '%s'" % inData)
        # In single-file mode the command is always echoed on stdout.
        runShuffle(inData, outData, True)

    elif os.path.isdir(inData):
        if verbose > 0:
            print("shuffle files in input directory '%s'" % inData)
        # Start from an empty output directory.
        if os.path.exists(outData):
            shutil.rmtree(outData)
        os.mkdir(outData)
        lInputFiles = glob.glob("%s/*.fa" % (inData))
        nbInputFiles = len(lInputFiles)
        for nbFastaFiles, inputFile in enumerate(lInputFiles, 1):
            if verbose > 1:
                print("%3i / %3i" % (nbFastaFiles, nbInputFiles))
            prefix = os.path.splitext(os.path.basename(inputFile))[0]
            runShuffle(inputFile, "%s/%s_shuffle.fa" % (outData, prefix), False)
def launchShuffle(self, outFileName="", run="yes", verbose="no"):
    """
    Build, and optionally launch, the shuffling command on self.inFileName.

    The program used is 'esl-shuffle' when it is found in the user's PATH,
    'shuffle' otherwise.

    @param outFileName: file receiving the program's stdout (no redirection if empty)
    @type outFileName: string
    @param run: "yes" to launch the command via self.launch(), "no" to only build it
    @type run: string
    @param verbose: "yes" to print the command before going on
    @type verbose: string
    @return: what self.launch() returns if run is "yes", the command
             string if run is "no", None for any other value of run
    """
    hasEasel = CheckerUtils.isExecutableInUserPath("esl-shuffle")
    prg = "esl-shuffle" if hasEasel else "shuffle"

    cmd = prg + " -d " + self.inFileName
    if outFileName != "":
        cmd = cmd + " > " + outFileName

    if verbose == "yes":
        print("launch: %s" % (cmd))
        sys.stdout.flush()

    if run == "no":
        return cmd
    if run == "yes":
        return self.launch(prg, cmd, verbose)
    return None
def mergeCoordsInFile(inFile, outFile):
    """
    Run the external program 'mapOp' in merge mode ('-q <inFile> -m') and
    rename the file '<inFile>.merge' to outFile.

    If the module 'pyRepetUnit.commons.checker.CheckerUtils' is not loaded,
    or if 'mapOp' is not in the user's PATH, a warning is written on stderr
    and nothing else is done. If 'mapOp' returns a non-zero status, an error
    is written on stderr and the interpreter exits with status 1.

    @param inFile: path to the input file given to 'mapOp'
    @type inFile: string
    @param outFile: path to which '<inFile>.merge' is renamed
    @type outFile: string
    """
    # Membership test instead of the deprecated dict.has_key().
    if "pyRepetUnit.commons.checker.CheckerUtils" not in sys.modules:
        sys.stderr.write("WARNING: can't find module 'CheckerUtils'\n")
        return
    if not CheckerUtils.isExecutableInUserPath("mapOp"):
        sys.stderr.write("WARNING: can't find executable 'mapOp'\n")
        return

    cmd = "mapOp"
    cmd += " -q %s" % (inFile)
    cmd += " -m"
    # NOTE(review): with this order, only stdout is discarded; stderr still
    # goes to the caller's stdout. If the intent was to silence both, it
    # should read "> /dev/null 2>&1" -- confirm before changing.
    cmd += " 2>&1 > /dev/null"
    returnStatus = os.system(cmd)
    if returnStatus != 0:
        # Errors go to stderr, like the warnings above.
        sys.stderr.write("ERROR: mapOp returned %i\n" % (returnStatus))
        sys.exit(1)
    os.rename("%s.merge" % inFile, outFile)
def compareInputSequencesWithMummer(self, nbInSeq):
    """
    Launch 'mummer' on every pair of 'batch_<index>.fa' files, with the
    options '-maxmatch -l <self._minMatchLength> -b -F', and save its
    stdout in 'mummer_<i>_vs_<j>.txt'.

    Only the pairs with i < j are compared, i.e. N(N-1)/2 comparisons.
    The interpreter exits with status 1 if 'mummer' is not in the user's
    PATH or if one comparison returns a non-zero status.

    @param nbInSeq: number of input sequences
    @type nbInSeq: integer
    """
    if self._verbose > 0:
        print("aligning input sequences...")
        sys.stdout.flush()

    if not CheckerUtils.isExecutableInUserPath("mummer"):
        sys.stderr.write("ERROR: 'mummer' is not in your PATH\n")
        sys.exit(1)

    prg = "mummer"
    # Indexes in batch file names are zero-padded to the number of digits
    # of the count of 'batch_*.fa' files in the current directory.
    nbDigits = len(str(len(glob.glob("batch_*.fa"))))

    for i in range(1, nbInSeq + 1):
        batchI = "batch_%s.fa" % (str(i).zfill(nbDigits))
        for j in range(i + 1, nbInSeq + 1):
            if self._verbose > 1:
                print("launch MUmmer on %i versus %i" % (i, j))
                sys.stdout.flush()

            batchJ = "batch_%s.fa" % (str(j).zfill(nbDigits))
            # File j is given first on the command line, then file i.
            lArgs = [
                prg,
                "-maxmatch",
                "-l %i" % (self._minMatchLength),
                "-b",
                "-F",
                batchJ,
                batchI,
                "> mummer_%i_vs_%i.txt" % (i, j),
            ]
            if self._verbose < 3:
                lArgs.append("2> /dev/null")

            returnStatus = os.system(" ".join(lArgs))
            if returnStatus != 0:
                sys.stderr.write("ERROR: '%s' returned '%i'\n" % (prg, returnStatus))
                sys.exit(1)
def _checkProgramName(self):
    """
    Check that the program named by self.getProgramName() is in the user's
    PATH; if not, write an error on stderr and exit with status 1.
    """
    if CheckerUtils.isExecutableInUserPath(self.getProgramName()):
        return
    sys.stderr.write("ERROR: '%s' not in your PATH\n" % (self.getProgramName()))
    sys.exit(1)