def start(self):
    """
    Poll the unfinished targets for their 'finished' marker file.

    Every target folder in self.params['targets'] whose flag is still falsy
    is checked for a file named 'finished'; when it exists the flag is set
    to True. If at least one target is still pending afterwards, a new
    AssemblyChecker job is submitted (after a 5 second pause), otherwise a
    Finisher job is submitted.
    """
    sample = self.params['sample']
    targets = self.params['targets']
    completed = sum(targets.values())
    logger.info("Sample: %s AssemblyChecker started with %s of %s targets completed" % (sample, completed, len(targets)))

    for target_folder in targets:
        if targets[target_folder]:
            # Already known to be finished, nothing to check.
            continue
        marker = os.path.join(target_folder, 'finished')
        if os.path.exists(marker):
            targets[target_folder] = True
            logger.info("%s exists" % marker)
            completed += 1

    # Shallow copy: the follow-up job shares the nested objects (e.g. the
    # targets dict) with this job's params.
    job_params = dict(self.params)

    if len(targets) > sum(targets.values()):
        # Some assemblies haven't completed yet: wait a little, then put
        # another checker back on the job queue.
        time.sleep(5)
        self.submit(AssemblyChecker.to_job(job_params))
        logger.info("Sample: %s Assemblies not finished: %s of %s targets completed" % (sample, completed, len(targets)))
    else:
        logger.debug("Sample: %s, iteration %s, Submitting finisher job to queue." % (sample, self.params['iteration']))
        self.submit(Finisher.to_job(job_params))
        logger.info("Sample: %s Assemblies finished: %s of %s targets completed" % (sample, completed, len(targets)))
def all_workers_waiting(self):
    """
    Report whether every worker slot is in the waiting state.

    Logs the status value of each of the self.nprocs slots in self.status
    and returns True only when all of them equal 1.
    """
    idle = 0
    for slot in range(self.nprocs):
        logger.debug("ProcessRunner %d reports %d" % (slot, self.status[slot]))
        idle += int(self.status[slot] == 1)
    logger.debug("%d of %d workers are in the waiting state" % (idle, self.nprocs))
    return idle == self.nprocs
def run(self):
    """
    Worker main loop; never returns on its own.

    Each pass calls self.waiting(), self.launch() and
    self.update_runstats(). Errors raised by a pass are handled as follows:

    * exceptions.RerunnableError: logged as a warning and counted via
      self.update_runstats(1).
    * exceptions.FatalError: logged, then SIGINT is sent to the process
      identified by self.ppid.
    * KeyboardInterrupt / SystemExit: logged at debug level only; the loop
      keeps going (NOTE(review): presumably the parent terminates this
      process afterwards - confirm).
    * any other Exception: the traceback is logged and SIGINT is sent to
      self.ppid.
    """
    while True:
        try:
            self.waiting()
            self.launch()
            self.update_runstats()
        except exceptions.RerunnableError as e:
            # Logger.warn is a deprecated alias; use Logger.warning.
            logger.warning("[%s] A job needs to be rerun: %s" % (self.name, e))
            self.update_runstats(1)
        except exceptions.FatalError as e:
            logger.error("[%s] A fatal error occurred: %s" % (self.name, e))
            os.kill(self.ppid, signal.SIGINT)
        except (KeyboardInterrupt, SystemExit):
            logger.debug("Process interrupted")
        except Exception:
            logger.error("\n".join(traceback.format_exception(*sys.exc_info())))
            logger.error("An unhandled exception occurred")
            os.kill(self.ppid, signal.SIGINT)
def start(self):
    """
    Poll the unfinished targets for their 'finished' marker file.

    Every target folder in self.params['targets'] whose flag is still falsy
    is checked for a file named 'finished'; when it exists the flag is set
    to True. If at least one target is still pending afterwards, a new
    AssemblyChecker job is submitted (after a 5 second pause), otherwise a
    Finisher job is submitted.
    """
    sample = self.params['sample']
    targets = self.params['targets']
    completed = sum(targets.values())
    logger.info("Sample: %s AssemblyChecker started with %s of %s targets completed" % (sample, completed, len(targets)))

    for target_folder in targets:
        if targets[target_folder]:
            # Already known to be finished, nothing to check.
            continue
        marker = os.path.join(target_folder, 'finished')
        if os.path.exists(marker):
            targets[target_folder] = True
            logger.info("%s exists" % marker)
            completed += 1

    # Shallow copy: the follow-up job shares the nested objects (e.g. the
    # targets dict) with this job's params.
    job_params = dict(self.params)

    if len(targets) > sum(targets.values()):
        # Some assemblies haven't completed yet: wait a little, then put
        # another checker back on the job queue.
        time.sleep(5)
        self.submit(AssemblyChecker.to_job(job_params))
        logger.info("Sample: %s Assemblies not finished: %s of %s targets completed" % (sample, completed, len(targets)))
    else:
        logger.debug("Sample: %s, iteration %s, Submitting finisher job to queue." % (sample, self.params['iteration']))
        self.submit(Finisher.to_job(job_params))
        logger.info("Sample: %s Assemblies finished: %s of %s targets completed" % (sample, completed, len(targets)))
def launch(self):
    """
    Take one item off the queue and run the job it describes.

    The item is a mapping with a 'runner' key (name of an attribute of
    ARC.runners) and a 'params' key (argument passed to that runner).
    After the job has run, the per-runner statistics are updated and the
    queue is told that the task is done.
    """
    # Blocks until there is an item on the queue.
    item = self.q.get()

    self.running()
    runner_factory = getattr(ARC.runners, item['runner'])
    job = runner_factory(item['params'])
    logger.debug("[%s] Processing: %s" % (self.name, job.message()))
    job.queue(self.q)
    job.runner()

    self.update_jobstats(item['runner'])

    # Drop the references to the job and the queue item before reporting
    # completion.
    job = None
    item = None

    self.q.task_done()
def run(self):
    """
    Start the worker processes, wait for the job queue to drain, then
    shut the workers down and log the run statistics.

    self.nprocs ProcessRunner workers are created and started. The main
    loop then blocks on self.q.join(); once the queue reports that all
    tasks are done it double-checks that every worker is in the waiting
    state, sleeping 5 seconds and retrying otherwise.

    The workers are terminated exactly once, when the loop is left (either
    normally or through an exception). Previously killall() sat in a
    `finally` inside the loop, so the workers were also terminated on the
    "not waiting yet" pass and could then never reach the waiting state.

    Raises:
        Whatever exception interrupts the wait (exceptions.FatalError,
        KeyboardInterrupt, SystemExit or any other Exception) is logged
        and re-raised after the workers have been terminated.
    """
    logger.info("Starting...")
    logger.debug("Setting up workers.")
    for i in range(self.nprocs):
        worker = ProcessRunner(i, self.q, self.status, self.stats, self.pid)
        self.workers.append(worker)
        worker.daemon = False
        worker.start()

    try:
        while True:
            self.q.join()

            # This shouldn't be needed but we will check just in case
            if self.all_workers_waiting():
                logger.debug("Workers are all waiting and the queue is empty. Exiting")
                break

            logger.debug("Workers are not in a waiting state. Waiting for more.")
            time.sleep(5)
    except exceptions.FatalError:
        logger.error("A fatal error was encountered.")
        raise
    except (KeyboardInterrupt, SystemExit):
        logger.error("Terminating processes")
        raise
    except Exception:
        logger.error("\n".join(traceback.format_exception(*sys.exc_info())))
        logger.error("An unhandled exception occurred")
        raise
    finally:
        # Kill 'em all! Runs on both the normal and the error path.
        self.killall()

    logger.info("-----")
    logger.info("%d processes returned ok." % (self.stats[0]))
    logger.info("%d processes had to be rerun." % (self.stats[1]))
    logger.info("-----")
    logger.info("%d Mapper jobs run." % (self.stats[2]))
    logger.info("%d Assembly jobs run." % (self.stats[3]))
    logger.info("%d Checker jobs run." % (self.stats[4]))
    logger.info("%d Finisher jobs run." % (self.stats[5]))
    logger.info("-----")
def run(self):
    """
    Start the worker processes, wait for the job queue to drain, then
    shut the workers down and log the run statistics.

    self.nprocs ProcessRunner workers are created and started. The main
    loop then blocks on self.q.join(); once the queue reports that all
    tasks are done it double-checks that every worker is in the waiting
    state, sleeping 5 seconds and retrying otherwise.

    The workers are terminated exactly once, when the loop is left (either
    normally or through an exception). Previously killall() sat in a
    `finally` inside the loop, so the workers were also terminated on the
    "not waiting yet" pass and could then never reach the waiting state.

    Raises:
        Whatever exception interrupts the wait (exceptions.FatalError,
        KeyboardInterrupt, SystemExit or any other Exception) is logged
        and re-raised after the workers have been terminated.
    """
    logger.info("Starting...")
    logger.debug("Setting up workers.")
    for i in range(self.nprocs):
        worker = ProcessRunner(i, self.q, self.status, self.stats, self.pid)
        self.workers.append(worker)
        worker.daemon = False
        worker.start()

    try:
        while True:
            self.q.join()

            # This shouldn't be needed but we will check just in case
            if self.all_workers_waiting():
                logger.debug("Workers are all waiting and the queue is empty. Exiting")
                break

            logger.debug("Workers are not in a waiting state. Waiting for more.")
            time.sleep(5)
    except exceptions.FatalError:
        logger.error("A fatal error was encountered.")
        raise
    except (KeyboardInterrupt, SystemExit):
        logger.error("Terminating processes")
        raise
    except Exception:
        logger.error("\n".join(traceback.format_exception(*sys.exc_info())))
        logger.error("An unhandled exception occurred")
        raise
    finally:
        # Kill 'em all! Runs on both the normal and the error path.
        self.killall()

    logger.info("-----")
    logger.info("%d processes returned ok." % (self.stats[0]))
    logger.info("%d processes had to be rerun." % (self.stats[1]))
    logger.info("-----")
    logger.info("%d Mapper jobs run." % (self.stats[2]))
    logger.info("%d Assembly jobs run." % (self.stats[3]))
    logger.info("%d Checker jobs run." % (self.stats[4]))
    logger.info("%d Finisher jobs run." % (self.stats[5]))
    logger.info("-----")
def RunNewbler(self):
    """
    Assemble the reads of one target with Newbler
    (newAssembly, addRun, runProject).

    Keys read from self.params:
        assembly_PE1 and assembly_PE2, and/or assembly_SE: read files.
        target_dir: directory holding the 'assembly' project, the optional
            'assembly.log' and the 'finished' marker file.
        sample, target, iteration: used in log messages.
        verbose: when truthy, tool output goes to assembly.log, otherwise
            to os.devnull.
        last_assembly, cdna, urt, rip: select command line switches.
        assemblytimeout: seconds after which runProject is killed.

    On return, <target_dir>/finished contains exactly one of
    'assembly_failed', 'assembly_killed' or 'assembly_complete', followed
    by a tab and the elapsed seconds of the runProject step.

    Raises:
        exceptions.FatalError: if neither a PE pair nor an SE file is
            configured, or a configured read file does not exist.
    """
    params = self.params
    has_pe = 'assembly_PE1' in params and 'assembly_PE2' in params
    has_se = 'assembly_SE' in params

    #Check for necessary params:
    if not (has_pe or has_se):
        raise exceptions.FatalError('Missing self.params in RunNewbler.')
    #Check for necessary files (both PE files must exist):
    if has_pe and not (os.path.exists(params['assembly_PE1']) and
                       os.path.exists(params['assembly_PE2'])):
        raise exceptions.FatalError('Missing PE files in RunNewbler.')
    if has_se and not os.path.exists(params['assembly_SE']):
        raise exceptions.FatalError('Missing SE file in RunNewbler.')

    sample = params['sample']
    target = params['target']
    project = os.path.join(params['target_dir'], 'assembly')
    killed = False
    failed = False
    proc = None  # stays None when runProject could not be started

    #determine whether to pipe output to a file or /dev/null
    if params['verbose']:
        out = open(os.path.join(params['target_dir'], "assembly.log"), 'w')
    else:
        out = open(os.devnull, 'w')

    try:
        #Build args for newAssembly:
        args = ['newAssembly', '-force']
        if params['last_assembly'] and params['cdna']:
            #only run with cdna switch on the final assembly
            args += ['-cdna']
        args += [project]
        logger.debug("Calling newAssembly for sample: %s target %s" % (sample, target))
        logger.info(" ".join(args))
        subprocess.call(args, stdout=out, stderr=out)

        #Build args for addRun, one call per read file:
        read_files = []
        if has_pe:
            read_files += [params['assembly_PE1'], params['assembly_PE2']]
        if has_se:
            read_files.append(params['assembly_SE'])
        for read_file in read_files:
            args = ['addRun', project, read_file]
            logger.debug("Calling addRun for sample: %s target %s" % (sample, target))
            logger.debug(" ".join(args))
            subprocess.call(args, stdout=out, stderr=out)

        #Build args for runProject
        args = ['runProject', '-cpu', '1']
        if params['last_assembly'] and params['cdna']:
            args += ['-noace']
        else:
            args += ['-nobig']
        if params['urt'] and not params['last_assembly']:
            #only run with the -urt switch when it isn't the final assembly
            args += ['-urt']
        if params['rip']:
            args += ['-rip']
        args += [project]

        start = time.time()
        try:
            logger.debug("Calling runProject for sample: %s target %s" % (sample, target))
            logger.debug(" ".join(args))
            proc = subprocess.Popen(args, stdout=out, stderr=out)
            while proc.poll() is None:
                if time.time() - start > params['assemblytimeout']:
                    self.kill_process_children(proc.pid)
                    logger.warning("Sample: %s target: %s iteration: %s Killing assembly after %s seconds" % (sample, target, params['iteration'], time.time() - start))
                    killed = True
                    break
                time.sleep(.5)
        except Exception as exc:
            # Deliberately best-effort: record the failure in the
            # 'finished' file instead of propagating.
            txt = "Sample: %s, Target: %s: Unhandeled error running Newbler assembly" % (sample, target)
            txt += '\n\t' + str(exc) + '\n' + traceback.format_exc()
            logger.warning(txt)
            failed = True
        finally:
            #Sometimes newbler doesn't seem to exit completely:
            if proc is not None:
                self.kill_process_children(proc.pid)
    finally:
        out.close()

    if not killed and (proc is None or proc.poll() != 0):
        failed = True

    elapsed = time.time() - start
    if failed:
        outcome = "assembly_failed"
        logger.info("Sample: %s target: %s iteration: %s Assembly failed after %s seconds" % (sample, target, params['iteration'], elapsed))
    elif killed:
        outcome = "assembly_killed"
        logger.info("Sample: %s target: %s iteration: %s Assembly killed after %s seconds" % (sample, target, params['iteration'], elapsed))
    else:
        #Run finished without error
        outcome = "assembly_complete"
        logger.info("Sample: %s target: %s iteration: %s Assembly finished in %s seconds" % (sample, target, params['iteration'], elapsed))

    with open(os.path.join(params['target_dir'], "finished"), 'w') as outf:
        outf.write(outcome + "\t" + str(elapsed))
def RunSpades(self):
    """
    Assemble the reads of one target with spades.py.

    Arguments passed to spades.py: -1 [PE1], -2 [PE2], -s [SE] and
    -o [target_dir]/assembly, plus --only-assembler when requested for a
    non-final assembly or when the input format is fasta.

    Keys read from self.params:
        assembly_PE1 and assembly_PE2, and/or assembly_SE: read files.
        target_dir: directory holding the 'assembly' output, the optional
            'assembly.log' and the 'finished' marker file.
        sample, target, iteration: used in log messages.
        only-assembler, last_assembly, format: select --only-assembler.
        verbose: when truthy, tool output goes to assembly.log, otherwise
            to os.devnull.
        assemblytimeout: seconds after which the assembler is killed.

    On return, <target_dir>/finished contains exactly one of
    'assembly_failed', 'assembly_killed' or 'assembly_complete'.

    Raises:
        exceptions.FatalError: if neither a PE pair nor an SE file is
            configured, or a configured read file does not exist.
    """
    params = self.params
    has_pe = 'assembly_PE1' in params and 'assembly_PE2' in params
    has_se = 'assembly_SE' in params

    #Check that required params are available
    if not (has_pe or has_se):
        raise exceptions.FatalError('Missing self.params in RunSpades.')
    #Check that the files actually exist (both PE files must exist)
    if has_pe and not (os.path.exists(params['assembly_PE1']) and
                       os.path.exists(params['assembly_PE2'])):
        raise exceptions.FatalError('Missing PE files in RunSpades.')
    if has_se and not os.path.exists(params['assembly_SE']):
        raise exceptions.FatalError('Missing SE file in RunSpades.')

    sample = params['sample']
    target = params['target']

    #Build args for assembler call
    args = ['spades.py', '-t', '1']
    skip_correction = params['only-assembler'] and not params['last_assembly']
    # spades errors on read correction if the input isn't fastq
    if skip_correction or params['format'] == 'fasta':
        args.append('--only-assembler')
    if has_pe:
        args += ['-1', params['assembly_PE1'], '-2', params['assembly_PE2']]
    if has_se:
        args += ['-s', params['assembly_SE']]
    args += ['-o', os.path.join(params['target_dir'], 'assembly')]

    logger.debug("Sample: %s target: %s Running spades assembler." % (sample, target))
    logger.info(" ".join(args))

    killed = False
    failed = False
    proc = None  # stays None when spades.py could not be started
    start = time.time()

    if params['verbose']:
        out = open(os.path.join(params['target_dir'], "assembly.log"), 'w')
    else:
        out = open(os.devnull, 'w')
    try:
        proc = subprocess.Popen(args, stdout=out, stderr=out)
        while proc.poll() is None:
            if time.time() - start > params['assemblytimeout']:
                proc.kill()
                killed = True
                logger.warning("Sample: %s target: %s Assembly killed after %s seconds." % (sample, target, time.time() - start))
                break
            time.sleep(.5)
    except Exception as exc:
        # Deliberately best-effort: record the failure in the 'finished'
        # file instead of propagating.
        txt = ("Sample: %s, Target: %s: Unhandeled error running Spades assembly" % (sample, target))
        txt += '\n\t' + str(exc)
        logger.warning(txt)
        failed = True
    finally:
        out.close()
        #Ensure that assembler exits cleanly:
        if proc is not None:
            self.kill_process_children(proc.pid)

    if not killed and (proc is None or proc.poll() != 0):
        failed = True

    elapsed = time.time() - start
    if failed:
        outcome = "assembly_failed"
        logger.info("Sample: %s target: %s iteration: %s Assembly failed after %s seconds" % (sample, target, params['iteration'], elapsed))
    elif killed:
        outcome = "assembly_killed"
        logger.info("Sample: %s target: %s iteration: %s Assembly killed after %s seconds" % (sample, target, params['iteration'], elapsed))
    else:
        #Run finished without error
        outcome = "assembly_complete"
        logger.info("Sample: %s target: %s iteration: %s Assembly finished in %s seconds" % (sample, target, params['iteration'], elapsed))

    with open(os.path.join(params['target_dir'], "finished"), 'w') as outf:
        outf.write(outcome)
def RunNewbler(self):
    """
    Assemble the reads of one target with Newbler
    (newAssembly, addRun, runProject).

    Keys read from self.params:
        assembly_PE1 and assembly_PE2, and/or assembly_SE: read files.
        target_dir: directory holding the 'assembly' project, the optional
            'assembly.log' and the 'finished' marker file.
        sample, target, iteration: used in log messages.
        verbose: when truthy, tool output goes to assembly.log, otherwise
            to os.devnull.
        last_assembly, cdna, urt, rip: select command line switches.
        assemblytimeout: seconds after which runProject is killed.

    On return, <target_dir>/finished contains exactly one of
    'assembly_failed', 'assembly_killed' or 'assembly_complete', followed
    by a tab and the elapsed seconds of the runProject step.

    Raises:
        exceptions.FatalError: if neither a PE pair nor an SE file is
            configured, or a configured read file does not exist.
    """
    params = self.params
    has_pe = 'assembly_PE1' in params and 'assembly_PE2' in params
    has_se = 'assembly_SE' in params

    #Check for necessary params:
    if not (has_pe or has_se):
        raise exceptions.FatalError('Missing self.params in RunNewbler.')
    #Check for necessary files (both PE files must exist):
    if has_pe and not (os.path.exists(params['assembly_PE1']) and
                       os.path.exists(params['assembly_PE2'])):
        raise exceptions.FatalError('Missing PE files in RunNewbler.')
    if has_se and not os.path.exists(params['assembly_SE']):
        raise exceptions.FatalError('Missing SE file in RunNewbler.')

    sample = params['sample']
    target = params['target']
    project = os.path.join(params['target_dir'], 'assembly')
    killed = False
    failed = False
    proc = None  # stays None when runProject could not be started

    #determine whether to pipe output to a file or /dev/null
    if params['verbose']:
        out = open(os.path.join(params['target_dir'], "assembly.log"), 'w')
    else:
        out = open(os.devnull, 'w')

    try:
        #Build args for newAssembly:
        args = ['newAssembly', '-force']
        if params['last_assembly'] and params['cdna']:
            #only run with cdna switch on the final assembly
            args += ['-cdna']
        args += [project]
        logger.debug("Calling newAssembly for sample: %s target %s" % (sample, target))
        logger.info(" ".join(args))
        subprocess.call(args, stdout=out, stderr=out)

        #Build args for addRun, one call per read file:
        read_files = []
        if has_pe:
            read_files += [params['assembly_PE1'], params['assembly_PE2']]
        if has_se:
            read_files.append(params['assembly_SE'])
        for read_file in read_files:
            args = ['addRun', project, read_file]
            logger.debug("Calling addRun for sample: %s target %s" % (sample, target))
            logger.debug(" ".join(args))
            subprocess.call(args, stdout=out, stderr=out)

        #Build args for runProject
        args = ['runProject', '-cpu', '1']
        if params['last_assembly'] and params['cdna']:
            args += ['-noace']
        else:
            args += ['-nobig']
        if params['urt'] and not params['last_assembly']:
            #only run with the -urt switch when it isn't the final assembly
            args += ['-urt']
        if params['rip']:
            args += ['-rip']
        args += [project]

        start = time.time()
        try:
            logger.debug("Calling runProject for sample: %s target %s" % (sample, target))
            logger.debug(" ".join(args))
            proc = subprocess.Popen(args, stdout=out, stderr=out)
            while proc.poll() is None:
                if time.time() - start > params['assemblytimeout']:
                    self.kill_process_children(proc.pid)
                    logger.warning("Sample: %s target: %s iteration: %s Killing assembly after %s seconds" % (sample, target, params['iteration'], time.time() - start))
                    killed = True
                    break
                time.sleep(.5)
        except Exception as exc:
            # Deliberately best-effort: record the failure in the
            # 'finished' file instead of propagating.
            txt = "Sample: %s, Target: %s: Unhandeled error running Newbler assembly" % (sample, target)
            txt += '\n\t' + str(exc) + '\n' + traceback.format_exc()
            logger.warning(txt)
            failed = True
        finally:
            #Sometimes newbler doesn't seem to exit completely:
            if proc is not None:
                self.kill_process_children(proc.pid)
    finally:
        out.close()

    if not killed and (proc is None or proc.poll() != 0):
        failed = True

    elapsed = time.time() - start
    if failed:
        outcome = "assembly_failed"
        logger.info("Sample: %s target: %s iteration: %s Assembly failed after %s seconds" % (sample, target, params['iteration'], elapsed))
    elif killed:
        outcome = "assembly_killed"
        logger.info("Sample: %s target: %s iteration: %s Assembly killed after %s seconds" % (sample, target, params['iteration'], elapsed))
    else:
        #Run finished without error
        outcome = "assembly_complete"
        logger.info("Sample: %s target: %s iteration: %s Assembly finished in %s seconds" % (sample, target, params['iteration'], elapsed))

    with open(os.path.join(params['target_dir'], "finished"), 'w') as outf:
        outf.write(outcome + "\t" + str(elapsed))
def RunSpades(self):
    """
    Assemble the reads of one target with spades.py.

    Arguments passed to spades.py: -1 [PE1], -2 [PE2], -s [SE] and
    -o [target_dir]/assembly, plus --only-assembler when requested for a
    non-final assembly or when the input format is fasta.

    Keys read from self.params:
        assembly_PE1 and assembly_PE2, and/or assembly_SE: read files.
        target_dir: directory holding the 'assembly' output, the optional
            'assembly.log' and the 'finished' marker file.
        sample, target, iteration: used in log messages.
        only-assembler, last_assembly, format: select --only-assembler.
        verbose: when truthy, tool output goes to assembly.log, otherwise
            to os.devnull.
        assemblytimeout: seconds after which the assembler is killed.

    On return, <target_dir>/finished contains exactly one of
    'assembly_failed', 'assembly_killed' or 'assembly_complete'.

    Raises:
        exceptions.FatalError: if neither a PE pair nor an SE file is
            configured, or a configured read file does not exist.
    """
    params = self.params
    has_pe = 'assembly_PE1' in params and 'assembly_PE2' in params
    has_se = 'assembly_SE' in params

    #Check that required params are available
    if not (has_pe or has_se):
        raise exceptions.FatalError('Missing self.params in RunSpades.')
    #Check that the files actually exist (both PE files must exist)
    if has_pe and not (os.path.exists(params['assembly_PE1']) and
                       os.path.exists(params['assembly_PE2'])):
        raise exceptions.FatalError('Missing PE files in RunSpades.')
    if has_se and not os.path.exists(params['assembly_SE']):
        raise exceptions.FatalError('Missing SE file in RunSpades.')

    sample = params['sample']
    target = params['target']

    #Build args for assembler call
    args = ['spades.py', '-t', '1']
    skip_correction = params['only-assembler'] and not params['last_assembly']
    # spades errors on read correction if the input isn't fastq
    if skip_correction or params['format'] == 'fasta':
        args.append('--only-assembler')
    if has_pe:
        args += ['-1', params['assembly_PE1'], '-2', params['assembly_PE2']]
    if has_se:
        args += ['-s', params['assembly_SE']]
    args += ['-o', os.path.join(params['target_dir'], 'assembly')]

    logger.debug("Sample: %s target: %s Running spades assembler." % (sample, target))
    logger.info(" ".join(args))

    killed = False
    failed = False
    proc = None  # stays None when spades.py could not be started
    start = time.time()

    if params['verbose']:
        out = open(os.path.join(params['target_dir'], "assembly.log"), 'w')
    else:
        out = open(os.devnull, 'w')
    try:
        proc = subprocess.Popen(args, stdout=out, stderr=out)
        while proc.poll() is None:
            if time.time() - start > params['assemblytimeout']:
                proc.kill()
                killed = True
                logger.warning("Sample: %s target: %s Assembly killed after %s seconds." % (sample, target, time.time() - start))
                break
            time.sleep(.5)
    except Exception as exc:
        # Deliberately best-effort: record the failure in the 'finished'
        # file instead of propagating.
        txt = ("Sample: %s, Target: %s: Unhandeled error running Spades assembly" % (sample, target))
        txt += '\n\t' + str(exc)
        logger.warning(txt)
        failed = True
    finally:
        out.close()
        #Ensure that assembler exits cleanly:
        if proc is not None:
            self.kill_process_children(proc.pid)

    if not killed and (proc is None or proc.poll() != 0):
        failed = True

    elapsed = time.time() - start
    if failed:
        outcome = "assembly_failed"
        logger.info("Sample: %s target: %s iteration: %s Assembly failed after %s seconds" % (sample, target, params['iteration'], elapsed))
    elif killed:
        outcome = "assembly_killed"
        logger.info("Sample: %s target: %s iteration: %s Assembly killed after %s seconds" % (sample, target, params['iteration'], elapsed))
    else:
        #Run finished without error
        outcome = "assembly_complete"
        logger.info("Sample: %s target: %s iteration: %s Assembly finished in %s seconds" % (sample, target, params['iteration'], elapsed))

    with open(os.path.join(params['target_dir'], "finished"), 'w') as outf:
        outf.write(outcome)
def run_blat(self):
    """
    Map the sample's reads against the reference with blat.

    Keys read from self.params:
        sample, reference, working_dir: required.
        PE1 and PE2, and/or SE: read files to map.
        verbose: when the key is present, blat output goes to
            <working_dir>/mapping_log.txt, otherwise to os.devnull.
        format, fastmap, maskrepeats, iteration, sloppymapping: select
            blat command line switches.

    The read file names are written to <working_dir>/reads.txt, blat
    writes <working_dir>/mapping.psl, the PSL is parsed with
    self.PSL_to_dict into self.params['mapping_dict'] and the PSL file is
    removed afterwards.

    Raises:
        exceptions.FatalError: on missing params or input files, when blat
            cannot be run, or when it exits with a non-zero status.
    """
    params = self.params
    has_pe = 'PE1' in params and 'PE2' in params
    has_se = 'SE' in params

    #Check for necessary params:
    if not ('sample' in params and 'reference' in params and
            'working_dir' in params and (has_pe or has_se)):
        raise exceptions.FatalError('Missing self.params in run_blat.')

    #Check for necessary files:
    if not os.path.exists(params['reference']):
        raise exceptions.FatalError("Missing reference file for mapping")
    if has_pe and not (os.path.exists(params['PE1']) and os.path.exists(params['PE2'])):
        raise exceptions.FatalError("One or both PE files can not be found for mapping.")
    if has_se and not os.path.exists(params['SE']):
        raise exceptions.FatalError("SE file cannot be found.")

    #Blat doesn't need an index
    working_dir = params['working_dir']
    reads_list = os.path.join(working_dir, 'reads.txt')
    psl_path = os.path.join(working_dir, 'mapping.psl')

    #Build a temporary txt file with all of the reads:
    with open(reads_list, 'w') as allreads_outf:
        if has_pe:
            allreads_outf.write(params['PE1'] + '\n')
            allreads_outf.write(params['PE2'] + '\n')
        if has_se:
            allreads_outf.write(params['SE'] + '\n')

    #Do blat mapping
    args = ['blat', params['reference'], reads_list]
    if params['format'] == 'fastq':
        args.append('-fastq')
    if params['fastmap']:
        args.append('-fastMap')
    #Some new experimental params to increase specificity after the first iteration:
    if params['maskrepeats']:
        args.append("-mask=lower")
    if params['iteration'] > 0 or not params['sloppymapping']:
        args.append("-minIdentity=98")
        args.append("-minScore=40")
    args.append(psl_path)

    logger.info("Sample: %s Calling blat mapper" % params['sample'])
    logger.debug(" ".join(args))

    #Check whether to log to temporary file, or default to os.devnull
    # NOTE(review): this tests for the presence of the key, not its value.
    if 'verbose' in params:
        out = open(os.path.join(working_dir, "mapping_log.txt"), 'w')
    else:
        out = open(os.devnull, 'w')
    try:
        ret = subprocess.call(args, stdout=out, stderr=out)
    except Exception as exc:
        txt = ("Sample %s: Unhandeled error running blat mapping, check log file." % params['sample']) + '\n\t' + str(exc)
        raise exceptions.FatalError(txt)
    finally:
        out.close()

    if ret != 0:
        raise exceptions.FatalError('Sample: %s Error running blat mapping, check log file. \n\t %s' % (params['sample'], " ".join(args)))

    #Extract the PSL to a dict
    self.params['mapping_dict'] = self.PSL_to_dict(psl_path)

    #Cleanup
    os.remove(psl_path)
def killall(self):
    """
    Terminate every worker in self.workers and wait for it to exit.

    Iterates over the workers that were actually created instead of
    indexing by self.nprocs, so a partially populated worker list cannot
    raise IndexError during shutdown.
    """
    for worker in self.workers:
        logger.debug("Shutting down %s" % (worker.name))
        worker.terminate()
        worker.join()
def debug(self, msg):
    """
    Log msg at debug level, prefixed with self.name left-justified in a
    12 character column, but only when self.loglevel is logging.DEBUG.
    """
    if not self.loglevel == logging.DEBUG:
        return
    label = self.name
    logger.debug("%-12s| %s" % (label, msg))
def run_blat(self):
    """
    Map the sample's reads against the reference with blat.

    Keys read from self.params:
        sample, reference, working_dir: required.
        PE1 and PE2, and/or SE: read files to map.
        verbose: when the key is present, blat output goes to
            <working_dir>/mapping_log.txt, otherwise to os.devnull.
        format, fastmap, maskrepeats, iteration, sloppymapping: select
            blat command line switches.

    The read file names are written to <working_dir>/reads.txt, blat
    writes <working_dir>/mapping.psl, the PSL is parsed with
    self.PSL_to_dict into self.params['mapping_dict'] and the PSL file is
    removed afterwards.

    Raises:
        exceptions.FatalError: on missing params or input files, when blat
            cannot be run, or when it exits with a non-zero status.
    """
    params = self.params
    has_pe = 'PE1' in params and 'PE2' in params
    has_se = 'SE' in params

    #Check for necessary params:
    if not ('sample' in params and 'reference' in params and
            'working_dir' in params and (has_pe or has_se)):
        raise exceptions.FatalError('Missing self.params in run_blat.')

    #Check for necessary files:
    if not os.path.exists(params['reference']):
        raise exceptions.FatalError("Missing reference file for mapping")
    if has_pe and not (os.path.exists(params['PE1']) and os.path.exists(params['PE2'])):
        raise exceptions.FatalError("One or both PE files can not be found for mapping.")
    if has_se and not os.path.exists(params['SE']):
        raise exceptions.FatalError("SE file cannot be found.")

    #Blat doesn't need an index
    working_dir = params['working_dir']
    reads_list = os.path.join(working_dir, 'reads.txt')
    psl_path = os.path.join(working_dir, 'mapping.psl')

    #Build a temporary txt file with all of the reads:
    with open(reads_list, 'w') as allreads_outf:
        if has_pe:
            allreads_outf.write(params['PE1'] + '\n')
            allreads_outf.write(params['PE2'] + '\n')
        if has_se:
            allreads_outf.write(params['SE'] + '\n')

    #Do blat mapping
    args = ['blat', params['reference'], reads_list]
    if params['format'] == 'fastq':
        args.append('-fastq')
    if params['fastmap']:
        args.append('-fastMap')
    #Some new experimental params to increase specificity after the first iteration:
    if params['maskrepeats']:
        args.append("-mask=lower")
    if params['iteration'] > 0 or not params['sloppymapping']:
        args.append("-minIdentity=98")
        args.append("-minScore=40")
    args.append(psl_path)

    logger.info("Sample: %s Calling blat mapper" % params['sample'])
    logger.debug(" ".join(args))

    #Check whether to log to temporary file, or default to os.devnull
    # NOTE(review): this tests for the presence of the key, not its value.
    if 'verbose' in params:
        out = open(os.path.join(working_dir, "mapping_log.txt"), 'w')
    else:
        out = open(os.devnull, 'w')
    try:
        ret = subprocess.call(args, stdout=out, stderr=out)
    except Exception as exc:
        txt = ("Sample %s: Unhandeled error running blat mapping, check log file." % params['sample']) + '\n\t' + str(exc)
        raise exceptions.FatalError(txt)
    finally:
        out.close()

    if ret != 0:
        raise exceptions.FatalError('Sample: %s Error running blat mapping, check log file. \n\t %s' % (params['sample'], " ".join(args)))

    #Extract the PSL to a dict
    self.params['mapping_dict'] = self.PSL_to_dict(psl_path)

    #Cleanup
    os.remove(psl_path)