Exemple #1
0
 def start(self):
     """Poll the unfinished targets and queue whichever job comes next.

     A target is marked complete once a file named 'finished' exists in its
     folder.  While at least one target is still outstanding a fresh
     AssemblyChecker job is submitted (after a 5 second pause); once every
     target is complete a Finisher job is submitted instead.
     """
     targets = self.params['targets']
     sample = self.params['sample']
     completed = sum(targets.values())
     logger.info("Sample: %s AssemblyChecker started with %s of %s targets completed" % (sample, completed, len(targets)))

     for target_folder in targets:
         if targets[target_folder]:
             # already recorded as finished on an earlier pass
             continue
         marker = os.path.join(target_folder, 'finished')
         if not os.path.exists(marker):
             continue
         targets[target_folder] = True
         logger.info("%s exists" % marker)
         completed += 1

     # Shallow copy of the parameters, handed to whichever job is queued next
     next_params = {key: self.params[key] for key in self.params}

     if sum(targets.values()) < len(targets):
         # some jobs haven't completed yet: pause, then put another checker
         # back on the queue
         time.sleep(5)
         self.submit(AssemblyChecker.to_job(next_params))
         logger.info("Sample: %s Assemblies not finished: %s of %s targets completed" % (sample, completed, len(targets)))
         return

     logger.debug("Sample: %s, iteration %s, Submitting finisher job to queue." % (sample, self.params['iteration']))
     self.submit(Finisher.to_job(next_params))
     logger.info("Sample: %s Assemblies finished: %s of %s targets completed" % (sample, completed, len(targets)))
Exemple #2
0
    def all_workers_waiting(self):
        """Return True when every one of the nprocs status slots equals 1.

        A status value of 1 is what this method counts as "waiting"; each
        slot is also reported at debug level.
        """
        idle = 0
        for slot in range(self.nprocs):
            logger.debug("ProcessRunner %d reports %d" % (slot, self.status[slot]))
            if self.status[slot] != 1:
                continue
            idle += 1

        logger.debug("%d of %d workers are in the waiting state" % (idle, self.nprocs))
        return idle == self.nprocs
Exemple #3
0
    def all_workers_waiting(self):
        """Tell whether all workers are currently in the waiting state.

        Walks the status slots 0..nprocs-1, logs each one at debug level and
        counts those equal to 1 (waiting).  Returns True only when that count
        equals nprocs.
        """
        n_waiting = 0
        for worker_id in range(self.nprocs):
            report = "ProcessRunner %d reports %d" % (worker_id, self.status[worker_id])
            logger.debug(report)
            n_waiting += int(self.status[worker_id] == 1)

        summary = "%d of %d workers are in the waiting state" % (n_waiting, self.nprocs)
        logger.debug(summary)
        return self.nprocs == n_waiting
Exemple #4
0
 def run(self):
     """Worker main loop: wait, run one queued job, record the outcome, repeat.

     Each pass calls waiting(), launch() and update_runstats().  Errors are
     handled per pass and the loop then continues:

     * RerunnableError - logged as a warning and recorded through
       update_runstats(1).
     * FatalError and any other Exception - logged as an error, then SIGINT
       is sent to the process whose id is stored in self.ppid.
     * KeyboardInterrupt / SystemExit - logged at debug level only.

     The loop has no exit condition of its own.
     """
     while True:
         try:
             self.waiting()
             self.launch()
             self.update_runstats()
         except exceptions.RerunnableError as e:
             # Logger.warn is a deprecated alias; warning() is the supported name
             logger.warning("[%s] A job needs to be rerun: %s" % (self.name, e))
             self.update_runstats(1)
         except exceptions.FatalError as e:
             logger.error("[%s] A fatal error occurred: %s" % (self.name, e))
             os.kill(self.ppid, signal.SIGINT)
         except (KeyboardInterrupt, SystemExit):
             # NOTE(review): the interrupt is swallowed and the loop keeps
             # running; presumably the owning process terminates the worker
             # from outside - confirm this is intended.
             logger.debug("Process interrupted")
         except Exception:
             logger.error("\n".join(traceback.format_exception(*sys.exc_info())))
             logger.error("An unhandled exception occurred")
             os.kill(self.ppid, signal.SIGINT)
Exemple #5
0
 def start(self):
     """Re-check unfinished targets, then submit a checker or a finisher job.

     For each target not yet flagged as done, the presence of a 'finished'
     file inside the target folder flips its flag to True.  If any target
     remains unflagged, the method sleeps 5 seconds and submits another
     AssemblyChecker job; otherwise it submits a Finisher job.  Both jobs
     receive a shallow copy of self.params.
     """
     status_by_folder = self.params['targets']
     sample = self.params['sample']
     completed = sum(status_by_folder.values())
     logger.info(
         "Sample: %s AssemblyChecker started with %s of %s targets completed"
         % (sample, completed, len(status_by_folder)))

     for target_folder in status_by_folder:
         already_done = status_by_folder[target_folder]
         if already_done:
             continue
         finished_file = os.path.join(target_folder, 'finished')
         if os.path.exists(finished_file):
             status_by_folder[target_folder] = True
             logger.info("%s exists" % finished_file)
             completed += 1

     # Work out whether anything is still outstanding
     outstanding = len(status_by_folder) > sum(status_by_folder.values())
     job_params = {name: self.params[name] for name in self.params}

     if outstanding:
         # sleep 5 seconds before putting a checker back on the job_q
         time.sleep(5)
         self.submit(AssemblyChecker.to_job(job_params))
         logger.info(
             "Sample: %s Assemblies not finished: %s of %s targets completed"
             % (sample, completed, len(status_by_folder)))
     else:
         logger.debug(
             "Sample: %s, iteration %s, Submitting finisher job to queue." %
             (sample, self.params['iteration']))
         self.submit(Finisher.to_job(job_params))
         logger.info(
             "Sample: %s Assemblies finished: %s of %s targets completed" %
             (sample, completed, len(status_by_folder)))
Exemple #6
0
    def launch(self):
        """Take one item off the queue, run the job it describes, mark it done.

        The item is a mapping with a 'runner' entry (name of an attribute of
        ARC.runners) and a 'params' entry passed to that runner's constructor.
        """
        # q.get() blocks until an item is available
        item = self.q.get()

        # Flag this worker as running, then build and execute the job
        self.running()
        runner_factory = getattr(ARC.runners, item['runner'])
        job = runner_factory(item['params'])
        logger.debug("[%s] Processing: %s" % (self.name, job.message()))
        job.queue(self.q)
        job.runner()

        # Record which kind of job was run
        self.update_jobstats(item['runner'])

        # Drop our references before signalling completion
        job = item = None

        # Tell the queue that this task has been completed
        self.q.task_done()
Exemple #7
0
    def run(self):
        """Start the workers, wait for the queue to drain, then log statistics.

        nprocs ProcessRunner workers are created and started.  The method then
        repeatedly joins the queue; once the join returns and
        all_workers_waiting() is true it stops waiting, otherwise it sleeps 5
        seconds and joins again.  killall() is called exactly once when the
        wait ends, whether normally or through an exception.

        Raises:
            exceptions.FatalError, KeyboardInterrupt, SystemExit, Exception:
                logged and re-raised after the workers have been killed.
        """
        logger.info("Starting...")
        logger.debug("Setting up workers.")

        for i in range(self.nprocs):
            worker = ProcessRunner(i, self.q, self.status, self.stats,
                                   self.pid)
            self.workers.append(worker)
            worker.daemon = False
            worker.start()

        # The try/finally wraps the whole loop so the workers are killed once,
        # on the way out, and not on every pass where we are still waiting for
        # them to settle.
        try:
            while True:
                self.q.join()

                # This shouldn't be needed but we will check just in case
                if self.all_workers_waiting():
                    logger.debug(
                        "Workers are all waiting and the queue is empty.  Exiting"
                    )
                    break

                logger.debug(
                    "Workers are not in a waiting state.  Waiting for more."
                )
                time.sleep(5)
        except exceptions.FatalError:
            logger.error("A fatal error was encountered.")
            raise
        except (KeyboardInterrupt, SystemExit):
            logger.error("Terminating processes")
            raise
        except Exception:
            logger.error("\n".join(
                traceback.format_exception(*sys.exc_info())))
            logger.error("An unhandled exception occurred")
            raise
        finally:
            # Kill 'em all!
            self.killall()

        logger.info("-----")
        logger.info("%d processes returned ok." % (self.stats[0]))
        logger.info("%d processes had to be rerun." % (self.stats[1]))
        logger.info("-----")
        logger.info("%d Mapper jobs run." % (self.stats[2]))
        logger.info("%d Assembly jobs run." % (self.stats[3]))
        logger.info("%d Checker jobs run." % (self.stats[4]))
        logger.info("%d Finisher jobs run." % (self.stats[5]))
        logger.info("-----")
Exemple #8
0
    def run(self):
        """Start the workers, wait for the queue to drain, then log statistics.

        nprocs ProcessRunner workers are created and started.  The method then
        repeatedly joins the queue; once the join returns and
        all_workers_waiting() is true it stops waiting, otherwise it sleeps 5
        seconds and joins again.  killall() is called exactly once when the
        wait ends, whether normally or through an exception.

        Raises:
            exceptions.FatalError, KeyboardInterrupt, SystemExit, Exception:
                logged and re-raised after the workers have been killed.
        """
        logger.info("Starting...")
        logger.debug("Setting up workers.")

        for i in range(self.nprocs):
            worker = ProcessRunner(
                i,
                self.q,
                self.status,
                self.stats,
                self.pid)
            self.workers.append(worker)
            worker.daemon = False
            worker.start()

        # The try/finally wraps the whole loop so the workers are killed once,
        # on the way out, and not on every pass where we are still waiting for
        # them to settle.
        try:
            while True:
                self.q.join()

                # This shouldn't be needed but we will check just in case
                if self.all_workers_waiting():
                    logger.debug("Workers are all waiting and the queue is empty.  Exiting")
                    break

                logger.debug("Workers are not in a waiting state.  Waiting for more.")
                time.sleep(5)
        except exceptions.FatalError:
            logger.error("A fatal error was encountered.")
            raise
        except (KeyboardInterrupt, SystemExit):
            logger.error("Terminating processes")
            raise
        except Exception:
            logger.error("\n".join(traceback.format_exception(*sys.exc_info())))
            logger.error("An unhandled exception occurred")
            raise
        finally:
            # Kill 'em all!
            self.killall()

        logger.info("-----")
        logger.info("%d processes returned ok." % (self.stats[0]))
        logger.info("%d processes had to be rerun." % (self.stats[1]))
        logger.info("-----")
        logger.info("%d Mapper jobs run." % (self.stats[2]))
        logger.info("%d Assembly jobs run." % (self.stats[3]))
        logger.info("%d Checker jobs run." % (self.stats[4]))
        logger.info("%d Finisher jobs run." % (self.stats[5]))
        logger.info("-----")
Exemple #9
0
    def RunNewbler(self):
        """Run a Newbler assembly for one target and record how it ended.

        Calls newAssembly, then addRun once per read file, then runProject.
        runProject is polled every half second and its process children are
        killed once it has run longer than params['assemblytimeout'] seconds.
        Finally a file named 'finished' is written into target_dir holding
        'assembly_failed', 'assembly_killed' or 'assembly_complete', a tab,
        and the elapsed seconds of the runProject step.

        Expects params keys:
            assembly_PE1 and assembly_PE2 and/or assembly_SE
            sample, target, target_dir, iteration
            verbose, last_assembly, cdna, urt, rip, assemblytimeout

        Raises:
            exceptions.FatalError: the read-file params are absent, or a
                listed read file does not exist.
        """
        params = self.params
        have_pe = 'assembly_PE1' in params and 'assembly_PE2' in params
        have_se = 'assembly_SE' in params

        #Check for necessary params:
        if not (have_pe or have_se):
            raise exceptions.FatalError('Missing self.params in RunNewbler.')

        #Check for necessary files (BOTH paired-end files must exist):
        if have_pe and not (os.path.exists(params['assembly_PE1'])
                            and os.path.exists(params['assembly_PE2'])):
            raise exceptions.FatalError('Missing PE files in RunNewbler.')

        if have_se and not os.path.exists(params['assembly_SE']):
            raise exceptions.FatalError('Missing SE file in RunNewbler.')

        sample = params['sample']
        target = params['target']
        target_dir = params['target_dir']
        project = os.path.join(target_dir, 'assembly')
        killed = False
        failed = False
        proc = None  # stays None if runProject could not be started

        #Read files to register with addRun: PE1, PE2, then SE
        read_files = []
        if have_pe:
            read_files += [params['assembly_PE1'], params['assembly_PE2']]
        if have_se:
            read_files.append(params['assembly_SE'])

        #determine whether to pipe output to a file or /dev/null
        if params['verbose']:
            out = open(os.path.join(target_dir, "assembly.log"), 'w')
        else:
            out = open(os.devnull, 'w')

        # Outer try/finally guarantees the log handle is closed even when one
        # of the set-up steps raises; such errors still propagate to the caller.
        try:
            #Build args for newAssembly:
            args = ['newAssembly', '-force']
            if params['last_assembly'] and params['cdna']:
                #only run with cdna switch on the final assembly
                args += ['-cdna']
            args += [project]
            logger.debug("Calling newAssembly for sample: %s target %s" % (sample, target))
            logger.info(" ".join(args))
            subprocess.call(args, stdout=out, stderr=out)

            #Build args for addRun, one call per read file:
            for read_file in read_files:
                args = ['addRun', project, read_file]
                logger.debug("Calling addRun for sample: %s target %s" % (sample, target))
                logger.debug(" ".join(args))
                subprocess.call(args, stdout=out, stderr=out)

            #Build args for runProject
            args = ['runProject', '-cpu', '1']
            if params['last_assembly'] and params['cdna']:
                args += ['-noace']
            else:
                args += ['-nobig']
            if params['urt'] and not params['last_assembly']:
                #only run with the -urt switch when it isn't the final assembly
                args += ['-urt']
            if params['rip']:
                args += ['-rip']
            args += [project]

            start = time.time()
            try:
                logger.debug("Calling runProject for sample: %s target %s" % (sample, target))
                logger.debug(" ".join(args))
                proc = subprocess.Popen(args, stdout=out, stderr=out)
                while proc.poll() is None:
                    if time.time() - start > params['assemblytimeout']:
                        self.kill_process_children(proc.pid)
                        logger.warning("Sample: %s target: %s iteration: %s Killing assembly after %s seconds" % (sample, target, params['iteration'], time.time() - start))
                        killed = True
                        break
                    time.sleep(.5)
            except Exception as exc:
                # Best effort: a failure here is recorded in the 'finished'
                # file rather than raised.
                txt = "Sample: %s, Target: %s: Unhandeled error running Newbler assembly" % (sample, target)
                txt += '\n\t' + str(exc) + '\n' + traceback.format_exc()
                logger.warning(txt)
                failed = True
        finally:
            out.close()

        #Sometimes newbler doesn't seem to exit completely:
        if proc is not None:
            self.kill_process_children(proc.pid)

        # proc cannot be None here: if Popen had failed, failed would be True
        if not (failed or killed) and proc.poll() != 0:
            failed = True

        elapsed = time.time() - start
        if failed:
            status = "assembly_failed"
            logger.info("Sample: %s target: %s iteration: %s Assembly failed after %s seconds" % (sample, target, params['iteration'], elapsed))
        elif killed:
            status = "assembly_killed"
            logger.info("Sample: %s target: %s iteration: %s Assembly killed after %s seconds" % (sample, target, params['iteration'], elapsed))
        else:
            #Run finished without error
            status = "assembly_complete"
            logger.info("Sample: %s target: %s iteration: %s Assembly finished in %s seconds" % (sample, target, params['iteration'], elapsed))

        with open(os.path.join(target_dir, "finished"), 'w') as outf:
            outf.write(status + "\t" + str(elapsed))
Exemple #10
0
    def RunSpades(self):
        """Run a SPAdes assembly for one target and record how it ended.

        Builds a spades.py command line (-t 1, optional --only-assembler,
        -1/-2 for paired-end reads, -s for single-end reads, -o for the
        'assembly' folder inside target_dir) and runs it.  The process is
        polled every half second and killed once it has run longer than
        params['assemblytimeout'] seconds.  Finally a file named 'finished'
        is written into target_dir holding 'assembly_failed',
        'assembly_killed' or 'assembly_complete'.

        Expects params keys:
            assembly_PE1 and assembly_PE2 and/or assembly_SE
            sample, target, target_dir, iteration
            verbose, last_assembly, only-assembler, format, assemblytimeout

        Raises:
            exceptions.FatalError: the read-file params are absent, or a
                listed read file does not exist.
        """
        params = self.params
        have_pe = 'assembly_PE1' in params and 'assembly_PE2' in params
        have_se = 'assembly_SE' in params

        #Check that required params are available
        if not (have_pe or have_se):
            raise exceptions.FatalError('Missing self.params in RunSpades.')

        #Check that the files actually exist (BOTH paired-end files are needed)
        if have_pe and not (os.path.exists(params['assembly_PE1'])
                            and os.path.exists(params['assembly_PE2'])):
            raise exceptions.FatalError('Missing PE files in RunSpades.')
        if have_se and not os.path.exists(params['assembly_SE']):
            raise exceptions.FatalError('Missing SE file in RunSpades.')

        sample = params['sample']
        target = params['target']
        target_dir = params['target_dir']

        #Build args for assembler call
        args = ['spades.py', '-t', '1']
        if params['only-assembler'] and not params['last_assembly']:
            args.append("--only-assembler")
        if params['format'] == 'fasta':
            args.append('--only-assembler')  # spades errors on read correction if the input isn't fastq
        if have_pe:
            args += ['-1', params['assembly_PE1'], '-2', params['assembly_PE2']]
        if have_se:
            args += ['-s', params['assembly_SE']]
        args += ['-o', os.path.join(target_dir, 'assembly')]
        if params['verbose']:
            out = open(os.path.join(target_dir, "assembly.log"), 'w')
        else:
            out = open(os.devnull, 'w')

        logger.debug("Sample: %s target: %s Running spades assembler." % (sample, target))
        logger.info(" ".join(args))
        killed = False
        failed = False
        proc = None  # stays None if spades.py could not be started
        start = time.time()
        try:
            proc = subprocess.Popen(args, stdout=out, stderr=out)
            while proc.poll() is None:
                if time.time() - start > params['assemblytimeout']:
                    proc.kill()
                    killed = True
                    logger.warning("Sample: %s target: %s Assembly killed after %s seconds." % (sample, target, time.time() - start))
                    break
                time.sleep(.5)
        except Exception as exc:
            # Best effort: a failure here is recorded in the 'finished' file
            # rather than raised.
            txt = ("Sample: %s, Target: %s: Unhandeled error running Spades assembly" % (sample, target))
            txt += '\n\t' + str(exc)
            logger.warning(txt)
            failed = True
        finally:
            out.close()

        #Ensure that assembler exits cleanly:
        if proc is not None:
            self.kill_process_children(proc.pid)

        # proc cannot be None here: if Popen had failed, failed would be True
        if not (failed or killed) and proc.poll() != 0:
            failed = True

        elapsed = time.time() - start
        if failed:
            status = "assembly_failed"
            logger.info("Sample: %s target: %s iteration: %s Assembly failed after %s seconds" % (sample, target, params['iteration'], elapsed))
        elif killed:
            status = "assembly_killed"
            logger.info("Sample: %s target: %s iteration: %s Assembly killed after %s seconds" % (sample, target, params['iteration'], elapsed))
        else:
            #Run finished without error
            status = "assembly_complete"
            logger.info("Sample: %s target: %s iteration: %s Assembly finished in %s seconds" % (sample, target, params['iteration'], elapsed))

        with open(os.path.join(target_dir, "finished"), 'w') as outf:
            outf.write(status)
Exemple #11
0
    def RunNewbler(self):
        """Run a Newbler assembly for one target and record how it ended.

        Calls newAssembly, then addRun once per read file, then runProject.
        runProject is polled every half second and its process children are
        killed once it has run longer than params['assemblytimeout'] seconds.
        Finally a file named 'finished' is written into target_dir holding
        'assembly_failed', 'assembly_killed' or 'assembly_complete', a tab,
        and the elapsed seconds of the runProject step.

        Expects params keys:
            assembly_PE1 and assembly_PE2 and/or assembly_SE
            sample, target, target_dir, iteration
            verbose, last_assembly, cdna, urt, rip, assemblytimeout

        Raises:
            exceptions.FatalError: the read-file params are absent, or a
                listed read file does not exist.
        """
        params = self.params
        have_pe = 'assembly_PE1' in params and 'assembly_PE2' in params
        have_se = 'assembly_SE' in params

        #Check for necessary params:
        if not (have_pe or have_se):
            raise exceptions.FatalError('Missing self.params in RunNewbler.')

        #Check for necessary files (BOTH paired-end files must exist):
        if have_pe and not (os.path.exists(params['assembly_PE1'])
                            and os.path.exists(params['assembly_PE2'])):
            raise exceptions.FatalError('Missing PE files in RunNewbler.')

        if have_se and not os.path.exists(params['assembly_SE']):
            raise exceptions.FatalError('Missing SE file in RunNewbler.')

        sample = params['sample']
        target = params['target']
        target_dir = params['target_dir']
        project = os.path.join(target_dir, 'assembly')
        killed = False
        failed = False
        proc = None  # stays None if runProject could not be started

        #Read files to register with addRun: PE1, PE2, then SE
        read_files = []
        if have_pe:
            read_files += [params['assembly_PE1'], params['assembly_PE2']]
        if have_se:
            read_files.append(params['assembly_SE'])

        #determine whether to pipe output to a file or /dev/null
        if params['verbose']:
            out = open(os.path.join(target_dir, "assembly.log"), 'w')
        else:
            out = open(os.devnull, 'w')

        # Outer try/finally guarantees the log handle is closed even when one
        # of the set-up steps raises; such errors still propagate to the caller.
        try:
            #Build args for newAssembly:
            args = ['newAssembly', '-force']
            if params['last_assembly'] and params['cdna']:
                #only run with cdna switch on the final assembly
                args += ['-cdna']
            args += [project]
            logger.debug("Calling newAssembly for sample: %s target %s" %
                         (sample, target))
            logger.info(" ".join(args))
            subprocess.call(args, stdout=out, stderr=out)

            #Build args for addRun, one call per read file:
            for read_file in read_files:
                args = ['addRun', project, read_file]
                logger.debug("Calling addRun for sample: %s target %s" %
                             (sample, target))
                logger.debug(" ".join(args))
                subprocess.call(args, stdout=out, stderr=out)

            #Build args for runProject
            args = ['runProject', '-cpu', '1']
            if params['last_assembly'] and params['cdna']:
                args += ['-noace']
            else:
                args += ['-nobig']
            if params['urt'] and not params['last_assembly']:
                #only run with the -urt switch when it isn't the final assembly
                args += ['-urt']
            if params['rip']:
                args += ['-rip']
            args += [project]

            start = time.time()
            try:
                logger.debug("Calling runProject for sample: %s target %s" %
                             (sample, target))
                logger.debug(" ".join(args))
                proc = subprocess.Popen(args, stdout=out, stderr=out)
                while proc.poll() is None:
                    if time.time() - start > params['assemblytimeout']:
                        self.kill_process_children(proc.pid)
                        logger.warning(
                            "Sample: %s target: %s iteration: %s Killing assembly after %s seconds"
                            % (sample, target, params['iteration'],
                               time.time() - start))
                        killed = True
                        break
                    time.sleep(.5)
            except Exception as exc:
                # Best effort: a failure here is recorded in the 'finished'
                # file rather than raised.
                txt = "Sample: %s, Target: %s: Unhandeled error running Newbler assembly" % (
                    sample, target)
                txt += '\n\t' + str(exc) + '\n' + traceback.format_exc()
                logger.warning(txt)
                failed = True
        finally:
            out.close()

        #Sometimes newbler doesn't seem to exit completely:
        if proc is not None:
            self.kill_process_children(proc.pid)

        # proc cannot be None here: if Popen had failed, failed would be True
        if not (failed or killed) and proc.poll() != 0:
            failed = True

        elapsed = time.time() - start
        if failed:
            status = "assembly_failed"
            logger.info(
                "Sample: %s target: %s iteration: %s Assembly failed after %s seconds"
                % (sample, target, params['iteration'], elapsed))
        elif killed:
            status = "assembly_killed"
            logger.info(
                "Sample: %s target: %s iteration: %s Assembly killed after %s seconds"
                % (sample, target, params['iteration'], elapsed))
        else:
            #Run finished without error
            status = "assembly_complete"
            logger.info(
                "Sample: %s target: %s iteration: %s Assembly finished in %s seconds"
                % (sample, target, params['iteration'], elapsed))

        with open(os.path.join(target_dir, "finished"), 'w') as outf:
            outf.write(status + "\t" + str(elapsed))
Exemple #12
0
    def RunSpades(self):
        """Run a SPAdes assembly for one target and record how it ended.

        Builds a spades.py command line (-t 1, optional --only-assembler,
        -1/-2 for paired-end reads, -s for single-end reads, -o for the
        'assembly' folder inside target_dir) and runs it.  The process is
        polled every half second and killed once it has run longer than
        params['assemblytimeout'] seconds.  Finally a file named 'finished'
        is written into target_dir holding 'assembly_failed',
        'assembly_killed' or 'assembly_complete'.

        Expects params keys:
            assembly_PE1 and assembly_PE2 and/or assembly_SE
            sample, target, target_dir, iteration
            verbose, last_assembly, only-assembler, format, assemblytimeout

        Raises:
            exceptions.FatalError: the read-file params are absent, or a
                listed read file does not exist.
        """
        params = self.params
        have_pe = 'assembly_PE1' in params and 'assembly_PE2' in params
        have_se = 'assembly_SE' in params

        #Check that required params are available
        if not (have_pe or have_se):
            raise exceptions.FatalError('Missing self.params in RunSpades.')

        #Check that the files actually exist (BOTH paired-end files are needed)
        if have_pe and not (os.path.exists(params['assembly_PE1'])
                            and os.path.exists(params['assembly_PE2'])):
            raise exceptions.FatalError('Missing PE files in RunSpades.')
        if have_se and not os.path.exists(params['assembly_SE']):
            raise exceptions.FatalError('Missing SE file in RunSpades.')

        sample = params['sample']
        target = params['target']
        target_dir = params['target_dir']

        #Build args for assembler call
        args = ['spades.py', '-t', '1']
        if params['only-assembler'] and not params['last_assembly']:
            args.append("--only-assembler")
        if params['format'] == 'fasta':
            # spades errors on read correction if the input isn't fastq
            args.append('--only-assembler')
        if have_pe:
            args += [
                '-1', params['assembly_PE1'], '-2',
                params['assembly_PE2']
            ]
        if have_se:
            args += ['-s', params['assembly_SE']]
        args += ['-o', os.path.join(target_dir, 'assembly')]
        if params['verbose']:
            out = open(os.path.join(target_dir, "assembly.log"), 'w')
        else:
            out = open(os.devnull, 'w')

        logger.debug("Sample: %s target: %s Running spades assembler." %
                     (sample, target))
        logger.info(" ".join(args))
        killed = False
        failed = False
        proc = None  # stays None if spades.py could not be started
        start = time.time()
        try:
            proc = subprocess.Popen(args, stdout=out, stderr=out)
            while proc.poll() is None:
                if time.time() - start > params['assemblytimeout']:
                    proc.kill()
                    killed = True
                    logger.warning(
                        "Sample: %s target: %s Assembly killed after %s seconds."
                        % (sample, target, time.time() - start))
                    break
                time.sleep(.5)
        except Exception as exc:
            # Best effort: a failure here is recorded in the 'finished' file
            # rather than raised.
            txt = (
                "Sample: %s, Target: %s: Unhandeled error running Spades assembly"
                % (sample, target))
            txt += '\n\t' + str(exc)
            logger.warning(txt)
            failed = True
        finally:
            out.close()

        #Ensure that assembler exits cleanly:
        if proc is not None:
            self.kill_process_children(proc.pid)

        # proc cannot be None here: if Popen had failed, failed would be True
        if not (failed or killed) and proc.poll() != 0:
            failed = True

        elapsed = time.time() - start
        if failed:
            status = "assembly_failed"
            logger.info(
                "Sample: %s target: %s iteration: %s Assembly failed after %s seconds"
                % (sample, target, params['iteration'], elapsed))
        elif killed:
            status = "assembly_killed"
            logger.info(
                "Sample: %s target: %s iteration: %s Assembly killed after %s seconds"
                % (sample, target, params['iteration'], elapsed))
        else:
            #Run finished without error
            status = "assembly_complete"
            logger.info(
                "Sample: %s target: %s iteration: %s Assembly finished in %s seconds"
                % (sample, target, params['iteration'], elapsed))

        with open(os.path.join(target_dir, "finished"), 'w') as outf:
            outf.write(status)
Exemple #13
0
    def run_blat(self):
        """Map the sample's reads against the reference with blat.

        Validates ``self.params``, writes the read file paths to
        ``reads.txt`` in the working directory, runs ``blat`` on that list
        and stores the result of ``self.PSL_to_dict`` on the produced
        ``mapping.psl`` in ``self.params['mapping_dict']``. The PSL file is
        removed afterwards.

        Required params: 'sample', 'reference', 'working_dir' and either
        both 'PE1' and 'PE2' or 'SE'. The keys 'format', 'fastmap',
        'maskrepeats', 'iteration' and 'sloppymapping' are read directly
        and are not covered by the up-front check.

        Raises:
            exceptions.FatalError: if required params or input files are
                missing, if blat cannot be launched, or if blat exits with
                a non-zero status.
        """
        params = self.params

        #Check for necessary params:
        has_pe = 'PE1' in params and 'PE2' in params
        has_se = 'SE' in params
        required = ('sample', 'reference', 'working_dir')
        if not (all(key in params for key in required)
                and (has_pe or has_se)):
            raise exceptions.FatalError('Missing self.params in run_blat.')

        #Check for necessary files:
        if not os.path.exists(params['reference']):
            raise exceptions.FatalError("Missing reference file for mapping")
        if has_pe:
            if not (os.path.exists(params['PE1'])
                    and os.path.exists(params['PE2'])):
                raise exceptions.FatalError(
                    "One or both PE files can not be found for mapping.")
        if has_se:
            if not os.path.exists(params['SE']):
                raise exceptions.FatalError("SE file cannot be found.")

        #Blat doesn't need an index
        working_dir = params['working_dir']
        reads_path = os.path.join(working_dir, 'reads.txt')
        psl_path = os.path.join(working_dir, 'mapping.psl')

        #Build a temporary txt file with all of the reads:
        with open(reads_path, 'w') as allreads_outf:
            if has_pe:
                allreads_outf.write(params['PE1'] + '\n')
                allreads_outf.write(params['PE2'] + '\n')
            if has_se:
                allreads_outf.write(params['SE'] + '\n')

        #Do blat mapping
        args = ['blat', params['reference'], reads_path]
        if params['format'] == 'fastq':
            args.append('-fastq')
        if params['fastmap']:
            args.append('-fastMap')
        #Some new experimental params to increase specificity after the first iteration:
        if params['maskrepeats']:
            args.append("-mask=lower")
        if params['iteration'] > 0 or not params['sloppymapping']:
            args.append("-minIdentity=98")
            args.append("-minScore=40")
        args.append(psl_path)

        #Log to a file only when the 'verbose' key is present (its value is
        #not inspected), otherwise discard blat's output.
        if 'verbose' in params:
            log_path = os.path.join(working_dir, "mapping_log.txt")
        else:
            log_path = os.devnull

        logger.info("Sample: %s Calling blat mapper" % params['sample'])
        logger.debug(" ".join(args))
        #The log handle is opened right before use and closed by the
        #context manager, so it cannot leak on any error path.
        with open(log_path, 'w') as out:
            try:
                ret = subprocess.call(args, stdout=out, stderr=out)
            except Exception as exc:
                txt = (
                    "Sample %s: Unhandeled error running blat mapping, check log file."
                    % params['sample']) + '\n\t' + str(exc)
                raise exceptions.FatalError(txt)
        if ret != 0:
            raise exceptions.FatalError(
                'Sample: %s Error running blat mapping, check log file. \n\t %s'
                % (params['sample'], " ".join(args)))

        #Extract the PSL to a dict
        self.params['mapping_dict'] = self.PSL_to_dict(psl_path)

        #Cleanup
        os.remove(psl_path)
Exemple #14
0
 def killall(self):
     """Terminate and join the workers at indices 0..self.nprocs-1.

     Each worker's name is logged at debug level before it is terminated;
     workers are handled one at a time, in index order.
     """
     for slot in range(self.nprocs):
         worker = self.workers[slot]
         logger.debug("Shutting down %s" % worker.name)
         worker.terminate()
         worker.join()
Exemple #15
0
 def killall(self):
     """Shut down the first ``self.nprocs`` entries of ``self.workers``.

     For each index in order: log the worker's name at debug level, call
     its ``terminate()`` and then wait for it with ``join()``.
     """
     total = self.nprocs
     for index in range(total):
         proc = self.workers[index]
         logger.debug("Shutting down %s" % proc.name)
         proc.terminate()
         proc.join()
Exemple #16
0
 def debug(self, msg):
     """Log ``msg`` at debug level, prefixed with this object's name.

     Nothing is logged (and ``self.name`` is not read) unless
     ``self.loglevel`` equals ``logging.DEBUG``. The name is left-justified
     in a 12-character column followed by ``| `` and the message.
     """
     if self.loglevel != logging.DEBUG:
         return
     logger.debug("%-12s| %s" % (self.name, msg))
Exemple #17
0
    def run_blat(self):
        """Map the sample's reads against the reference with blat.

        Validates ``self.params``, writes the read file paths to
        ``reads.txt`` in the working directory, runs ``blat`` on that list
        and stores the result of ``self.PSL_to_dict`` on the produced
        ``mapping.psl`` in ``self.params['mapping_dict']``. The PSL file is
        removed afterwards.

        Required params: 'sample', 'reference', 'working_dir' and either
        both 'PE1' and 'PE2' or 'SE'. The keys 'format', 'fastmap',
        'maskrepeats', 'iteration' and 'sloppymapping' are read directly
        and are not covered by the up-front check.

        Raises:
            exceptions.FatalError: if required params or input files are
                missing, if blat cannot be launched, or if blat exits with
                a non-zero status.
        """
        params = self.params

        #Check for necessary params:
        has_pe = 'PE1' in params and 'PE2' in params
        has_se = 'SE' in params
        required = ('sample', 'reference', 'working_dir')
        if not (all(key in params for key in required) and (has_pe or has_se)):
            raise exceptions.FatalError('Missing self.params in run_blat.')

        #Check for necessary files:
        if not os.path.exists(params['reference']):
            raise exceptions.FatalError("Missing reference file for mapping")
        if has_pe:
            if not (os.path.exists(params['PE1']) and os.path.exists(params['PE2'])):
                raise exceptions.FatalError(
                    "One or both PE files can not be found for mapping.")
        if has_se:
            if not os.path.exists(params['SE']):
                raise exceptions.FatalError("SE file cannot be found.")

        #Blat doesn't need an index
        working_dir = params['working_dir']
        reads_path = os.path.join(working_dir, 'reads.txt')
        psl_path = os.path.join(working_dir, 'mapping.psl')

        #Build a temporary txt file with all of the reads:
        with open(reads_path, 'w') as allreads_outf:
            if has_pe:
                allreads_outf.write(params['PE1'] + '\n')
                allreads_outf.write(params['PE2'] + '\n')
            if has_se:
                allreads_outf.write(params['SE'] + '\n')

        #Do blat mapping
        args = ['blat', params['reference'], reads_path]
        if params['format'] == 'fastq':
            args.append('-fastq')
        if params['fastmap']:
            args.append('-fastMap')
        #Some new experimental params to increase specificity after the first iteration:
        if params['maskrepeats']:
            args.append("-mask=lower")
        if params['iteration'] > 0 or not params['sloppymapping']:
            args.append("-minIdentity=98")
            args.append("-minScore=40")
        args.append(psl_path)

        #Log to a file only when the 'verbose' key is present (its value is
        #not inspected), otherwise discard blat's output.
        if 'verbose' in params:
            log_path = os.path.join(working_dir, "mapping_log.txt")
        else:
            log_path = os.devnull

        logger.info("Sample: %s Calling blat mapper" % params['sample'])
        logger.debug(" ".join(args))
        #The log handle is opened right before use and closed by the
        #context manager, so it cannot leak on any error path.
        with open(log_path, 'w') as out:
            try:
                ret = subprocess.call(args, stdout=out, stderr=out)
            except Exception as exc:
                txt = ("Sample %s: Unhandeled error running blat mapping, check log file." % params['sample']) + '\n\t' + str(exc)
                raise exceptions.FatalError(txt)
        if ret != 0:
            raise exceptions.FatalError('Sample: %s Error running blat mapping, check log file. \n\t %s' % (params['sample'], " ".join(args)))

        #Extract the PSL to a dict
        self.params['mapping_dict'] = self.PSL_to_dict(psl_path)

        #Cleanup
        os.remove(psl_path)