Example #1
    def __init__(self, shockurl, arasturl, config, threads, queue, kill_queue,
                 job_list, ctrl_conf):
        self.parser = SafeConfigParser()
        self.parser.read(config)
        self.job_list = job_list
        # Load plugins
        self.pmanager = ModuleManager(threads, kill_queue, job_list)

        # Set up environment
        self.shockurl = shockurl
        self.arasturl = arasturl
        self.datapath = self.parser.get('compute', 'datapath')
        if queue:
            self.queue = queue
            print('Using queue: {}'.format(self.queue))
        else:
            self.queue = self.parser.get('rabbitmq', 'default_routing_key')
        self.min_free_space = float(
            self.parser.get('compute', 'min_free_space'))
        m = ctrl_conf['meta']
        a = ctrl_conf['assembly']

        self.metadata = meta.MetadataConnection(arasturl, int(a['mongo_port']),
                                                m['mongo.db'],
                                                m['mongo.collection'],
                                                m['mongo.collection.auth'])
        self.gc_lock = multiprocessing.Lock()
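
A minimal sketch of how this constructor might be invoked. The enclosing class name (ComputeConsumer) is not shown in the example and is a placeholder, as is every argument value; the config file is assumed to contain the [compute] and [rabbitmq] sections read above.

import multiprocessing

# Hypothetical setup -- ComputeConsumer stands in for the unnamed class above.
kill_queue = multiprocessing.Queue()   # passed through to ModuleManager
job_list = []                          # shared job registry

consumer = ComputeConsumer(
    shockurl='http://shock.example.org',
    arasturl='mongo.example.org',          # also used as the MongoDB host
    config='arast.compute.cfg',            # needs [compute] datapath/min_free_space
    threads=8,
    queue='compute-queue',                 # None falls back to rabbitmq default_routing_key
    kill_queue=kill_queue,
    job_list=job_list,
    ctrl_conf={'meta': {'mongo.db': 'arast',
                        'mongo.collection': 'jobs',
                        'mongo.collection.auth': 'auth'},
               'assembly': {'mongo_port': 27017}})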
Example #2
    def __init__(self, shockurl, rmq_host, rmq_port, mongo_host, mongo_port,
                 config, threads, queues, kill_list, kill_list_lock, job_list,
                 job_list_lock, ctrl_conf, datapath, binpath, modulebin):
        self.parser = SafeConfigParser()
        self.parser.read(config)
        self.kill_list = kill_list
        self.kill_list_lock = kill_list_lock
        self.job_list = job_list
        self.job_list_lock = job_list_lock
        self.threads = threads
        self.binpath = binpath
        self.modulebin = modulebin
        # Load plugins
        self.pmanager = ModuleManager(threads, kill_list, kill_list_lock,
                                      job_list, binpath, modulebin)

        # Set up environment
        self.shockurl = shockurl
        self.datapath = datapath
        self.rmq_host = rmq_host
        self.rmq_port = rmq_port
        self.mongo_host = mongo_host
        self.mongo_port = mongo_port
        self.queues = queues
        self.min_free_space = float(
            self.parser.get('compute', 'min_free_space'))
        self.data_expiration_days = float(
            self.parser.get('compute', 'data_expiration_days'))
        m = ctrl_conf['meta']
        a = ctrl_conf['assembly']

        collections = {
            'jobs': m.get('mongo.collection', 'jobs'),
            'auth': m.get('mongo.collection.auth', 'auth'),
            'data': m.get('mongo.collection.data', 'data'),
            'running': m.get('mongo.collection.running', 'running_jobs')
        }

        ###### TODO Use REST API
        self.metadata = meta.MetadataConnection(self.mongo_host,
                                                self.mongo_port, m['mongo.db'],
                                                collections)
        self.gc_lock = multiprocessing.Lock()
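
For orientation, a hedged sketch of the ctrl_conf dictionary this newer constructor reads. Only the keys accessed above are shown; the collection names may be omitted because m.get(...) supplies defaults, and the 'assembly' entry is looked up but not otherwise used in this version.

# Hypothetical ctrl_conf -- values are placeholders.
ctrl_conf = {
    'meta': {
        'mongo.db': 'arast',
        # Optional overrides; the defaults used when absent are shown above:
        # 'mongo.collection': 'jobs',
        # 'mongo.collection.auth': 'auth',
        # 'mongo.collection.data': 'data',
        # 'mongo.collection.running': 'running_jobs',
    },
    'assembly': {},
}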
Example #3
    def compute(self, body):
        self.job_list_lock.acquire()
        try:
            job_data = self.prepare_job_data(body)
            self.job_list.append(job_data)
        except:
            logger.error("Error in adding new job to job_list")
            raise
        finally:
            self.job_list_lock.release()

        status = ''
        logger.debug('job_data = {}'.format(job_data))

        params = json.loads(body)
        job_id = params['job_id']
        data_id = params['data_id']
        uid = params['_id']
        user = params['ARASTUSER']
        token = params['oauth_token']
        pipelines = params.get('pipeline')
        recipe = params.get('recipe')
        wasp_in = params.get('wasp')
        jobpath = os.path.join(self.datapath, user, str(data_id), str(job_id))

        url = shock.verify_shock_url(self.shockurl)

        self.start_time = time.time()

        timer_thread = UpdateTimer(self.metadata, 29, time.time(), uid,
                                   self.done_flag)
        timer_thread.start()

        #### Parse pipeline to wasp exp
        reload(recipes)
        if recipe:
            try:
                wasp_exp = recipes.get(recipe[0], job_id)
            except AttributeError:
                raise Exception('"{}" recipe not found.'.format(recipe[0]))
        elif wasp_in:
            wasp_exp = wasp_in[0]
        elif not pipelines:
            wasp_exp = recipes.get('auto', job_id)
        elif pipelines:
            ## Legacy client
            if pipelines[0] == 'auto':
                wasp_exp = recipes.get('auto', job_id)
            ##########
            else:
                if not isinstance(pipelines[0], list):  # --assemblers
                    pipelines = [pipelines]
                all_pipes = []
                for p in pipelines:
                    all_pipes += self.pmanager.parse_input(p)
                logger.debug("pipelines = {}".format(all_pipes))
                wasp_exp = wasp.pipelines_to_exp(all_pipes, params['job_id'])
        else:
            raise asmtypes.ArastClientRequestError('Malformed job request.')
        logger.debug('Wasp Expression: {}'.format(wasp_exp))
        w_engine = wasp.WaspEngine(self.pmanager, job_data, self.metadata)

        ###### Run Job
        try:
            w_engine.run_expression(wasp_exp, job_data)
            ###### Upload all result files and place them into appropriate tags
            uploaded_fsets = job_data.upload_results(url, token)

            # Format report
            new_report = open('{}.tmp'.format(self.out_report_name), 'w')

            ### Log errors
            if len(job_data['errors']) > 0:
                new_report.write('PIPELINE ERRORS\n')
                for i, e in enumerate(job_data['errors']):
                    new_report.write('{}: {}\n'.format(i, e))
            try:  ## Get Quast output
                quast_report = job_data['wasp_chain'].find_module(
                    'quast')['data'].find_type('report')[0].files[0]
                with open(quast_report) as q:
                    new_report.write(q.read())
            except:
                new_report.write('No Summary File Generated!\n\n\n')
            self.out_report.close()
            with open(self.out_report_name) as old:
                new_report.write(old.read())

            for log in job_data['logfiles']:
                new_report.write('\n{1} {0} {1}\n'.format(
                    os.path.basename(log), '=' * 20))
                with open(log) as l:
                    new_report.write(l.read())

            ### Log tracebacks
            if len(job_data['tracebacks']) > 0:
                new_report.write('EXCEPTION TRACEBACKS\n')
                for i, e in enumerate(job_data['tracebacks']):
                    new_report.write('{}: {}\n'.format(i, e))

            new_report.close()
            os.remove(self.out_report_name)
            shutil.move(new_report.name, self.out_report_name)
            res = self.upload(url, user, token, self.out_report_name)
            report_info = asmtypes.FileInfo(self.out_report_name,
                                            shock_url=url,
                                            shock_id=res['data']['id'])

            self.metadata.update_job(
                uid, 'report', [asmtypes.set_factory('report', [report_info])])
            status = ('Complete with errors' if job_data.get('errors')
                      else 'Complete')

            ## Make compatible with JSON dumps()
            del job_data['out_report']
            del job_data['initial_reads']
            del job_data['raw_reads']
            self.metadata.update_job(uid, 'data', job_data)
            self.metadata.update_job(uid, 'result_data', uploaded_fsets)
            ###### Legacy Support #######
            uploaded_fsets.append(
                asmtypes.set_factory('report', [report_info]))
            contigsets = [
                fset for fset in uploaded_fsets
                if fset.type == 'contigs' or fset.type == 'scaffolds'
            ]
            download_ids = {
                fi['filename']: fi['shock_id']
                for fset in uploaded_fsets for fi in fset['file_infos']
            }
            contig_ids = {
                fi['filename']: fi['shock_id']
                for fset in contigsets for fi in fset['file_infos']
            }
            self.metadata.update_job(uid, 'result_data_legacy', [download_ids])
            self.metadata.update_job(uid, 'contig_ids', [contig_ids])
            ###################

            sys.stdout.flush()
            touch(os.path.join(jobpath, "_DONE_"))
            logger.info('============== JOB COMPLETE ===============')

        except asmtypes.ArastUserInterrupt:
            status = 'Terminated by user'
            sys.stdout.flush()
            touch(os.path.join(jobpath, "_CANCELLED_"))
            logger.info('============== JOB KILLED ===============')

        finally:
            self.remove_job_from_lists(job_data)
            # Reinitialize to get live changes
            logger.debug('Reinitialize plugin manager...')
            self.pmanager = ModuleManager(self.threads, self.kill_list,
                                          self.kill_list_lock, self.job_list,
                                          self.binpath, self.modulebin)

        self.metadata.update_job(uid, 'status', status)
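
The method parses its body argument with json.loads, so a request is a JSON document carrying the keys read above. A hedged sketch of such a message, with placeholder values and the hypothetical consumer standing in for an instance of the class that owns compute():

import json

# Hypothetical job request; only one of 'recipe', 'wasp' or 'pipeline' is
# normally supplied, and if all are absent the 'auto' recipe is used.
body = json.dumps({
    '_id': 'abc123',             # Mongo document id (uid)
    'job_id': 42,
    'data_id': 7,
    'ARASTUSER': 'alice',
    'oauth_token': '<token>',
    'recipe': ['auto'],          # resolved via recipes.get()
    # 'wasp': ['<wasp expression string>'],
    # 'pipeline': [['<module>', '<module>']],   # legacy pipeline form
})
consumer.compute(body)           # 'consumer' is a placeholder instance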