def __init__(self, shockurl, arasturl, config, threads, queue,
             kill_queue, job_list, ctrl_conf):
    self.parser = SafeConfigParser()
    self.parser.read(config)
    self.job_list = job_list

    # Load plugins
    self.pmanager = ModuleManager(threads, kill_queue, job_list)

    # Set up environment
    self.shockurl = shockurl
    self.arasturl = arasturl
    self.datapath = self.parser.get('compute', 'datapath')
    if queue:
        self.queue = queue
        print('Using queue:{}'.format(self.queue))
    else:
        self.queue = self.parser.get('rabbitmq', 'default_routing_key')
    self.min_free_space = float(self.parser.get('compute', 'min_free_space'))
    m = ctrl_conf['meta']
    a = ctrl_conf['assembly']
    self.metadata = meta.MetadataConnection(arasturl, int(a['mongo_port']),
                                            m['mongo.db'],
                                            m['mongo.collection'],
                                            m['mongo.collection.auth'])
    self.gc_lock = multiprocessing.Lock()
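# A minimal sketch of the config file sections the legacy constructor above
# reads. Only the section/option names come from the parser.get() calls;
# the values here are hypothetical:
#
#   [compute]
#   datapath = /mnt/arast/data
#   min_free_space = 0.5
#
#   [rabbitmq]
#   default_routing_key = jobs.default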
def __init__(self, shockurl, rmq_host, rmq_port, mongo_host, mongo_port,
             config, threads, queues, kill_list, kill_list_lock,
             job_list, job_list_lock, ctrl_conf, datapath, binpath,
             modulebin):
    self.parser = SafeConfigParser()
    self.parser.read(config)
    self.kill_list = kill_list
    self.kill_list_lock = kill_list_lock
    self.job_list = job_list
    self.job_list_lock = job_list_lock

    # Load plugins
    self.threads = threads
    self.binpath = binpath
    self.modulebin = modulebin
    self.pmanager = ModuleManager(threads, kill_list, kill_list_lock,
                                  job_list, binpath, modulebin)

    # Set up environment
    self.shockurl = shockurl
    self.datapath = datapath
    self.rmq_host = rmq_host
    self.rmq_port = rmq_port
    self.mongo_host = mongo_host
    self.mongo_port = mongo_port
    self.queues = queues
    self.min_free_space = float(self.parser.get('compute', 'min_free_space'))
    self.data_expiration_days = float(
        self.parser.get('compute', 'data_expiration_days'))
    m = ctrl_conf['meta']
    a = ctrl_conf['assembly']
    collections = {'jobs': m.get('mongo.collection', 'jobs'),
                   'auth': m.get('mongo.collection.auth', 'auth'),
                   'data': m.get('mongo.collection.data', 'data'),
                   'running': m.get('mongo.collection.running', 'running_jobs')}

    # TODO: use REST API
    self.metadata = meta.MetadataConnection(self.mongo_host, self.mongo_port,
                                            m['mongo.db'], collections)
    self.gc_lock = multiprocessing.Lock()
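# A sketch of the ctrl_conf structure both constructors above expect. The
# keys mirror the m[...]/m.get(...)/a[...] lookups; the values are
# hypothetical:
#
#   ctrl_conf = {
#       'meta': {'mongo.db': 'arast',
#                'mongo.collection': 'jobs',
#                'mongo.collection.auth': 'auth',
#                'mongo.collection.data': 'data',
#                'mongo.collection.running': 'running_jobs'},
#       'assembly': {'mongo_port': 27017},  # used by the legacy constructor
#   }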
def compute(self, body):
    self.job_list_lock.acquire()
    try:
        job_data = self.prepare_job_data(body)
        self.job_list.append(job_data)
    except:
        logger.error("Error in adding new job to job_list")
        raise
    finally:
        self.job_list_lock.release()

    status = ''
    logger.debug('job_data = {}'.format(job_data))

    params = json.loads(body)
    job_id = params['job_id']
    data_id = params['data_id']
    uid = params['_id']
    user = params['ARASTUSER']
    token = params['oauth_token']
    pipelines = params.get('pipeline')
    recipe = params.get('recipe')
    wasp_in = params.get('wasp')
    jobpath = os.path.join(self.datapath, user, str(data_id), str(job_id))

    url = shock.verify_shock_url(self.shockurl)
    self.start_time = time.time()
    timer_thread = UpdateTimer(self.metadata, 29, time.time(),
                               uid, self.done_flag)
    timer_thread.start()

    #### Parse pipeline to wasp exp
    reload(recipes)
    if recipe:
        try:
            wasp_exp = recipes.get(recipe[0], job_id)
        except AttributeError:
            raise Exception('"{}" recipe not found.'.format(recipe[0]))
    elif wasp_in:
        wasp_exp = wasp_in[0]
    elif not pipelines:
        wasp_exp = recipes.get('auto', job_id)
    elif pipelines:
        ## Legacy client
        if pipelines[0] == 'auto':
            wasp_exp = recipes.get('auto', job_id)
        else:
            if type(pipelines[0]) is not list:  # --assemblers
                pipelines = [pipelines]
            all_pipes = []
            for p in pipelines:
                all_pipes += self.pmanager.parse_input(p)
            logger.debug("pipelines = {}".format(all_pipes))
            wasp_exp = wasp.pipelines_to_exp(all_pipes, params['job_id'])
    else:
        raise asmtypes.ArastClientRequestError('Malformed job request.')
    logger.debug('Wasp Expression: {}'.format(wasp_exp))
    w_engine = wasp.WaspEngine(self.pmanager, job_data, self.metadata)

    ###### Run Job
    try:
        w_engine.run_expression(wasp_exp, job_data)

        ###### Upload all result files and place them into appropriate tags
        uploaded_fsets = job_data.upload_results(url, token)

        # Format report
        new_report = open('{}.tmp'.format(self.out_report_name), 'w')

        ### Log errors
        if len(job_data['errors']) > 0:
            new_report.write('PIPELINE ERRORS\n')
            for i, e in enumerate(job_data['errors']):
                new_report.write('{}: {}\n'.format(i, e))
        try:  ## Get Quast output
            quast_report = job_data['wasp_chain'].find_module(
                'quast')['data'].find_type('report')[0].files[0]
            with open(quast_report) as q:
                new_report.write(q.read())
        except:
            new_report.write('No Summary File Generated!\n\n\n')
        self.out_report.close()
        with open(self.out_report_name) as old:
            new_report.write(old.read())
        for log in job_data['logfiles']:
            new_report.write('\n{1} {0} {1}\n'.format(
                os.path.basename(log), '=' * 20))
            with open(log) as l:
                new_report.write(l.read())

        ### Log tracebacks
        if len(job_data['tracebacks']) > 0:
            new_report.write('EXCEPTION TRACEBACKS\n')
            for i, e in enumerate(job_data['tracebacks']):
                new_report.write('{}: {}\n'.format(i, e))

        new_report.close()
        os.remove(self.out_report_name)
        shutil.move(new_report.name, self.out_report_name)
        res = self.upload(url, user, token, self.out_report_name)
        report_info = asmtypes.FileInfo(self.out_report_name, shock_url=url,
                                        shock_id=res['data']['id'])
        self.metadata.update_job(
            uid, 'report', [asmtypes.set_factory('report', [report_info])])
        status = 'Complete with errors' if job_data.get('errors') else 'Complete'

        ## Make compatible with JSON dumps()
        del job_data['out_report']
        del job_data['initial_reads']
        del job_data['raw_reads']
        self.metadata.update_job(uid, 'data', job_data)
        self.metadata.update_job(uid, 'result_data', uploaded_fsets)

        ###### Legacy Support #######
        # list.append() returns None, so call it without assigning the result
        uploaded_fsets.append(asmtypes.set_factory('report', [report_info]))
        contigsets = [fset for fset in uploaded_fsets
                      if fset.type == 'contigs' or fset.type == 'scaffolds']
        download_ids = {fi['filename']: fi['shock_id']
                        for fset in uploaded_fsets
                        for fi in fset['file_infos']}
        contig_ids = {fi['filename']: fi['shock_id']
                      for fset in contigsets
                      for fi in fset['file_infos']}
        self.metadata.update_job(uid, 'result_data_legacy', [download_ids])
        self.metadata.update_job(uid, 'contig_ids', [contig_ids])
        ###################

        sys.stdout.flush()
        touch(os.path.join(jobpath, "_DONE_"))
        logger.info('============== JOB COMPLETE ===============')
    except asmtypes.ArastUserInterrupt:
        status = 'Terminated by user'
        sys.stdout.flush()
        touch(os.path.join(jobpath, "_CANCELLED__"))
        logger.info('============== JOB KILLED ===============')
    finally:
        self.remove_job_from_lists(job_data)
        # Reinitialize plugin manager to pick up live changes
        logger.debug('Reinitialize plugin manager...')
        self.pmanager = ModuleManager(self.threads, self.kill_list,
                                      self.kill_list_lock, self.job_list,
                                      self.binpath, self.modulebin)
        self.metadata.update_job(uid, 'status', status)
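# An illustrative (hypothetical) message body for compute(). The field names
# match the params[...] lookups above; at most one of 'recipe', 'wasp', or
# 'pipeline' is expected to drive the wasp expression, and the values shown
# here are made up:
#
#   {
#       "job_id": 42,
#       "data_id": 7,
#       "_id": "54a1b2c3d4e5f60718293a4b",
#       "ARASTUSER": "alice",
#       "oauth_token": "...",
#       "pipeline": [["velvet", "spades"]]
#   }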