def _create_dummy_job(meth, status, desc, complete):
    """Makes a dummy job, owned by the user, and NOT registered.

    :param status: Dummy job status
    :type status: kbtypes.Unicode
    :ui_name status: Job status
    :default status: Dummy status
    :param desc: Dummy job description
    :type desc: kbtypes.Unicode
    :ui_name desc: Job description
    :default desc: Dummy job description
    :param complete: Dummy job est. completion
    :type complete: kbtypes.Unicode
    :ui_name complete: Dummy completion time (timestamp)
    :default complete: 2020-04-03T08:56:32+0000
    :rtype: kbtypes.Unicode
    :return: job info
    """
    meth.stages = 2
    meth.advance("Creating dummy job")
    ujs = UserAndJobState(url=service.URLS.user_and_job_state,
                          token=meth.token)
    progress = {'ptype': 'none'}
    job_info = ujs.create_and_start_job(meth.token, status, desc, progress,
                                        complete)
    meth.advance("Done!")
    return json.dumps(job_info)
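# Both start_job and annotate below call a timestamp helper that these
# excerpts never define. Given the completion-time format used above and the
# strftime pattern in run_async, a plausible sketch (an assumption, not the
# original implementation):
import datetime

def timestamp(offset_seconds):
    # UTC time offset_seconds into the future, formatted like
    # '2020-04-03T08:56:32+0000'.
    est = datetime.datetime.utcnow() + datetime.timedelta(seconds=offset_seconds)
    return est.strftime('%Y-%m-%dT%H:%M:%S+0000')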
def start_job(config, context, input):
    # Create a user and job state client and authenticate as the user.
    ujsClient = UserAndJobState(config['userandjobstate_url'],
                                token=context['token'])

    # Create a job to track building the distance matrix.
    status = 'initializing'
    description = 'cbd-buildmatrix with %d files for user %s' \
        % (len(input['node_ids']) + len(input['file_paths']),
           context['user_id'])
    progress = {'ptype': 'task', 'max': 6}
    job_id = ujsClient.create_and_start_job(context['token'], status,
                                            description, progress,
                                            timestamp(3600))

    # Create a working directory for the job and build file names.
    jobDirectory = make_job_dir(config['work_folder_path'], job_id)
    jobDataFilename = os.path.join(jobDirectory, 'jobdata.json')
    outputFilename = os.path.join(jobDirectory, 'stdout.log')
    errorFilename = os.path.join(jobDirectory, 'stderr.log')

    # Save the data required for running the job.
    # Another option is to create a key of the job id and store state.
    jobData = {'id': job_id, 'input': input, 'context': context,
               'config': config}
    with open(jobDataFilename, 'w') as jobDataFile:
        json.dump(jobData, jobDataFile, indent=4)

    # Start a worker to run the job in the background.
    jobScript = os.path.join(os.environ['KB_TOP'], 'bin/cbd-runjob')
    cmdline = "nohup %s %s >%s 2>%s &" % (jobScript, jobDataFilename,
                                          outputFilename, errorFilename)
    status = os.system(cmdline)

    return job_id
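# start_job uses make_job_dir (and annotate below uses make_job_directory)
# to set up a per-job working directory; neither helper is shown in these
# excerpts. A minimal sketch, assuming the directory is simply created under
# the configured work folder and named by the job id:
import os

def make_job_dir(work_folder_path, job_id):
    # One working directory per job, named by the UJS job id.
    job_dir = os.path.join(work_folder_path, job_id)
    if not os.path.exists(job_dir):
        os.makedirs(job_dir)
    return job_dir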
def annotate(self, ctx, input):
    # ctx is the context object
    # return variables are: jobid
    #BEGIN annotate
    '''
    Compute probabilistic annotations from the specified genome object.

    The input dictionary must contain the following keys:
    genome: Name of genome object
    genome_workspace: Workspace from which to grab the Genome object
    probanno: Name of probanno object to output
    probanno_workspace: Workspace to which to save the ProbAnno object

    The following keys are optional:
    verbose: Print lots of messages on the progress of the algorithm

    @param ctx Current context object
    @param input Dictionary with input parameters for function
    @return Job ID of job started to compute annotation likelihoods
    '''
    input = self._checkInputArguments(ctx, input,
                                      ['genome', 'genome_workspace',
                                       'probanno', 'probanno_workspace'],
                                      {'verbose': False})

    # Make sure the static database files are ready.
    self._checkDatabaseFiles(ctx)

    # Set the log level to DEBUG when the verbose parameter is enabled.
    if input['verbose']:
        ctx.set_log_level(log.DEBUG)

    # Make sure the Genome object is available.
    wsClient = Workspace(self.config['workspace_url'], token=ctx['token'])
    genomeIdentity = make_object_identity(input['genome_workspace'],
                                          input['genome'])
    wsClient.get_object_info([genomeIdentity], 0)

    # Create a user and job state client and authenticate as the user.
    ujsClient = UserAndJobState(self.config['userandjobstate_url'],
                                token=ctx['token'])

    # Create a job to track running probabilistic annotation.
    description = 'pa-annotate for genome %s to probanno %s for user %s' \
        % (input['genome'], input['probanno'], ctx['user_id'])
    progress = {'ptype': 'task', 'max': 5}
    jobid = ujsClient.create_and_start_job(ctx['token'], 'initializing',
                                           description, progress,
                                           timestamp(3600))
    ctx.log_info('Job ' + jobid + ' started for genome ' + input['genome'] +
                 ' to probanno ' + input['probanno'])

    # Run the job on the local machine.
    if self.config['job_queue'] == 'local':
        # Create a working directory for the job and build file names.
        jobDirectory = make_job_directory(self.config['work_folder_path'],
                                          jobid)
        jobDataFilename = os.path.join(jobDirectory, 'jobdata.json')
        outputFilename = os.path.join(jobDirectory, 'stdout.log')
        errorFilename = os.path.join(jobDirectory, 'stderr.log')

        # Save the data required for running the job.
        jobData = {'id': jobid, 'input': input, 'context': ctx,
                   'config': self.config}
        with open(jobDataFilename, 'w') as jobDataFile:
            json.dump(jobData, jobDataFile, indent=4)

        # Start a worker to run the job in the background.
        jobScript = os.path.join(os.environ['KB_TOP'], 'bin/pa-runjob')
        cmdline = "nohup %s %s >%s 2>%s &" % (jobScript, jobDirectory,
                                              outputFilename, errorFilename)
        status = os.system(cmdline)
        ctx.log_info('Job %s is running on local host, status %d'
                     % (jobid, status))
    #END annotate

    # At some point might do deeper type checking...
    if not isinstance(jobid, basestring):
        raise ValueError('Method annotate return value jobid '
                         'is not type basestring as required.')

    # return the results
    return [jobid]
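# annotate calls a make_object_identity helper that is also not defined in
# these excerpts. A minimal sketch, assuming it builds a Workspace
# ObjectIdentity mapping that addresses an object by workspace and name:
def make_object_identity(workspace, object_name):
    # Assumed ObjectIdentity shape; 'workspace' and 'name' are the keys
    # used to address an object by name within a workspace.
    return {'workspace': workspace, 'name': object_name}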
def run_async(config, ctx, args):
    method = ctx['method']
    package = ctx['module']
    token = ctx['token']
    uc = UserAndJobState(url=config['ujs_url'], token=token)
    kb_top = os.environ.get('KB_TOP', '/kb/deployment')

    # Load the method and package sections from the service config file.
    cp = ConfigParser.ConfigParser()
    cp.read('{}/services/{}/service.cfg'.format(kb_top, package))
    method_hash = {}
    package_hash = {}
    for k in cp.options(method):
        method_hash[k] = cp.get(method, k)
    for k in cp.options(package):
        package_hash[k] = cp.get(package, k)

    # Create the UJS job that tracks this workflow.
    status = 'Initializing'
    description = method_hash['ujs_description']
    progress = {'ptype': method_hash['ujs_ptype'],
                'max': int(method_hash['ujs_mstep'])}
    est = datetime.datetime.utcnow() \
        + datetime.timedelta(minutes=int(method_hash['ujs_mtime']))
    ujs_job_id = uc.create_and_start_job(token, status, description, progress,
                                         est.strftime('%Y-%m-%dT%H:%M:%S+0000'))

    clientgroups = package_hash['clientgroups']
    if clientgroups is None:
        clientgroups = 'prod'
    job_config_fn = '{}/services/{}/awf/{}.awf'.format(kb_top, package,
                                                       ujs_job_id)
    job_config = {'info': {'pipeline': package,
                           'name': method,
                           'user': ctx['user_id'],
                           'clientgroups': clientgroups,
                           'jobId': ujs_job_id},
                  'tasks': []}

    # Collect the task<N>_cmd_name keys that define this method's tasks.
    task_list = [l for l in method_hash
                 if l.startswith('task') and l.endswith('_cmd_name')]
    for task_id in range(1, len(task_list) + 1):
        task_cmd_name = 'task{}_cmd_name'.format(task_id)
        if task_cmd_name not in task_list:
            raise Exception('Task {} is not defined out of {} tasks'.format(
                task_id, len(task_list)))
        task_cmd_args = arg_substituting(package, config, args,
                                         method_hash['task%d_cmd_args' % task_id])
        # Support the UJS job id in command args.
        task_cmd_args = task_cmd_args.replace('KBWF_COMMON.ujs_jid',
                                              ujs_job_id + ' ')

        # Build the Shock input descriptors for this task.
        host_keys = [mk for mk in method_hash
                     if mk.startswith('task{}_inputs_'.format(task_id)) and
                     mk.endswith('_host')]
        inputs = {}
        for input_host in host_keys:
            m = re.match('task{}_inputs_(.*)_host'.format(task_id), input_host)
            if m is None:
                continue
            var_name = m.group(1)
            m = re.search(r'@{}\s'.format(var_name), task_cmd_args)
            if m is None:
                raise Exception(
                    'The shock input variable ({}) is not defined in {}'.format(
                        var_name, task_cmd_args))
            node_key = 'task{}_inputs_{}_node'.format(task_id, var_name)
            if node_key not in method_hash:
                raise Exception(
                    'The shock node id for input variable ({}) is not defined'.format(
                        var_name))
            inputs[var_name] = {
                'host': arg_substituting(package, config, args,
                                         method_hash[input_host]),
                'node': arg_substituting(package, config, args,
                                         method_hash[node_key])}

        # Build the Shock output descriptors; only a target host is recorded
        # for outputs.
        host_keys = [mk for mk in method_hash
                     if mk.startswith('task{}_outputs_'.format(task_id)) and
                     mk.endswith('_host')]
        outputs = {}
        for output_host in host_keys:
            m = re.match('task{}_outputs_(.*)_host'.format(task_id),
                         output_host)
            if m is None:
                continue
            var_name = m.group(1)
            m = re.search(r'@{}\s'.format(var_name), task_cmd_args)
            if m is None:
                raise Exception(
                    'The shock output variable ({}) is not defined in {}'.format(
                        var_name, task_cmd_args))
            outputs[var_name] = {
                'host': arg_substituting(package, config, args,
                                         method_hash[output_host])}

        task = {'cmd': {'args': task_cmd_args,
                        'description': method_hash['task{}_cmd_description'.format(task_id)],
                        'name': method_hash[task_cmd_name]},
                'inputs': inputs,
                'outputs': outputs,
                'taskid': method_hash['task{}_taskid'.format(task_id)],
                'skip': int(method_hash['task{}_skip'.format(task_id)]),
                'totalwork': int(method_hash['task{}_totalwork'.format(task_id)])}
        if method_hash['task{}_dependson'.format(task_id)] == '':
            task['dependsOn'] = []
        else:
            task['dependsOn'] = \
                method_hash['task{}_dependson'.format(task_id)].split(',')
        if method_hash['task{}_token'.format(task_id)] == 'true':
            task['cmd']['environ'] = {'private': {'KB_AUTH_TOKEN': token}}
        job_config['tasks'].append(task)

    # Write the workflow document to a file; this is mainly for logging,
    # since AWE receives it through the upload below.
    with open(job_config_fn, 'w') as ajc:
        json.dump(job_config, ajc, indent=4)

    # Submit the workflow document to the AWE service.
    header = {'Authorization': 'OAuth %s' % token}
    dataFile = open(os.path.abspath(job_config_fn))
    m = MultipartEncoder(fields={'upload': (os.path.split(job_config_fn)[-1],
                                            dataFile)})
    header['Content-Type'] = m.content_type
    try:
        response = requests.post(config['awe_url'] + '/job', headers=header,
                                 data=m, allow_redirects=True, verify=True)
        dataFile.close()
        if not response.ok:
            response.raise_for_status()
        result = response.json()
        if result['error']:
            raise Exception(result['error'][0])
        job_id = [result['data']['id'], ujs_job_id]
    except:
        dataFile.close()
        raise
    return job_id