Example #1
def _create_dummy_job(meth, status, desc, complete):
    """Makes a dummy job, owned by the user, and NOT registered.

    :param status: Dummy job status
    :type status: kbtypes.Unicode
    :ui_name status: Job status
    :default status: Dummy status

    :param desc: Dummy job description
    :type desc: kbtypes.Unicode
    :ui_name desc: Job description
    :default desc: Dummy job description

    :param complete: Dummy job est. completion
    :type complete: kbtypes.Unicode
    :ui_name complete: Dummy completion time (timestamp)
    :default complete: 2020-04-03T08:56:32+0000
    
    :rtype: kbtypes.Unicode
    :return: job info
    """
    meth.stages = 2
    meth.advance("Creating dummy job")

    ujs = UserAndJobState(url=service.URLS.user_and_job_state,
                          token=meth.token)
    progress = {'ptype': 'none'}
    job_info = ujs.create_and_start_job(meth.token, status, desc, progress,
                                        complete)

    meth.advance("Done!")
    return json.dumps(job_info)
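The job id returned above can be handed back to the same UserAndJobState client to inspect or finish the dummy job. A minimal sketch, assuming the standard UJS methods get_job_status and complete_job; the status-tuple layout follows the UJS spec and is not taken from this example:

# Sketch (not part of the example above): inspect and complete the dummy job.
ujs = UserAndJobState(url=service.URLS.user_and_job_state, token=meth.token)
job_id = json.loads(_create_dummy_job(meth, 'running', 'demo',
                                      '2020-04-03T08:56:32+0000'))

(last_update, stage, status, progress,
 est_complete, complete, error) = ujs.get_job_status(job_id)

if not complete:
    # complete_job(job, token, final status, error description, results);
    # None and {} are assumed acceptable for a job with no error and no results.
    ujs.complete_job(job_id, meth.token, 'done', None, {})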
Example #2
def start_job(config, context, input):
    # Create a user and job state client and authenticate as the user.
    ujsClient = UserAndJobState(config['userandjobstate_url'],
                                token=context['token'])

    # Create a job to track building the distance matrix.
    status = 'initializing'
    description = 'cbd-buildmatrix with %d files for user %s' % (
        len(input['node_ids']) + len(input['file_paths']), context['user_id'])
    progress = {'ptype': 'task', 'max': 6}
    job_id = ujsClient.create_and_start_job(context['token'], status,
                                            description, progress,
                                            timestamp(3600))

    # Create working directory for job and build file names.
    jobDirectory = make_job_dir(config['work_folder_path'], job_id)
    jobDataFilename = os.path.join(jobDirectory, 'jobdata.json')
    outputFilename = os.path.join(jobDirectory, 'stdout.log')
    errorFilename = os.path.join(jobDirectory, 'stderr.log')

    # Save data required for running the job.
    # Another option is to create a key of the jobid and store state.
    jobData = {
        'id': job_id,
        'input': input,
        'context': context,
        'config': config
    }
    with open(jobDataFilename, "w") as jobDataFile:
        json.dump(jobData, jobDataFile, indent=4)

    # Start worker to run the job.
    jobScript = os.path.join(os.environ['KB_TOP'], 'bin/cbd-runjob')
    cmdline = "nohup %s %s >%s 2>%s &" % (jobScript, jobDataFilename,
                                          outputFilename, errorFilename)
    status = os.system(cmdline)
    return job_id
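This example leans on two helpers that are not shown here, timestamp and make_job_dir. A plausible sketch of both, assuming timestamp produces the +0000-suffixed format the UJS timestamps use elsewhere on this page; the service's actual implementations may differ:

import datetime
import os


def timestamp(delta_seconds):
    # A UJS-style timestamp delta_seconds in the future,
    # e.g. 2020-04-03T08:56:32+0000.
    when = datetime.datetime.utcnow() + datetime.timedelta(seconds=delta_seconds)
    return when.strftime('%Y-%m-%dT%H:%M:%S+0000')


def make_job_dir(work_folder_path, job_id):
    # One working directory per job, named after the UJS job id.
    job_directory = os.path.join(work_folder_path, job_id)
    if not os.path.exists(job_directory):
        os.makedirs(job_directory, 0o775)
    return job_directory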
Example #3
    def annotate(self, ctx, input):
        # ctx is the context object
        # return variables are: jobid
        #BEGIN annotate
        ''' Compute probabilistic annotations from the specified genome object.

            The input dictionary must contain the following keys:
            genome: Name of genome object
            genome_workspace: Workspace from which to grab the Genome object
            probanno: Name of probanno object to output
            probanno_workspace: Workspace to which to save the ProbAnno object

            The following keys are optional:
            verbose: Print lots of messages on the progress of the algorithm

            @param ctx Current context object
            @param input Dictionary with input parameters for function
            @return Job ID of job started to compute annotation likelihoods
        '''

        input = self._checkInputArguments(ctx, input, 
                                          [ "genome", "genome_workspace", "probanno", "probanno_workspace"],
                                          { "verbose" : False }
                                          )
        
        # Make sure the static database files are ready.
        self._checkDatabaseFiles(ctx)
        
        # Set log level to DEBUG when the verbose parameter is enabled.
        if input['verbose']:
            ctx.set_log_level(log.DEBUG)

        # Make sure the Genome object is available.
        wsClient = Workspace(self.config["workspace_url"], token=ctx['token'])
        genomeIdentity = make_object_identity(input['genome_workspace'], input['genome'])
        wsClient.get_object_info( [ genomeIdentity ], 0 )

        # Create a user and job state client and authenticate as the user.
        ujsClient = UserAndJobState(self.config['userandjobstate_url'], token=ctx['token'])

        # Create a job to track running probabilistic annotation.
        description = 'pa-annotate for genome %s to probanno %s for user %s' %(input['genome'], input['probanno'], ctx['user_id'])
        progress = { 'ptype': 'task', 'max': 5 }
        jobid = ujsClient.create_and_start_job(ctx['token'], 'initializing', description, progress, timestamp(3600))
        ctx.log_info('Job '+jobid+' started for genome '+input['genome']+' to probanno '+input['probanno'])

        # Run the job on the local machine.
        if self.config["job_queue"] == "local":
            # Create working directory for job and build file names.
            jobDirectory = make_job_directory(self.config['work_folder_path'], jobid)
            jobDataFilename = os.path.join(jobDirectory, 'jobdata.json')
            outputFilename = os.path.join(jobDirectory, 'stdout.log')
            errorFilename = os.path.join(jobDirectory, 'stderr.log')
    
            # Save data required for running the job.
            jobData = { 'id': jobid, 'input': input, 'context': ctx, 'config': self.config }
            with open(jobDataFilename, "w") as jobDataFile:
                json.dump(jobData, jobDataFile, indent=4)
    
            # Start worker to run the job.
            jobScript = os.path.join(os.environ['KB_TOP'], 'bin/pa-runjob')
            cmdline = "nohup %s %s >%s 2>%s &" %(jobScript, jobDirectory, outputFilename, errorFilename)
            status = os.system(cmdline)
            ctx.log_info('Job %s is running on local host, status %d' %(jobid, status))

        #END annotate

        # At some point might do deeper type checking...
        if not isinstance(jobid, basestring):
            raise ValueError('Method annotate return value ' +
                             'jobid is not type basestring as required.')
        # return the results
        return [jobid]
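The pa-runjob worker launched above is expected to report back through UJS as it runs. A minimal sketch of that side of the protocol, assuming the standard UJS methods update_job_progress and complete_job; the stage names and run_pipeline_step are hypothetical stand-ins, chosen only to match the 'max': 5 declared when the job was created:

# Hypothetical worker-side reporting; jobData is what annotate() saved above.
jobData = json.load(open(jobDataFilename, 'r'))
config, context, jobid = jobData['config'], jobData['context'], jobData['id']

ujsClient = UserAndJobState(config['userandjobstate_url'], token=context['token'])
for step in ('step1', 'step2', 'step3', 'step4', 'step5'):  # hypothetical names
    # update_job_progress(job, token, status, progress increment, new est. completion)
    ujsClient.update_job_progress(jobid, context['token'], step, 1, timestamp(3600))
    run_pipeline_step(step)  # hypothetical; stands in for the real work

# Mark the job finished: final status, error description (None on success), results.
ujsClient.complete_job(jobid, context['token'], 'done', None, {})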
Example #4
def run_async(config, ctx, args):

  method = ctx['method']
  package = ctx['module']
  token = ctx['token']

  uc = UserAndJobState(url=config['ujs_url'], token=token)
 
  kb_top = os.environ.get('KB_TOP', '/kb/deployment')

  cp = ConfigParser.ConfigParser()
  cp.read('{}/services/{}/service.cfg'.format(kb_top, package))
  method_hash = {}
  package_hash = {}
  
  for k in cp.options(method): method_hash[k] = cp.get(method, k)
  for k in cp.options(package): package_hash[k] = cp.get(package, k)


  # Create and start a UJS job to track this method's run.
  status = 'Initializing'
  description = method_hash["ujs_description"]
  progress = { 'ptype' : method_hash["ujs_ptype"], 'max' : method_hash["ujs_mstep"] }

  est = datetime.datetime.utcnow() + datetime.timedelta(minutes=int(method_hash['ujs_mtime']))
  ujs_job_id = uc.create_and_start_job(token, status, description, progress, est.strftime('%Y-%m-%dT%H:%M:%S+0000'))


  clientgroups = package_hash.get("clientgroups")
  if clientgroups is None: clientgroups = "prod"
  job_config_fn = "{}/services/{}/awf/{}.awf".format(kb_top, package, ujs_job_id)
  job_config = { "info" :
                   { "pipeline" : package,
                     "name" : method,
                     "user" : ctx['user_id'],
                     "clientgroups" : clientgroups,
                     "jobId" : ujs_job_id
                   },
                 "tasks" : [ ]
               }
  # Collect the task<N>_cmd_name keys that define this method's tasks.
  task_list = [ l for l in method_hash if l.startswith('task') and l.endswith('_cmd_name') ]

  for task_id in range(1, len(task_list) + 1):
    task_cmd_name = "task{}_cmd_name".format(task_id)
    if task_cmd_name not in task_list:
      raise Exception('Task {} is not defined out of {} tasks'.format(task_cmd_name, len(task_list)))
    task_cmd_args = arg_substituting(package, config, args, method_hash['task%d_cmd_args' % task_id])
    task_cmd_args = task_cmd_args.replace('KBWF_COMMON.ujs_jid', ujs_job_id + " ")  # support the UJS job id in command args


    # Collect the Shock input definitions for this task.
    host_keys = [ mk for mk in method_hash if mk.startswith('task{}_inputs_'.format(task_id)) and mk.endswith('_host') ]
    inputs = {}
    for input_host in host_keys:
      m = re.match('task{}_inputs_(.*)_host'.format(task_id), input_host)
      if m is None: continue
      var_name = m.group(1)

      m = re.search('@{}\s'.format(var_name), task_cmd_args)
      if m is None:
        raise Exception('The shock input variable ({}) is not defined in {}'.format(var_name, task_cmd_name))
      if "task{}_inputs_{}_node".format(task_id, var_name) not in method_hash:
        raise Exception('The shock node id for input variable ({}) is not defined'.format(var_name))

      inputs[var_name] = { 'host' : arg_substituting(package, config, args, method_hash[input_host]) }
      inputs[var_name]['node'] = arg_substituting(package, config, args, method_hash["task{}_inputs_{}_node".format(task_id, var_name)])

    # Collect the Shock output definitions for this task.
    host_keys = [ mk for mk in method_hash if mk.startswith('task{}_outputs_'.format(task_id)) and mk.endswith('_host') ]
    outputs = {}
    for output_host in host_keys:
      m = re.match('task{}_outputs_(.*)_host'.format(task_id), output_host)
      if m is None: continue
      var_name = m.group(1)

      m = re.search('@{}\s'.format(var_name), task_cmd_args)
      if m is None:
        raise Exception('The shock output variable ({}) is not defined in {}'.format(var_name, task_cmd_name))

      outputs[var_name] = { 'host' : arg_substituting(package, config, args, method_hash[output_host]) }

    task = { "cmd" : 
                   { "args" : task_cmd_args,
                     "description" : method_hash["task{}_cmd_description".format(task_id)],
                     "name" : method_hash["task{}_cmd_name".format(task_id)]
                   },
                   "inputs" : inputs,
                   "outputs" : outputs,
                   "taskid" : method_hash["task{}_taskid".format(task_id)],
                   'skip' : int(method_hash["task{}_skip".format(task_id)]),
                   'totalwork' : int(method_hash["task{}_totalwork".format(task_id)])
                           
               };

    if method_hash["task{}_dependson".format(task_id)] == "":
      task["dependsOn"] = []
    else:
      task["dependsOn"] = method_hash["task{}_dependson".format(task_id)].split(',')

    if method_hash["task{}_token".format(task_id)] == "true":
      task['cmd']['environ'] = { "private" : { "KB_AUTH_TOKEN" : token } }

    job_config['tasks'].append(task)

  # Write the job config to a file so it can be uploaded to AWE (and kept for debugging).
  with open(job_config_fn, 'w') as ajc:
    json.dump(job_config, ajc, indent=4)


  header = dict()
  header["Authorization"] = "OAuth %s" % token

  dataFile = open(os.path.abspath(job_config_fn))
  m = MultipartEncoder(fields={'upload': (os.path.split(job_config_fn)[-1], dataFile)})
  header['Content-Type'] = m.content_type

  try:
      response = requests.post(config['awe_url']+ "/job", headers=header, data=m, allow_redirects=True, verify=True)
      dataFile.close()
  
      if not response.ok:
          response.raise_for_status()

      result = response.json()

      if result['error']:
          raise Exception(result['error'][0])
      else:
          job_id = [result["data"]['id'], ujs_job_id]
  except:
      dataFile.close()
      raise
  return job_id
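run_async returns the AWE job id paired with the UJS job id. A caller could poll the AWE side through the same REST endpoint the upload used; a sketch, assuming the job state is exposed under data.state in AWE's GET /job/{id} response (an assumption about AWE's API, not shown in this example):

# Hypothetical polling of the AWE job id returned by run_async.
import time

awe_job_id, ujs_job_id = run_async(config, ctx, args)
header = { "Authorization" : "OAuth %s" % ctx['token'] }

while True:
    response = requests.get(config['awe_url'] + "/job/" + awe_job_id, headers=header)
    response.raise_for_status()
    state = response.json()["data"]["state"]  # field name assumed
    if state in ("completed", "suspend", "deleted"):  # terminal states assumed
        break
    time.sleep(30)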