Example #1
def _create_dummy_job(meth, status, desc, complete):
    """Makes a dummy job, owned by the user, and NOT registered.

    :param status: Dummy job status
    :type status: kbtypes.Unicode
    :ui_name status: Job status
    :default status: Dummy status

    :param desc: Dummy job description
    :type desc: kbtypes.Unicode
    :ui_name desc: Job description
    :default desc: Dummy job description

    :param complete: Dummy job est. completion
    :type complete: kbtypes.Unicode
    :ui_name complete: Dummy completion time (timestamp)
    :default complete: 2020-04-03T08:56:32+0000
    
    :rtype: kbtypes.Unicode
    :return: job info
    """
    meth.stages = 2
    meth.advance("Creating dummy job")

    ujs = UserAndJobState(url=service.URLS.user_and_job_state, token=meth.token)
    progress = { 'ptype' : 'none' }
    job_info = ujs.create_and_start_job(meth.token, status, desc, progress, complete)

    meth.advance("Done!")
    return json.dumps(job_info)
Example #2
def _create_dummy_job(meth, status, desc, complete):
    """Makes a dummy job, owned by the user, and NOT registered.

    :param status: Dummy job status
    :type status: kbtypes.Unicode
    :ui_name status: Job status
    :default status: Dummy status

    :param desc: Dummy job description
    :type desc: kbtypes.Unicode
    :ui_name desc: Job description
    :default desc: Dummy job description

    :param complete: Dummy job est. completion
    :type complete: kbtypes.Unicode
    :ui_name complete: Dummy completion time (timestamp)
    :default complete: 2020-04-03T08:56:32+0000
    
    :rtype: kbtypes.Unicode
    :return: job info
    """
    meth.stages = 2
    meth.advance("Creating dummy job")

    ujs = UserAndJobState(url=service.URLS.user_and_job_state,
                          token=meth.token)
    progress = {'ptype': 'none'}
    job_info = ujs.create_and_start_job(meth.token, status, desc, progress,
                                        complete)

    meth.advance("Done!")
    return json.dumps(job_info)
Example #3
def start_job(config, context, input):
    # Create a user and job state client and authenticate as the user.
    ujsClient = UserAndJobState(config['userandjobstate_url'], token=context['token'])

    # Create a job to track building the distance matrix.
    status = 'initializing'
    description = 'cbd-buildmatrix with %d files for user %s' %(len(input['node_ids'])+len(input['file_paths']), context['user_id'])
    progress = { 'ptype': 'task', 'max': 6 }
    job_id = ujsClient.create_and_start_job(context['token'], status, description, progress, timestamp(3600))

    # Create working directory for job and build file names.
    jobDirectory = make_job_dir(config['work_folder_path'], job_id)
    jobDataFilename = os.path.join(jobDirectory, 'jobdata.json')
    outputFilename = os.path.join(jobDirectory, 'stdout.log')
    errorFilename = os.path.join(jobDirectory, 'stderr.log')

    # Save data required for running the job.
    # Another option is to create a key of the jobid and store state.
    jobData = { 'id': job_id, 'input': input, 'context': context, 'config': config }
    json.dump(jobData, open(jobDataFilename, "w"), indent=4)

    # Start worker to run the job.
    jobScript = os.path.join(os.environ['KB_TOP'], 'bin/cbd-runjob')
    cmdline = "nohup %s %s >%s 2>%s &" %(jobScript, jobDataFilename, outputFilename, errorFilename)
    status = os.system(cmdline)
    return job_id
Example #4
    def test_annotate(self):
        ''' Run pa-annotate on a valid Genome object and verify that the job runs and returns a valid ProbAnno object in the expected time.'''

        # Run the annotate() function to generate a ProbAnno object.
        paClient = ProbabilisticAnnotation(self._config["probanno_url"],
                                           token=self._token)
        jobid = paClient.annotate({
            "genome": self._config["genomeid"],
            "genome_workspace": self._config["test_ws"],
            "probanno": self._config["probannoid"],
            "probanno_workspace": self._config["test_ws"]
        })

        # Allow time for the command to run.
        time.sleep(float(self._config["runtime"]))

        # Make sure the job has completed.
        ujsClient = UserAndJobState(self._config['ujs_url'], token=self._token)
        jobList = ujsClient.list_jobs([self._config['test_user']], 'CE')
        jobCompleted = False
        for job in jobList:
            if jobid == job[0]:
                jobCompleted = True
                jobInfo = job
        self.assertTrue(
            jobCompleted, 'Job did not complete before timeout of %s seconds' %
            (self._config['runtime']))

        # See if the job ended in error.
        details = ''
        if jobInfo[11] == 1:
            details = ujsClient.get_detailed_error(jobInfo[0])
        self.assertEqual(jobInfo[11], 0, 'Job ended in error: %s' % (details))

        # Look for the ProbAnno object in the test workspace.
        wsClient = Workspace(self._config["workspace_url"], token=self._token)
        try:
            probannoObjectId = {
                'workspace': self._config['test_ws'],
                'name': self._config['probannoid']
            }
            objectList = wsClient.get_objects([probannoObjectId])
            probannoObject = objectList[0]
            self.assertEqual(
                probannoObject['info'][1], self._config['probannoid'],
                'ProbAnno object id %s is not %s' %
                (probannoObject['info'][1], self._config['probannoid']))
        except WorkspaceServerError as e:
            traceback.print_exc(file=sys.stderr)
            self.fail(
                msg=
                "The expected object %s did not get created in the workspace %s!\n"
                % (self._config["probannoid"], self._config["test_ws"]))
Example #5
 def _update_token(self):
     if self._updating:
         return
     if (time.time() - self._updated_at < self.UPDATE_TOKEN_INTERVAL):
         return
     self._updating = True
     print('Updating token at ' + str(time.time()))
     try:
         self._ujs = UserAndJobState(self._url,
                                     user_id=self._user,
                                     password=self._pwd)
         self._updated_at = time.time()
     finally:  # otherwise token will never be updated
         self._updating = False
Example #6
    def __ujs_client(self, token=None):
        if self.ujs is None:
            # Fall back to the environment token only when none was passed in.
            if token is None:
                token = os.environ['KB_AUTH_TOKEN']
            self.ujs = UserAndJobState(url=URLS.user_and_job_state,
                                       token=token)

        return self.ujs
Example #7
def _view_job(meth, job_id):
    """Views a job.

    :param job_id: A job id
    :type job_id: kbtypes.Unicode
    :ui_name job_id: A job id
    :rtype: kbtypes.Unicode
    :return: job info
    """
    meth.stages = 2
    meth.advance("Fetching job info")

    ujs = UserAndJobState(url=service.URLS.user_and_job_state, token=meth.token)
    job_info = ujs.get_job_info(job_id)

    meth.advance("Done!")
    return json.dumps(job_info)
Example #8
def _view_job(meth, job_id):
    """Views a job.

    :param job_id: A job id
    :type job_id: kbtypes.Unicode
    :ui_name job_id: A job id
    :rtype: kbtypes.Unicode
    :return: job info
    """
    meth.stages = 2
    meth.advance("Fetching job info")

    ujs = UserAndJobState(url=service.URLS.user_and_job_state,
                          token=meth.token)
    job_info = ujs.get_job_info(job_id)

    meth.advance("Done!")
    return json.dumps(job_info)
Example #9
    def test_annotate(self):
        ''' Run pa-annotate on a valid Genome object and verify that the job runs and returns a valid ProbAnno object in the expected time.'''

        # Run the annotate() function to generate a ProbAnno object.
        paClient = ProbabilisticAnnotation(self._config["probanno_url"], token=self._token)
        jobid = paClient.annotate( {
            "genome": self._config["genomeid"],
            "genome_workspace": self._config["test_ws"],
            "probanno": self._config["probannoid"],
            "probanno_workspace": self._config["test_ws"] } )
        
        # Allow time for the command to run.
        time.sleep(float(self._config["runtime"]))
        
        # Make sure the job has completed.
        ujsClient = UserAndJobState(self._config['ujs_url'], token=self._token)
        jobList = ujsClient.list_jobs([ self._config['test_user'] ], 'CE')
        jobCompleted = False
        for job in jobList:
            if jobid == job[0]:
                jobCompleted = True
                jobInfo = job
        self.assertTrue(jobCompleted, 'Job did not complete before timeout of %s seconds' %(self._config['runtime']))
        
        # See if the job ended in error.
        details = ''
        if jobInfo[11] == 1:
            details = ujsClient.get_detailed_error(jobInfo[0])
        self.assertEqual(jobInfo[11], 0, 'Job ended in error: %s' %(details))

        # Look for the ProbAnno object in the test workspace.
        wsClient = Workspace(self._config["workspace_url"], token=self._token)
        try:
            probannoObjectId = { 'workspace': self._config['test_ws'], 'name': self._config['probannoid'] }
            objectList = wsClient.get_objects( [ probannoObjectId ] )
            probannoObject = objectList[0]
            self.assertEqual(probannoObject['info'][1], self._config['probannoid'], 'ProbAnno object id %s is not %s' %(probannoObject['info'][1], self._config['probannoid']))
        except WorkspaceServerError as e:
            traceback.print_exc(file=sys.stderr)
            self.fail(msg = "The expected object %s did not get created in the workspace %s!\n" %(self._config["probannoid"], self._config["test_ws"]))
Example #10
 def _update_token(self):
     if self._updating:
         return
     if (time.time() - self._updated_at < self.UPDATE_TOKEN_INTERVAL):
         return
     self._updating = True
     print('Updating token at ' + str(time.time()))
     try:
         self._ujs = UserAndJobState(self._url, user_id=self._user,
                                     password=self._pwd)
         self._updated_at = time.time()
     finally:  # otherwise token will never be updated
         self._updating = False
Example #11
def start_job(config, context, input):
    # Create a user and job state client and authenticate as the user.
    ujsClient = UserAndJobState(config['userandjobstate_url'],
                                token=context['token'])

    # Create a job to track building the distance matrix.
    status = 'initializing'
    description = 'cbd-buildmatrix with %d files for user %s' % (
        len(input['node_ids']) + len(input['file_paths']), context['user_id'])
    progress = {'ptype': 'task', 'max': 6}
    job_id = ujsClient.create_and_start_job(context['token'], status,
                                            description, progress,
                                            timestamp(3600))

    # Create working directory for job and build file names.
    jobDirectory = make_job_dir(config['work_folder_path'], job_id)
    jobDataFilename = os.path.join(jobDirectory, 'jobdata.json')
    outputFilename = os.path.join(jobDirectory, 'stdout.log')
    errorFilename = os.path.join(jobDirectory, 'stderr.log')

    # Save data required for running the job.
    # Another option is to create a key of the jobid and store state.
    jobData = {
        'id': job_id,
        'input': input,
        'context': context,
        'config': config
    }
    json.dump(jobData, open(jobDataFilename, "w"), indent=4)

    # Start worker to run the job.
    jobScript = os.path.join(os.environ['KB_TOP'], 'bin/cbd-runjob')
    cmdline = "nohup %s %s >%s 2>%s &" % (jobScript, jobDataFilename,
                                          outputFilename, errorFilename)
    status = os.system(cmdline)
    return job_id
Example #12
def __init_client(client_name):
    if client_name == 'workspace':
        c = Workspace(URLS.workspace)
    elif client_name == 'job_service':
        c = NarrativeJobService(URLS.job_service)
    elif client_name == 'narrative_method_store':
        c = NarrativeMethodStore(URLS.narrative_method_store)
    elif client_name == 'user_and_job_state':
        c = UserAndJobState(URLS.user_and_job_state)
    elif client_name == 'catalog':
        c = Catalog(URLS.catalog)

    else:
        raise ValueError('Unknown client name "%s"' % client_name)

    __clients[client_name] = c
    return c
Example #13
def __init_client(client_name, token=None):
    if client_name == 'workspace':
        c = Workspace(URLS.workspace, token=token)
    elif client_name == 'narrative_method_store':
        c = NarrativeMethodStore(URLS.narrative_method_store, token=token)
    elif client_name == 'user_and_job_state':
        c = UserAndJobState(URLS.user_and_job_state, token=token)
    elif client_name == 'catalog':
        c = Catalog(URLS.catalog, token=token)
    elif client_name == 'service' or client_name == 'service_wizard':
        c = ServiceClient(URLS.service_wizard,
                          use_url_lookup=True,
                          token=token)
    elif client_name == 'execution_engine2' or client_name == 'execution_engine' or client_name == 'job_service':
        c = execution_engine2(URLS.execution_engine2, token=token)
    elif client_name == 'job_service_mock':
        c = JobServiceMock()
    else:
        raise ValueError('Unknown client name "%s"' % client_name)

    return c
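Example #12 caches the constructed clients in a module-level __clients dict, while Example #13 builds a fresh client on every call. A thin wrapper combining the two might look like the sketch below; get_client and the cache are illustrative, not part of the source, and caching token-bearing clients is only sensible when all callers share the same token:

__clients = {}

def get_client(client_name, token=None):
    # Reuse a previously constructed client when possible; otherwise build and cache one.
    if client_name not in __clients:
        __clients[client_name] = __init_client(client_name, token=token)
    return __clients[client_name]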
Example #14
def main():
    """
    KBase Convert task manager for converting between KBase objects.
    
    Step 1 - Run a converter to pull the source object and save the destination object.
    
    Args:
        workspace_service_url: URL for a KBase Workspace service where KBase objects 
                               are stored.
        ujs_service_url: URL for a User and Job State service to report task progress
                         back to the user.
        shock_service_url: URL for a KBase SHOCK data store service for storing files 
                           and large reference data.
        handle_service_url: URL for a KBase Handle service that maps permissions from 
                            the Workspace to SHOCK for KBase types that specify a Handle 
                            reference instead of a SHOCK reference.
        source_workspace_name: The name of the source workspace.
        destination_workspace_name: The name of the destination workspace.
        source_object_name: The source object name.
        destination_object_name: The destination object name.
        source_kbase_type: The KBase Workspace type string that indicates the module
                           and type of the object being created.                       
        destination_kbase_type: The KBase Workspace type string that indicates the module
                                and type of the object being created.
        optional_arguments: This is a JSON string containing optional parameters that can
                            be passed in for custom behavior per conversion.
        ujs_job_id: The job id from the User and Job State service that can be used to
                    report status on task progress back to the user.
        job_details: This is a JSON string that passes in the script specific command
                     line options for a given conversion type.  The service pulls
                     these config settings from a script config created by the developer
                     of the conversion script and passes that into the AWE job that
                     calls this script.
        working_directory: The working directory on disk where files can be created and
                           will be cleaned when the job ends with success or failure.
        keep_working_directory: A flag to tell the script not to delete the working
                                directory, which is mainly for debugging purposes.
    
    Returns:
        Literal return value is 0 for success and 1 for failure.
        
        Actual data output is one or more Workspace objects saved to a user's workspace. 
        
    Authors:
        Matt Henderson, Gavin Price            
    """

    logger = script_utils.stderrlogger(__file__, level=logging.DEBUG)
    logger.info("Executing KBase Convert tasks")
    
    script_details = script_utils.parse_docs(main.__doc__)
    
    logger.debug(script_details["Args"])
    
    parser = script_utils.ArgumentParser(description=script_details["Description"],
                                     epilog=script_details["Authors"])
    # provided by service config
    parser.add_argument('--workspace_service_url', 
                        help=script_details["Args"]["workspace_service_url"], 
                        action='store', 
                        required=True)
    parser.add_argument('--ujs_service_url', 
                        help=script_details["Args"]["ujs_service_url"], 
                        action='store', 
                        required=True)
    
    # optional because not all KBase Workspace types contain a SHOCK or Handle reference
    parser.add_argument('--shock_service_url', 
                        help=script_details["Args"]["shock_service_url"], 
                        action='store', 
                        default=None)
    parser.add_argument('--handle_service_url', 
                        help=script_details["Args"]["handle_service_url"], 
                        action='store', 
                        default=None)

    # workspace info for pulling the data
    parser.add_argument('--source_workspace_name', 
                        help=script_details["Args"]["source_workspace_name"], 
                        action='store', 
                        required=True)
    parser.add_argument('--source_object_name', 
                        help=script_details["Args"]["source_object_name"], 
                        action='store', 
                        required=True)

    # workspace info for saving the data
    parser.add_argument('--destination_workspace_name', 
                        help=script_details["Args"]["destination_workspace_name"], 
                        action='store', 
                        required=True)
    parser.add_argument('--destination_object_name', 
                        help=script_details["Args"]["destination_object_name"], 
                        action='store', 
                        required=True)

    # the types that we are transforming between, currently assumed one to one 
    parser.add_argument('--source_kbase_type', 
                        help=script_details["Args"]["source_kbase_type"], 
                        action='store', 
                        required=True)
    parser.add_argument('--destination_kbase_type', 
                        help=script_details["Args"]["destination_kbase_type"], 
                        action='store', 
                        required=True)

    # any user options provided, encoded as a JSON string
    parser.add_argument('--optional_arguments', 
                        help=script_details["Args"]["optional_arguments"], 
                        action='store', 
                        default='{}')

    # Used if you are restarting a previously executed job?
    parser.add_argument('--ujs_job_id', 
                        help=script_details["Args"]["ujs_job_id"], 
                        action='store', 
                        default=None, 
                        required=False)

    # config information for running the validate and transform scripts
    parser.add_argument('--job_details', 
                        help=script_details["Args"]["job_details"], 
                        action='store', 
                        default=None)

    # the working directory is where all the files for this job will be written, 
    # and normal operation cleans it after the job ends (success or fail)
    parser.add_argument('--working_directory', 
                        help=script_details["Args"]["working_directory"], 
                        action='store', 
                        default=None, 
                        required=True)
    parser.add_argument('--keep_working_directory', 
                        help=script_details["Args"]["keep_working_directory"], 
                        action='store_true')

    # ignore any extra arguments
    args, unknown = parser.parse_known_args()
            
    kb_token = os.environ.get('KB_AUTH_TOKEN')
    ujs = UserAndJobState(url=args.ujs_service_url, token=kb_token)

    est = datetime.datetime.utcnow() + datetime.timedelta(minutes=3)
    if args.ujs_job_id is not None:
        ujs.update_job_progress(args.ujs_job_id, kb_token, "KBase Data Convert started", 
                                1, est.strftime('%Y-%m-%dT%H:%M:%S+0000'))

    # parse all the json strings from the argument list into dicts
    # TODO had issues with json.loads and unicode strings, workaround was using simplejson and base64
    
    args.optional_arguments = simplejson.loads(base64.urlsafe_b64decode(args.optional_arguments))
    args.job_details = simplejson.loads(base64.urlsafe_b64decode(args.job_details))
    
    if not os.path.exists(args.working_directory):
        os.mkdir(args.working_directory)

    if args.ujs_job_id is not None:
        ujs.update_job_progress(args.ujs_job_id, kb_token, 
                                "Converting from {0} to {1}".format(args.source_kbase_type,args.destination_kbase_type), 
                                1, est.strftime('%Y-%m-%dT%H:%M:%S+0000') )

    # Step 1 : Convert the objects
    try:
        logger.info(args)
    
        convert_args = args.job_details["transform"]
        convert_args["optional_arguments"] = args.optional_arguments
        convert_args["working_directory"] = args.working_directory
        convert_args["workspace_service_url"] = args.workspace_service_url
        convert_args["source_workspace_name"] = args.source_workspace_name
        convert_args["source_object_name"] = args.source_object_name
        convert_args["destination_workspace_name"] = args.destination_workspace_name
        convert_args["destination_object_name"] = args.destination_object_name
        
        logger.info(convert_args)
        
        task_output = handler_utils.run_task(logger, convert_args)
        
        if task_output["stdout"] is not None:
            logger.debug("STDOUT : " + str(task_output["stdout"]))
        
        if task_output["stderr"] is not None:
            logger.debug("STDERR : " + str(task_output["stderr"]))        
    except Exception, e:
        handler_utils.report_exception(logger, 
                         {"message": 'ERROR : Conversion from {0} to {1}'.format(args.source_kbase_type,args.destination_kbase_type),
                          "exc": e,
                          "ujs": ujs,
                          "ujs_job_id": args.ujs_job_id,
                          "token": kb_token,
                         },
                         {"keep_working_directory": args.keep_working_directory,
                          "working_directory": args.working_directory})

        ujs.complete_job(args.ujs_job_id,
                         kb_token,
                         "Convert to {0} failed.".format(
                             args.destination_workspace_name), 
                         str(e),
                         None)
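main() above decodes --optional_arguments and --job_details with simplejson.loads(base64.urlsafe_b64decode(...)), so whatever dispatches this script has to apply the inverse encoding. A minimal Python 2 sketch of that caller-side step (the helper name is illustrative):

import base64
import simplejson

def encode_json_arg(value):
    # Inverse of the decoding performed in main() above.
    return base64.urlsafe_b64encode(simplejson.dumps(value))

# e.g. pass encode_json_arg({}) as --optional_arguments and
# encode_json_arg(job_details) as --job_details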
Example #15
    def annotate(self, ctx, input):
        # ctx is the context object
        # return variables are: jobid
        #BEGIN annotate
        ''' Compute probabilistic annotations from the specified genome object.

            The input dictionary must contain the following keys:
            genome: Name of genome object
            genome_workspace: Workspace from which to grab the Genome object
            probanno: Name of probanno object to output
            probanno_workspace: Workspace to which to save the ProbAnno object

            The following keys are optional:
            verbose: Print lots of messages on the progress of the algorithm

            @param ctx Current context object
            @param input Dictionary with input parameters for function
            @return Job ID of job started to compute annotation likelihoods
        '''

        input = self._checkInputArguments(ctx, input, 
                                          [ "genome", "genome_workspace", "probanno", "probanno_workspace"],
                                          { "verbose" : False }
                                          )
        
        # Make sure the static database files are ready.
        self._checkDatabaseFiles(ctx)
        
        # Set log level to DEBUG when the verbose parameter is enabled.
        if input['verbose']:
            ctx.set_log_level(log.DEBUG)

        # Make sure the Genome object is available.
        wsClient = Workspace(self.config["workspace_url"], token=ctx['token'])
        genomeIdentity = make_object_identity(input['genome_workspace'], input['genome'])
        wsClient.get_object_info( [ genomeIdentity ], 0 )

        # Create a user and job state client and authenticate as the user.
        ujsClient = UserAndJobState(self.config['userandjobstate_url'], token=ctx['token'])

        # Create a job to track running probabilistic annotation.
        description = 'pa-annotate for genome %s to probanno %s for user %s' %(input['genome'], input['probanno'], ctx['user_id'])
        progress = { 'ptype': 'task', 'max': 5 }
        jobid = ujsClient.create_and_start_job(ctx['token'], 'initializing', description, progress, timestamp(3600))
        ctx.log_info('Job '+jobid+' started for genome '+input['genome']+' to probanno '+input['probanno'])

        # Run the job on the local machine.
        if self.config["job_queue"] == "local":
            # Create working directory for job and build file names.
            jobDirectory = make_job_directory(self.config['work_folder_path'], jobid)
            jobDataFilename = os.path.join(jobDirectory, 'jobdata.json')
            outputFilename = os.path.join(jobDirectory, 'stdout.log')
            errorFilename = os.path.join(jobDirectory, 'stderr.log')
    
            # Save data required for running the job.
            jobData = { 'id': jobid, 'input': input, 'context': ctx, 'config': self.config }
            json.dump(jobData, open(jobDataFilename, "w"), indent=4)
    
            # Start worker to run the job.
            jobScript = os.path.join(os.environ['KB_TOP'], 'bin/pa-runjob')
            cmdline = "nohup %s %s >%s 2>%s &" %(jobScript, jobDirectory, outputFilename, errorFilename)
            status = os.system(cmdline)
            ctx.log_info('Job %s is running on local host, status %d' %(jobid, status))

        #END annotate

        # At some point might do deeper type checking...
        if not isinstance(jobid, basestring):
            raise ValueError('Method annotate return value ' +
                             'jobid is not type basestring as required.')
        # return the results
        return [jobid]
Example #16
    parser.add_argument('-t', '--kbase_type', help='KBase object type', action='store', dest='kbtype', default=None, required=True)

    parser.add_argument('-a', '--opt_args', help='Optional argument json string', action='store', dest='opt_args', default='{"downloader":{}}')

    parser.add_argument('-l', '--support_dir', help='Support directory', action='store', dest='sdir', default='lib')
    parser.add_argument('-d', '--del_lib_dir', help='Delete library directory', action='store', dest='del_tmps', default='true')
    parser.add_argument('-f', '--in_tmp_file', help='Input temporary file name', action='store', dest='itmp', default='infile')
    parser.add_argument('-g', '--out_tmp_file', help='Output temporary file name', action='store', dest='otmp', default='outfile')

    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()

    kb_token = os.environ.get('KB_AUTH_TOKEN')
    ujs = UserAndJobState(url=args.ujs_url, token=kb_token)

    est = datetime.datetime.utcnow() + datetime.timedelta(minutes=3)
    if args.jid is not None:
      ujs.update_job_progress(args.jid, kb_token, 'Dispatched', 1, est.strftime('%Y-%m-%dT%H:%M:%S+0000') )


    ## main loop
    args.opt_args = json.loads(args.opt_args)
    #if 'downloader' not in args.opt_args:
    #  args.opt_args['uploader'] = {}
    #  args.opt_args['uploader']['file'] = args.otmp
    #  args.opt_args['uploader']['input'] = args.inobj_id
    #  args.opt_args['uploader']['jid'] = args.jid
    #  args.opt_args['uploader']['etype'] = args.etype
    downloader = Downloader(args)
Example #17
        raise Exception("A logger must be provided for status information.")
    
    kb_token = None
    try:                
        kb_token = script_utils.get_token()
        
        assert type(kb_token) == type(str())
    except Exception, e:
        logger.error("Unable to get token!")
        logger.exception(e)
        sys.exit(1)

    ujs = None
    try:
        if ujs_job_id is not None:
            ujs = UserAndJobState(url=ujs_service_url, token=kb_token)
            ujs.get_job_status(ujs_job_id)
    except Exception, e:
        logger.error("Unable to connect to UJS service!")
        logger.exception(e)
        sys.exit(1)

    # used for cleaning up the job if an exception occurs
    cleanup_details = {"keep_working_directory": keep_working_directory,
                       "working_directory": working_directory}

    # used for reporting a fatal condition
    error_object = {"ujs_client": ujs,
                    "ujs_job_id": ujs_job_id,
                    "token": kb_token}
Example #18
        raise Exception("A logger must be provided for status information.")

    kb_token = None
    try:
        kb_token = script_utils.get_token()

        assert type(kb_token) == type(str())
    except Exception, e:
        logger.error("Unable to get token!")
        logger.exception(e)
        sys.exit(1)

    ujs = None
    try:
        if ujs_job_id is not None:
            ujs = UserAndJobState(url=ujs_service_url, token=kb_token)
            ujs.get_job_status(ujs_job_id)
    except Exception, e:
        logger.error("Unable to connect to UJS service!")
        logger.exception(e)
        sys.exit(1)

    # used for cleaning up the job if an exception occurs
    cleanup_details = {
        "keep_working_directory": keep_working_directory,
        "working_directory": working_directory
    }

    # used for reporting a fatal condition
    error_object = {
        "ujs_client": ujs,
Example #19
import argparse
import json
import os
import sys
import traceback
from biokbase.probabilistic_annotation.Worker import ProbabilisticAnnotationWorker
from biokbase.userandjobstate.client import UserAndJobState

if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog='pa-runjob')
    parser.add_argument('jobDirectory',
                        help='path to job directory for the job',
                        action='store',
                        default=None)
    args = parser.parse_args()

    # Run the job.
    jobDataPath = os.path.join(args.jobDirectory, "jobdata.json")
    job = json.load(open(jobDataPath, 'r'))
    try:
        worker = ProbabilisticAnnotationWorker()
        worker.runAnnotate(job)
    except Exception as e:
        # Mark the job as failed.
        tb = traceback.format_exc()
        sys.stderr.write(tb)
        ujsClient = UserAndJobState(job['config']['userandjobstate_url'],
                                    token=job['context']['token'])
        ujsClient.complete_job(job['id'], job['context']['token'], 'failed',
                               tb, {})

    exit(0)
Example #20
    def runJob(self, job):
        
        self.config = job['config']
        self.context = job['context']
        self.input = job['input']
        
        # Create a shock client and authenticate as the user.
        self.shockClient = ShockClient(self.config['shock_url'], self.context['token'])
        
        # Create a user and job state client and authenticate as the user.
        ujsClient = UserAndJobState(self.config['userandjobstate_url'], token=self.context['token'])

        # Create a process pool.
        self.pool = Pool(processes=int(self.config['num_pool_processes']))
        
        # Create a work directory for storing intermediate files.
        self.jobDirectory = make_job_dir(self.config['work_folder_path'], job['id'])
        self._log(log.INFO, 'Job '+job['id']+' running with work folder '+self.jobDirectory)

        # Download input fasta files from Shock and extract sequences to work directory.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'], 'extracting sequence files', 1, timestamp(3600))
        except:
            pass
        resultList = []
        sequenceList = []
        for nodeId in self.input['node_ids']:
            node = self.shockClient.get_node(nodeId)
            sourceFile = os.path.join(self.jobDirectory, node['file']['name'])
            destFile = '%s.sequence' %(os.path.splitext(sourceFile)[0])
            if PairSeparator in destFile: # Check for pair separator string in file name and replace as needed.
                destFile = destFile.replace(PairSeparator, '-')
            sequenceList.append(destFile)
            args = dict() # Needs to be scoped here so each process gets its own copy
            args['format'] = self.input['format']
            args['shockUrl'] = self.config['shock_url']
            args['auth'] = self.context['token']
            args['sequenceLen'] = self.input['sequence_length']
            args['minReads'] = self.input['min_reads']
            args['maxReads'] = self.input['max_reads']
            args['nodeId'] = nodeId
            args['sourceFile'] = sourceFile
            args['destFile'] = destFile
            result = self.pool.apply_async(extract_seq, (args,))
            resultList.append(result)
        for result in resultList:
            if result.get() != 0:
                self._cleanup()
                raise ExtractError("Error extracting sequences from input sequence file, result: %d" %(result.get()))
        for path in self.input['file_paths']:
            sourceFile = os.path.basename(path)
            destFile = '%s/%s.sequence' %(self.jobDirectory, os.path.splitext(sourceFile)[0])
            if PairSeparator in destFile: # Check for pair separator string in file name and replace as needed.
                destFile = destFile.replace(PairSeparator, '-')
            sequenceList.append(destFile)
            args = dict() # Needs to be scoped here so each process gets its own copy
            args['format'] = self.input['format']
            args['shockUrl'] = self.config['shock_url']
            args['auth'] = self.context['token']
            args['sequenceLen'] = self.input['sequence_length']
            args['minReads'] = self.input['min_reads']
            args['maxReads'] = self.input['max_reads']
            args['nodeId'] = None
            args['sourceFile'] = path
            args['destFile'] = destFile
            result = self.pool.apply_async(extract_seq, (args,))
            resultList.append(result)
        for result in resultList:
            try:
                result.get()
            except Exception as e:
                self._cleanup()
                raise ExtractError("Error extracting sequences from input sequence file: %s" %(e.message))

        # Confirm that each file met the criteria for sequence length and number of sequences.
        filesToRemove = list()
        for index in range(len(sequenceList)):
            # See if the file did not have the minimum number of sequences.
            if not os.path.exists(sequenceList[index]):
                filesToRemove.append(index)
                continue

            # See if the file has no data.
            if os.path.getsize(sequenceList[index]) == 0:
                self._cleanup()
                raise SeqLenError("Sequence file '%s' has no sequences" %(sequenceList[index]))

        filteredList = list()
        for index in range(len(sequenceList)):
            if index not in filesToRemove:
                filteredList.append(sequenceList[index])
        if len(filteredList) < 2:
            self._cleanup()
            raise SeqLenError("There are not enough sequence files that meet the sequence length or number of sequences criteria.")

        # Sort the sequences.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'], 'sorting sequence files', 1, timestamp(3600))
        except:
            pass
        resultList = []
        sortedList = []
        for sourceFile in filteredList:
            destFile = '%s.sorted' %(os.path.splitext(sourceFile)[0])
            sortedList.append(destFile)
            args = [ '/usr/bin/sort', '--output=%s' %(destFile), sourceFile ]
            result = self.pool.apply_async(run_command, (args,))
            resultList.append(result)
        for result in resultList:
            try:
                result.get()
            except CommandError as e:
                self._cleanup()
                raise SortError("Error sorting sequence file: %s\nCommand: '%s'\nStdout: '%s'\nStderr: '%s'" %(e.message, e.cmd, e.stdout, e.stderr))
             
        # Create combined and sorted files.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'], 'merging all pairs of sequence files', 1, timestamp(3600))
        except:
            pass
        resultList = []
        for p,q in combinations(sortedList, 2):
            pbase = os.path.basename(p)
            qbase = os.path.basename(q)
            dbase = '%s%s%s.sorted' %(os.path.splitext(pbase)[0], PairSeparator, os.path.splitext(qbase)[0])
            destFile = os.path.join(self.jobDirectory, dbase)
            sortedList.append(destFile)
            args = [ '/usr/bin/sort', '-m', '--output=%s' %(destFile), p, q ]
            result = self.pool.apply_async(run_command, (args,))
            resultList.append(result)
        for result in resultList:
            try:
                result.get()
            except CommandError as e:
                self._cleanup()
                raise MergeError("Error merging sequence file: %s\nCommand: '%s'\nStdout: '%s'\nStderr: '%s'" %(e.message, e.cmd, e.stdout, e.stderr))
                   
        # Compress all sorted files.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'], 'compressing sequence files', 1, timestamp(3600))
        except:
            pass
        resultList = []
        compressedList = []
        for sourceFile in sortedList:
            compressedList.append(sourceFile+'.xz')
            if self.input['extreme']:
                level = '-9e'
            else:
                level = '-9'
            args = [ '/usr/bin/xz', '--keep', level, '--no-warn', sourceFile ]
            result = self.pool.apply_async(run_command, (args,))
            resultList.append(result)
        for result in resultList:
            try:
                result.get()
            except CommandError as e:
                self._cleanup()
                raise CompressError("Error compressing sequence file: %s\nCommand: '%s'\nStdout: '%s'\nStderr: '%s'" %(e.message, e.cmd, e.stdout, e.stderr))
        
        # Calculate the distance matrix.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'], 'calculating distance matrix', 1, timestamp(3600))
        except:
            pass
        csvFile = os.path.join(self.jobDirectory, '%s.csv' %(job['id']))
        self._cbdCalculator(compressedList, self.input['scale'], csvFile)
        
        # Store the output file in shock.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'], 'storing output file in shock', 1, timestamp(3600))
        except:
            pass
        node = self.shockClient.create_node(csvFile, '')
        if not node['id']:
            # Shock let us down. Save the distance matrix in the work directory for possible recovery.
            os.rename(csvFile, '%s/%s.csv' %(self.config['work_folder_path'], job['id']))
            self._cleanup()
            raise ShockError("Error saving distance matrix file to Shock. A Shock node was not created.")
        
        # Mark the job as complete.
        results = { 'shocknodes': [ node['id'] ], 'shockurl': self.config['shock_url'] }
        ujsClient.complete_job(job['id'], self.context['token'], 'done', None, results)
        self._log(log.INFO, 'Job '+job['id']+' completed successfully')

        # Cleanup after ourselves.
        self._cleanup()
        
        return
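Example #20 depends on run_command() and a CommandError that exposes cmd, stdout and stderr, neither of which appears in this listing. A minimal compatible sketch, not the project's actual implementation:

import subprocess

class CommandError(Exception):
    # Carries the fields referenced by the error handling in Example #20.
    def __init__(self, message, cmd, stdout, stderr):
        Exception.__init__(self, message)
        self.message = message
        self.cmd = cmd
        self.stdout = stdout
        self.stderr = stderr

def run_command(args):
    # Run one external command; raise CommandError on a nonzero exit code.
    proc = subprocess.Popen(args, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        raise CommandError('command failed with return code %d' % proc.returncode,
                           ' '.join(args), stdout, stderr)
    return 0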
Example #21
class NarrativeJobProxy:
    '''
    Module Name:
    NarrativeJobProxy

    Module Description:
    Very simple proxy that reauthenticates requests to the user_and_job_state
service as the narrative user.

DO NOT DEPLOY PUBLICLY
    '''

    ######## WARNING FOR GEVENT USERS #######
    # Since asynchronous IO can lead to methods - even the same method -
    # interrupting each other, you must be *very* careful when using global
    # state. A method could easily clobber the state set by another while
    # the latter method is running.
    #########################################
    #BEGIN_CLASS_HEADER
    UPDATE_TOKEN_INTERVAL = 24 * 60 * 60  # 1 day in sec

    #    UPDATE_TOKEN_INTERVAL = 10

    def _update_token(self):
        if self._updating:
            return
        if (time.time() - self._updated_at < self.UPDATE_TOKEN_INTERVAL):
            return
        self._updating = True
        print('Updating token at ' + str(time.time()))
        try:
            self._ujs = UserAndJobState(self._url,
                                        user_id=self._user,
                                        password=self._pwd)
            self._updated_at = time.time()
        finally:  # otherwise token will never be updated
            self._updating = False

    #END_CLASS_HEADER

    # config contains contents of config file in a hash or None if it couldn't
    # be found
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self._user = config.get('narrative_user')
        self._pwd = config.get('narrative_user_pwd')
        if not self._user or not self._pwd:
            raise ValueError(
                'narrative user and/or narrative pwd missing from deploy.cfg')
        self._url = config.get('ujs_url')
        if not self._url:
            raise ValueError('UJS url missing from deploy.cfg')
        self._updated_at = -self.UPDATE_TOKEN_INTERVAL
        self._updating = False
        self._update_token()
        #END_CONSTRUCTOR
        pass

    def ver(self, ctx):
        # ctx is the context object
        # return variables are: ver
        #BEGIN ver
        ver = '0.0.1'
        #END ver

        # At some point might do deeper type checking...
        if not isinstance(ver, basestring):
            raise ValueError('Method ver return value ' +
                             'ver is not type basestring as required.')
        # return the results
        return [ver]

    def get_detailed_error(self, ctx, job):
        # ctx is the context object
        # return variables are: error
        #BEGIN get_detailed_error
        self._update_token()
        error = self._ujs.get_detailed_error(job)
        #END get_detailed_error

        # At some point might do deeper type checking...
        if not isinstance(error, basestring):
            raise ValueError('Method get_detailed_error return value ' +
                             'error is not type basestring as required.')
        # return the results
        return [error]

    def get_job_info(self, ctx, job):
        # ctx is the context object
        # return variables are: info
        #BEGIN get_job_info
        self._update_token()
        info = self._ujs.get_job_info(job)
        #END get_job_info

        # At some point might do deeper type checking...
        if not isinstance(info, list):
            raise ValueError('Method get_job_info return value ' +
                             'info is not type list as required.')
        # return the results
        return [info]
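A hedged usage sketch for the proxy class above: the constructor only reads narrative_user, narrative_user_pwd and ujs_url from its config, and the generated methods each return a single-element list. All values below are placeholders:

config = {
    'narrative_user': 'narrative',    # placeholder
    'narrative_user_pwd': 'secret',   # placeholder
    'ujs_url': 'https://kbase.us/services/userandjobstate',
}
proxy = NarrativeJobProxy(config)
ctx = {}  # context object; unused by the methods shown above
info = proxy.get_job_info(ctx, 'some_ujs_job_id')[0]  # placeholder job id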
Example #22
def run_async (config, ctx, args) :

  method  = ctx['method'];
  package = ctx['module'];
  token   = ctx['token'];

  wc = Workspace(url=config['ujs_url'], token=token)
  uc = UserAndJobState(url=config['ujs_url'], token=token)
 
  kb_top = os.environ.get('KB_TOP', '/kb/deployment')

  cp = ConfigParser.ConfigParser()
  cp.read('{}/services/{}/service.cfg'.format(kb_top, package))
  method_hash = {}
  package_hash = {}
  
  for k in cp.options(method): method_hash[k] = cp.get(method, k)
  for k in cp.options(package): package_hash[k] = cp.get(package, k)


  # UJS
  status = 'Initializing'
  description = method_hash["ujs_description"]
  progress = { 'ptype' : method_hash["ujs_ptype"], 'max' : method_hash["ujs_mstep"] };

  est = datetime.datetime.utcnow() + datetime.timedelta(minutes=int(method_hash['ujs_mtime']))
  ujs_job_id = uc.create_and_start_job(token, status, description, progress, est.strftime('%Y-%m-%dT%H:%M:%S+0000'));


  clientgroups = package_hash["clientgroups"];
  if clientgroups == None: clientgroups = "prod" 
  job_config_fn = "{}/services/{}/awf/{}.awf".format(kb_top,package,ujs_job_id);
  job_config = {"info" : 
                       { "pipeline" :  package,
                         "name" : method,
                         "user" : ctx['user_id'],
                         "clientgroups" : clientgroups,
                         "jobId" : ujs_job_id
                      },
                    "tasks" : [ ]
                   };
  #my @task_list = grep /^$method.task\d+_cmd_name$/, keys %method_hash;
  task_list = [ l for l in method_hash if l.startswith('task') and l.endswith('_cmd_name')]



  for task_id in range(1,len(task_list)+1,1):
    task_cmd_name = "task{}_cmd_name".format(task_id)
    if task_cmd_name not in task_list:
      raise Exception('Task {} is not defined out of {} tasks'.format(task_cmd_name, len(task_list)))
    task_cmd_args = arg_substituting( package, config, args, method_hash['task%d_cmd_args' % task_id]);
    task_cmd_args = task_cmd_args.replace('KBWF_COMMON.ujs_jid',ujs_job_id + " ");# support ujs job id in command args


    host_keys = [mk for mk in method_hash if mk.startswith('task{}_inputs_'.format(task_id)) and mk.endswith('_host')]
    inputs = {}
    for input_host in host_keys:
      m = re.match('task{}_inputs_(.*)_host'.format(task_id), input_host)
      if m is None: continue
      var_name = m.group(1)

      m = re.search('@{}\s'.format(var_name), task_cmd_args)
      if m is None:
        raise Exception('The shock input variable ({}) is not defined in {}'.format(var_name, task_cmd_name))
      if "task{}_inputs_{}_node".format(task_id, var_name) not in method_hash:
        raise Exception('The shock node id for input variable ({}) is not defined'.format(var_name))

      inputs[var_name] = {'host': arg_substituting(package, config, args, method_hash[input_host])}
      inputs[var_name]['node'] = arg_substituting(package, config, args, method_hash["task{}_inputs_{}_node".format(task_id, var_name)])

    host_keys = [mk for mk in method_hash if mk.startswith('task{}_outputs_'.format(task_id)) and mk.endswith('_host')]
    outputs = {}
    for output_host in host_keys:
      m = re.match('task{}_outputs_(.*)_host'.format(task_id), output_host)
      if m is None: continue
      var_name = m.group(1)

      m = re.search('@{}\s'.format(var_name), task_cmd_args)
      if m is None:
        raise Exception('The shock output variable ({}) is not defined in {}'.format(var_name, task_cmd_name))

      outputs[var_name] = {'host': arg_substituting(package, config, args, method_hash[output_host])}

    task = { "cmd" : 
                   { "args" : task_cmd_args,
                     "description" : method_hash["task{}_cmd_description".format(task_id)],
                     "name" : method_hash["task{}_cmd_name".format(task_id)]
                   },
                   "inputs" : inputs,
                   "outputs" : outputs,
                   "taskid" : method_hash["task{}_taskid".format(task_id)],
                   'skip' : int(method_hash["task{}_skip".format(task_id)]),
                   'totalwork' : int(method_hash["task{}_totalwork".format(task_id)])
                           
               };

    if(method_hash["task{}_dependson".format(task_id)] == "") :
      task["dependsOn"] =  []
    else:
      ta = method_hash["task{}_dependson".format(task_id)].split(',')
      task["dependsOn"] = ta

    if method_hash["task{}_token".format(task_id)] == "true" :
      task['cmd']['environ'] =  {"private" : {"KB_AUTH_TOKEN" : token} }
    
    job_config['tasks'].append(task);

  # Write the workflow document to a file, mainly for logging/debugging.
  with open(job_config_fn, 'w') as ajc:
    json.dump(job_config, ajc, indent=4)


  header = dict()
  header["Authorization"] = "OAuth %s" % token

  dataFile = open(os.path.abspath(job_config_fn))
  m = MultipartEncoder(fields={'upload': (os.path.split(job_config_fn)[-1], dataFile)})
  header['Content-Type'] = m.content_type

  try:
      response = requests.post(config['awe_url']+ "/job", headers=header, data=m, allow_redirects=True, verify=True)
      dataFile.close()
  
      if not response.ok:
          response.raise_for_status()

      result = response.json()

      if result['error']:
          raise Exception(result['error'][0])
      else:
          job_id = [result["data"]['id'], ujs_job_id]
  except:
      dataFile.close()
      raise
  return job_id;
Example #23
            args.job_details = simplejson.loads(base64.urlsafe_b64decode(args.job_details))
        except Exception, e:
            logger.debug("Exception while loading base64 json strings!")
            sys.exit(1)
    
    kb_token = None
    try:
        kb_token = script_utils.get_token()
    except Exception, e:
        logger.debug("Exception getting token!")
        raise
    
    ujs = None
    try:
        if args.ujs_job_id is not None:
            ujs = UserAndJobState(url=args.ujs_service_url, token=kb_token)
            ujs.get_job_status(args.ujs_job_id)
    except Exception, e:
        logger.debug("Exception talking to UJS!")
        raise
    
    # used for cleaning up the job if an exception occurs
    cleanup_details = {"keep_working_directory": args.keep_working_directory,
                       "working_directory": args.working_directory}

    # used for reporting a fatal condition
    error_object = {"ujs_client": ujs,
                    "ujs_job_id": args.ujs_job_id,
                    "token": kb_token}

    est = datetime.datetime.utcnow() + datetime.timedelta(minutes=5)
Example #24
    parser.add_argument('-a', '--opt_args', help='Optional argument json string', action='store', dest='opt_args', default='{"validator":{},"transformer":{}}')

    parser.add_argument('-l', '--support_dir', help='Support directory', action='store', dest='sdir', default='lib')
    parser.add_argument('-d', '--del_lib_dir', help='Delete library directory', action='store', dest='del_tmps', default='true')
    parser.add_argument('-f', '--in_tmp_file', help='Input temporary file name', action='store', dest='itmp', default='infile')
    parser.add_argument('-g', '--out_tmp_file', help='Output temporary file name', action='store', dest='otmp', default='outfile')

    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()

    
    kb_token = os.environ.get('KB_AUTH_TOKEN')
    ujs = UserAndJobState(url=args.ujs_url, token=kb_token)

    est = datetime.datetime.utcnow() + datetime.timedelta(minutes=3)
    if args.jid is not None:
      ujs.update_job_progress(args.jid, kb_token, 'Dispatched', 1, est.strftime('%Y-%m-%dT%H:%M:%S+0000') )


    ## main loop
    args.opt_args = json.loads(args.opt_args)
    if 'uploader' not in args.opt_args:
      args.opt_args['uploader'] = {}
      args.opt_args['uploader']['file'] = args.otmp
      args.opt_args['uploader']['input'] = args.inobj_id
      args.opt_args['uploader']['jid'] = args.jid
      args.opt_args['uploader']['etype'] = args.etype
    uploader = Uploader(args)
Example #25
                base64.urlsafe_b64decode(args.job_details))
        except Exception, e:
            logger.debug("Exception while loading base64 json strings!")
            sys.exit(1)

    kb_token = None
    try:
        kb_token = script_utils.get_token()
    except Exception, e:
        logger.debug("Exception getting token!")
        raise

    ujs = None
    try:
        if args.ujs_job_id is not None:
            ujs = UserAndJobState(url=args.ujs_service_url, token=kb_token)
            ujs.get_job_status(args.ujs_job_id)
    except Exception, e:
        logger.debug("Exception talking to UJS!")
        raise

    # used for cleaning up the job if an exception occurs
    cleanup_details = {
        "keep_working_directory": args.keep_working_directory,
        "working_directory": args.working_directory
    }

    # used for reporting a fatal condition
    error_object = {
        "ujs_client": ujs,
        "ujs_job_id": args.ujs_job_id,
Example #26
                        dest='itmp',
                        default='infile')
    parser.add_argument('-g',
                        '--out_tmp_file',
                        help='Output temporary file name',
                        action='store',
                        dest='otmp',
                        default='outfile')

    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()

    kb_token = os.environ.get('KB_AUTH_TOKEN')
    ujs = UserAndJobState(url=args.ujs_url, token=kb_token)

    est = datetime.datetime.utcnow() + datetime.timedelta(minutes=3)
    if args.jid is not None:
        ujs.update_job_progress(args.jid, kb_token, 'Dispatched', 1,
                                est.strftime('%Y-%m-%dT%H:%M:%S+0000'))

    ## main loop
    args.opt_args = json.loads(args.opt_args)
    #if 'downloader' not in args.opt_args:
    #  args.opt_args['uploader'] = {}
    #  args.opt_args['uploader']['file'] = args.otmp
    #  args.opt_args['uploader']['input'] = args.inobj_id
    #  args.opt_args['uploader']['jid'] = args.jid
    #  args.opt_args['uploader']['etype'] = args.etype
    downloader = Downloader(args)
Example #27
                        help='show job start and end timestamps',
                        action='store_true',
                        dest='showTimes',
                        default=False)
    parser.add_argument('--ujs-url',
                        help='url for user and job state service',
                        action='store',
                        dest='ujsURL',
                        default='https://kbase.us/services/userandjobstate')
    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()

    # Get the status of the specified job.
    ujsClient = UserAndJobState(args.ujsURL)
    try:
        info = job_info_dict(ujsClient.get_job_info(args.jobID))
    except JobStateServerError as e:
        print e.message
        exit(1)

    # Check if the job had an error.
    if info['error']:
        print "Job '%s' ended with error '%s' and no results are available." % (
            args.jobID, info['status'])
        print 'Error details:'
        print ujsClient.get_detailed_error(args.jobID)
        ujsClient.delete_job(args.jobID)
        exit(1)
Example #28
    def runAnnotate(self, job):

        ''' Run an annotate job to create a ProbAnno typed object.

            A ProbAnno typed object is created in four steps: (1) extract amino acid
            sequences from a Genome typed object to a fasta file, (2) run a BLAST search
            using the amino acid sequences against the subsystem BLAST database,
            (3) calculate annotation likelihood scores for each roleset implied by the
            functions of proteins in subsystems, and (4) save the likelihood scores
            to a ProbAnno typed object.

            The Job dictionary contains three main sections: (1) input parameters to
            the annotate() function, (2) context of server instance running the
            annotate() function, and (3) config variables of server.

            @param job Job dictionary created by server's annotate() function
            @return Nothing (although job is marked as complete)
        '''

        # The input parameters and user context for annotate() were stored in the job data for the job.
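        # The job dictionary is expected to carry four keys:
        #   'id'      - UJS job id assigned when the job was created
        #   'input'   - parameters originally passed to annotate()
        #   'context' - context of the request, including the auth token
        #   'config'  - server configuration variables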
        input = job["input"]
        if input['verbose']:
            self.logger.set_log_level(log.DEBUG)
        self.ctx = job["context"]
        self.config = job['config']

        # Create a DataParser object for working with the static database files.
        self.dataParser = DataParser(self.config)

        status = None

        try:
            # Make sure the database files are available.
            self.dataParser.checkIfDatabaseFilesExist()

            # Make sure the job directory exists.
            workFolder = make_job_directory(self.config['work_folder_path'], job['id'])

            # Create a user and job state client and authenticate as the user.
            ujsClient = UserAndJobState(self.config['userandjobstate_url'], token=self.ctx['token'])
    
            # Get the Genome object from the specified workspace.
            try:
                ujsClient.update_job_progress(job['id'], self.ctx['token'], 'getting genome object', 1, timestamp(3600))
            except:
                pass
            wsClient = Workspace(self.config["workspace_url"], token=self.ctx['token'])
            genomeObjectId = make_object_identity(input["genome_workspace"], input["genome"])
            objectList = wsClient.get_objects( [ genomeObjectId ] )
            genomeObject = objectList[0]
            
            # Convert Genome object to fasta file.
            try:
                ujsClient.update_job_progress(job['id'], self.ctx['token'], 'converting Genome object to fasta file', 1, timestamp(3600))
            except:
                pass
            fastaFile = self._genomeToFasta(input, genomeObject, workFolder)
            
            # Run blast using the fasta file.
            try:
                ujsClient.update_job_progress(job['id'], self.ctx['token'], 'running blast', 1, timestamp(3600))
            except:
                pass
            blastResultFile = self._runBlast(input, fastaFile, workFolder)
            
            # Calculate roleset probabilities.
            try:
                ujsClient.update_job_progress(job['id'], self.ctx['token'], 'calculating roleset probabilities', 1, timestamp(300))
            except:
                pass
            rolestringTuples = self._rolesetProbabilitiesMarble(input, blastResultFile, workFolder)
            
            # Build ProbAnno object and store in the specified workspace.
            try:
                ujsClient.update_job_progress(job['id'], self.ctx['token'], 'building ProbAnno object', 1, timestamp(120))
            except:
                pass
            output = self._buildProbAnnoObject(input, genomeObject, blastResultFile, rolestringTuples, workFolder, wsClient)

            # Mark the job as done.
            status = "done"
            tb = None
            self._log(log.INFO, 'Job '+job['id']+' finished for genome '+input['genome']+' to probanno '+input['probanno'])

        except:
            tb = traceback.format_exc()
            sys.stderr.write('\n'+tb)
            status = "failed"
            self._log(log.ERR, 'Job '+job['id']+' failed for genome '+input['genome']+' to probanno '+input['probanno'])
        
        # Mark the job as complete with the given status.
        ujsClient.complete_job(job['id'], self.ctx['token'], status, tb, { })

        # Remove the temporary work directory.
        if self.logger.get_log_level() < log.DEBUG2 and status == 'done':
            try:
                shutil.rmtree(workFolder)
            except OSError:
                # For some reason deleting the directory was failing in production. Rather than have all jobs look like they failed,
                # catch and log the exception here (the user still gets the same result if the directory remains intact).
                msg = 'Unable to delete temporary directory %s\n' %(workFolder)
                sys.stderr.write('WARNING: '+msg)
                self._log(log.WARNING, msg)

        return
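The timestamp() helper called throughout these job scripts (timestamp(3600), timestamp(300), and so on) is not shown in any of the excerpts. A minimal sketch, assuming it simply formats a UTC time the given number of seconds in the future using the same '%Y-%m-%dT%H:%M:%S+0000' layout the dispatch scripts pass to UJS:

import datetime

def timestamp(deltaseconds):
    # Sketch only: UTC time deltaseconds from now, in the format the
    # surrounding scripts use for UJS estimated-completion times.
    when = datetime.datetime.utcnow() + datetime.timedelta(seconds=deltaseconds)
    return when.strftime('%Y-%m-%dT%H:%M:%S+0000')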
Example #29
0
                        help='url for user and job state service',
                        action='store',
                        dest='ujsURL',
                        default='https://kbase.us/services/userandjobstate/')
    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()

    # Get the list of jobs for the user.
    if 'KB_AUTH_USER_ID' in os.environ:
        userID = os.environ.get('KB_AUTH_USER_ID')
    else:
        auth = _read_inifile()
        userID = auth['user_id']
    ujsClient = UserAndJobState(args.ujsURL)
    try:
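        # 'RCE' appears to filter the listing to running, completed, and errored jobs.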
        jobList = ujsClient.list_jobs([userID], 'RCE')
    except JobStateServerError as e:
        print 'Error getting job list: ' + e.message
        exit(1)

    # See if the user has any jobs in the list.
    if len(jobList) == 0:
        print 'There are no jobs for you.'
        exit(1)

    # Print info about the specific job if requested.
    if args.jobID is not None:
        for job in jobList:
            info = job_info_dict(job)
Example #30
0
    parser.add_argument('-e', '--ext_type', help='External object type', action='store', dest='etype', default=None, required=True)

    parser.add_argument('-a', '--opt_args', help='Optional argument json string', action='store', dest='opt_args', default='{"validator":{},"transformer":{}}')

    parser.add_argument('-l', '--support_dir', help='Support directory', action='store', dest='sdir', default='lib')
    parser.add_argument('-d', '--del_lib_dir', help='Delete library directory', action='store', dest='del_tmps', default='true')
    parser.add_argument('-f', '--in_tmp_file', help='Input temporary file name', action='store', dest='itmp', default='infile')

    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()


    kb_token = os.environ.get('KB_AUTH_TOKEN')
    ujs = UserAndJobState(url=args.ujs_url, token=kb_token)

    est = datetime.datetime.utcnow() + datetime.timedelta(minutes=3)
    if args.jid is not None:
      ujs.update_job_progress(args.jid, kb_token, 'Dispatched', 1, est.strftime('%Y-%m-%dT%H:%M:%S+0000') )


    # main loop
    args.opt_args = json.loads(args.opt_args)

    validator = Validator(args)

    try:
      validator.download_shock_data()
    except:
      e,v = sys.exc_info()[:2]
Example #31
0
#! /usr/bin/python

import argparse
import sys
import os
import json
import traceback
from biokbase.probabilistic_annotation.Worker import ProbabilisticAnnotationWorker
from biokbase.userandjobstate.client import UserAndJobState

if __name__ == "__main__":
    parser = argparse.ArgumentParser(prog='pa-runjob')
    parser.add_argument('jobDirectory', help='path to job directory for the job', action='store', default=None)
    args = parser.parse_args()
    
    # Run the job.
    jobDataPath = os.path.join(args.jobDirectory, "jobdata.json")
    job = json.load(open(jobDataPath, 'r'))
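    # jobdata.json was written by the server's annotate() dispatcher and carries
    # the 'id', 'input', 'context', and 'config' sections runAnnotate() expects.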
    try:
        worker = ProbabilisticAnnotationWorker()
        worker.runAnnotate(job)
    except Exception as e:
        # Mark the job as failed.
        tb = traceback.format_exc()
        sys.stderr.write(tb)
        ujsClient = UserAndJobState(job['config']['userandjobstate_url'], token=job['context']['token'])
        ujsClient.complete_job(job['id'], job['context']['token'], 'failed', tb, { })
    
    exit(0)
Example #32
0
    def runJob(self, job):

        self.config = job['config']
        self.context = job['context']
        self.input = job['input']

        # Create a shock client and authenticate as the user.
        self.shockClient = ShockClient(self.config['shock_url'],
                                       self.context['token'])

        # Create a user and job state client and authenticate as the user.
        ujsClient = UserAndJobState(self.config['userandjobstate_url'],
                                    token=self.context['token'])

        # Create a process pool.
        self.pool = Pool(processes=int(self.config['num_pool_processes']))

        # Create a work directory for storing intermediate files.
        self.jobDirectory = make_job_dir(self.config['work_folder_path'],
                                         job['id'])
        self._log(
            log.INFO, 'Job ' + job['id'] + ' running with work folder ' +
            self.jobDirectory)

        # Download input fasta files from Shock and extract sequences to work directory.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'],
                                          'extracting sequence files', 1,
                                          timestamp(3600))
        except:
            pass
        resultList = []
        sequenceList = []
        for nodeId in self.input['node_ids']:
            node = self.shockClient.get_node(nodeId)
            sourceFile = os.path.join(self.jobDirectory, node['file']['name'])
            destFile = '%s.sequence' % (os.path.splitext(sourceFile)[0])
            if PairSeparator in destFile:  # Check for pair separator string in file name and replace as needed.
                destFile = destFile.replace(PairSeparator, '-')
            sequenceList.append(destFile)
            args = dict()  # Needs to be scoped here so each process gets its own copy
            args['format'] = self.input['format']
            args['shockUrl'] = self.config['shock_url']
            args['auth'] = self.context['token']
            args['sequenceLen'] = self.input['sequence_length']
            args['minReads'] = self.input['min_reads']
            args['maxReads'] = self.input['max_reads']
            args['nodeId'] = nodeId
            args['sourceFile'] = sourceFile
            args['destFile'] = destFile
            result = self.pool.apply_async(extract_seq, (args, ))
            resultList.append(result)
        for result in resultList:
            if result.get() != 0:
                self._cleanup()
                raise ExtractError(
                    "Error extracting sequences from input sequence file, result: %d"
                    % (result.get()))
        for path in self.input['file_paths']:
            sourceFile = os.path.basename(path)
            destFile = '%s/%s.sequence' % (self.jobDirectory,
                                           os.path.splitext(sourceFile)[0])
            if PairSeparator in destFile:  # Check for pair separator string in file name and replace as needed.
                destFile = destFile.replace(PairSeparator, '-')
            sequenceList.append(destFile)
            args = dict()  # Needs to be scoped here so each process gets its own copy
            args['format'] = self.input['format']
            args['shockUrl'] = self.config['shock_url']
            args['auth'] = self.context['token']
            args['sequenceLen'] = self.input['sequence_length']
            args['minReads'] = self.input['min_reads']
            args['maxReads'] = self.input['max_reads']
            args['nodeId'] = None
            args['sourceFile'] = path
            args['destFile'] = destFile
            result = self.pool.apply_async(extract_seq, (args, ))
            resultList.append(result)
        for result in resultList:
            try:
                result.get()
            except Exception as e:
                self._cleanup()
                raise ExtractError(
                    "Error extracting sequences from input sequence file: %s" %
                    (e.message))

        # Confirm that each file met the criteria for sequence length and number of sequences.
        filesToRemove = list()
        for index in range(len(sequenceList)):
            # See if the file did not have the minimum number of sequences.
            if not os.path.exists(sequenceList[index]):
                filesToRemove.append(index)
                continue

            # See if the file has no data.
            if os.path.getsize(sequenceList[index]) == 0:
                self._cleanup()
                raise SeqLenError("Sequence file '%s' has no sequences" %
                                  (sequenceList[index]))

        filteredList = list()
        for index in range(len(sequenceList)):
            if index not in filesToRemove:
                filteredList.append(sequenceList[index])
        if len(filteredList) < 2:
            self._cleanup()
            raise SeqLenError(
                "There are not enough sequence files that meet the sequence length or number of sequences criteria."
            )

        # Sort the sequences.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'],
                                          'sorting sequence files', 1,
                                          timestamp(3600))
        except:
            pass
        resultList = []
        sortedList = []
        for sourceFile in filteredList:
            destFile = '%s.sorted' % (os.path.splitext(sourceFile)[0])
            sortedList.append(destFile)
            args = ['/usr/bin/sort', '--output=%s' % (destFile), sourceFile]
            result = self.pool.apply_async(run_command, (args, ))
            resultList.append(result)
        for result in resultList:
            try:
                result.get()
            except CommandError as e:
                self._cleanup()
                raise SortError(
                    "Error sorting sequence file: %s\nCommand: '%s'\nStdout: '%s'\nStderr: '%s'"
                    % (e.message, e.cmd, e.stdout, e.stderr))

        # Create combined and sorted files.
        try:
            ujsClient.update_job_progress(
                job['id'], self.context['token'],
                'merging all pairs of sequence files', 1, timestamp(3600))
        except:
            pass
        resultList = []
        for p, q in combinations(sortedList, 2):
            pbase = os.path.basename(p)
            qbase = os.path.basename(q)
            dbase = '%s%s%s.sorted' % (os.path.splitext(pbase)[0],
                                       PairSeparator,
                                       os.path.splitext(qbase)[0])
            destFile = os.path.join(self.jobDirectory, dbase)
            sortedList.append(destFile)
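            # The merged file is added to sortedList so it is compressed along with the individual sorted files in the next step.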
            args = ['/usr/bin/sort', '-m', '--output=%s' % (destFile), p, q]
            result = self.pool.apply_async(run_command, (args, ))
            resultList.append(result)
        for result in resultList:
            try:
                result.get()
            except CommandError as e:
                self._cleanup()
                raise MergeError(
                    "Error merging sequence file: %s\nCommand: '%s'\nStdout: '%s'\nStderr: '%s'"
                    % (e.message, e.cmd, e.stdout, e.stderr))

        # Compress all sorted files.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'],
                                          'compressing sequence files', 1,
                                          timestamp(3600))
        except:
            pass
        resultList = []
        compressedList = []
        for sourceFile in sortedList:
            compressedList.append(sourceFile + '.xz')
            if self.input['extreme']:
                level = '-9e'
            else:
                level = '-9'
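            # '-9e' adds xz extreme mode, trading extra CPU time for a slightly better compression ratio.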
            args = ['/usr/bin/xz', '--keep', level, '--no-warn', sourceFile]
            result = self.pool.apply_async(run_command, (args, ))
            resultList.append(result)
        for result in resultList:
            try:
                result.get()
            except CommandError as e:
                self._cleanup()
                raise CompressError(
                    "Error compressing sequence file: %s\nCommand: '%s'\nStdout: '%s'\nStderr: '%s'"
                    % (e.message, e.cmd, e.stdout, e.stderr))

        # Calculate the distance matrix.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'],
                                          'calculating distance matrix', 1,
                                          timestamp(3600))
        except:
            pass
        csvFile = os.path.join(self.jobDirectory, '%s.csv' % (job['id']))
        self._cbdCalculator(compressedList, self.input['scale'], csvFile)

        # Store the output file in shock.
        try:
            ujsClient.update_job_progress(job['id'], self.context['token'],
                                          'storing output file in shock', 1,
                                          timestamp(3600))
        except:
            pass
        node = self.shockClient.create_node(csvFile, '')
        if not node['id']:
            # Shock let us down. Save the distance matrix in the work directory for possible recovery.
            os.rename(
                csvFile,
                '%s/%s.csv' % (self.config['work_folder_path'], job['id']))
            self._cleanup()
            raise ShockError(
                "Error saving distance matrix file to Shock. A Shock node was not created."
            )

        # Mark the job as complete.
        results = {
            'shocknodes': [node['id']],
            'shockurl': self.config['shock_url']
        }
        ujsClient.complete_job(job['id'], self.context['token'], 'done', None,
                               results)
        self._log(log.INFO, 'Job ' + job['id'] + ' completed successfully')

        # Cleanup after ourselves.
        self._cleanup()

        return
Example #33
0
class NarrativeJobProxy:
    '''
    Module Name:
    NarrativeJobProxy

    Module Description:
    Very simple proxy that reauthenticates requests to the user_and_job_state
service as the narrative user.

DO NOT DEPLOY PUBLICLY
    '''

    ######## WARNING FOR GEVENT USERS #######
    # Since asynchronous IO can lead to methods - even the same method -
    # interrupting each other, you must be *very* careful when using global
    # state. A method could easily clobber the state set by another while
    # the latter method is running.
    #########################################
    #BEGIN_CLASS_HEADER
    UPDATE_TOKEN_INTERVAL = 24 * 60 * 60  # 1 day in sec
#    UPDATE_TOKEN_INTERVAL = 10

    def _update_token(self):
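        # Skip the refresh if another request is already re-authenticating or
        # if the token was refreshed within the last UPDATE_TOKEN_INTERVAL.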
        if self._updating:
            return
        if (time.time() - self._updated_at < self.UPDATE_TOKEN_INTERVAL):
            return
        self._updating = True
        print('Updating token at ' + str(time.time()))
        try:
            self._ujs = UserAndJobState(self._url, user_id=self._user,
                                        password=self._pwd)
            self._updated_at = time.time()
        finally:  # otherwise token will never be updated
            self._updating = False

    #END_CLASS_HEADER

    # config contains contents of config file in a hash or None if it couldn't
    # be found
    def __init__(self, config):
        #BEGIN_CONSTRUCTOR
        self._user = config.get('narrative_user')
        self._pwd = config.get('narrative_user_pwd')
        if not self._user or not self._pwd:
            raise ValueError(
                'narrative user and/or narrative pwd missing from deploy.cfg')
        self._url = config.get('ujs_url')
        if not self._url:
            raise ValueError('UJS url missing from deploy.cfg')
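        # Start far enough in the past that the first _update_token() call always authenticates.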
        self._updated_at = - self.UPDATE_TOKEN_INTERVAL
        self._updating = False
        self._update_token()
        #END_CONSTRUCTOR
        pass

    def ver(self, ctx):
        # ctx is the context object
        # return variables are: ver
        #BEGIN ver
        ver = '0.0.1'
        #END ver

        # At some point might do deeper type checking...
        if not isinstance(ver, basestring):
            raise ValueError('Method ver return value ' +
                             'ver is not type basestring as required.')
        # return the results
        return [ver]

    def get_detailed_error(self, ctx, job):
        # ctx is the context object
        # return variables are: error
        #BEGIN get_detailed_error
        self._update_token()
        error = self._ujs.get_detailed_error(job)
        #END get_detailed_error

        # At some point might do deeper type checking...
        if not isinstance(error, basestring):
            raise ValueError('Method get_detailed_error return value ' +
                             'error is not type basestring as required.')
        # return the results
        return [error]

    def get_job_info(self, ctx, job):
        # ctx is the context object
        # return variables are: info
        #BEGIN get_job_info
        self._update_token()
        info = self._ujs.get_job_info(job)
        #END get_job_info

        # At some point might do deeper type checking...
        if not isinstance(info, list):
            raise ValueError('Method get_job_info return value ' +
                             'info is not type list as required.')
        # return the results
        return [info]
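A minimal construction sketch for the class above, assuming only the three deploy.cfg keys its constructor reads; all values are placeholders, and instantiation will attempt to authenticate against the UJS url:

# Sketch only: the dict mirrors the deploy.cfg keys read in __init__.
config = {
    'narrative_user': 'narrative',                # placeholder account
    'narrative_user_pwd': 'not-a-real-password',  # placeholder credential
    'ujs_url': 'https://kbase.us/services/userandjobstate',
}
proxy = NarrativeJobProxy(config)   # authenticates against ujs_url on construction
print(proxy.ver(None))              # ver() ignores ctx; prints ['0.0.1']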
Example #34
0
                        dest='itmp',
                        default='infile')
    parser.add_argument('-g',
                        '--out_tmp_file',
                        help='Output temporary file name',
                        action='store',
                        dest='otmp',
                        default='outfile')

    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()

    kb_token = os.environ.get('KB_AUTH_TOKEN')
    ujs = UserAndJobState(url=args.ujs_url, token=kb_token)

    est = datetime.datetime.utcnow() + datetime.timedelta(minutes=3)
    if args.jid is not None:
        ujs.update_job_progress(args.jid, kb_token, 'Dispatched', 1,
                                est.strftime('%Y-%m-%dT%H:%M:%S+0000'))

    ## main loop
    args.opt_args = json.loads(args.opt_args)
    if 'uploader' not in args.opt_args:
        args.opt_args['uploader'] = {}
        args.opt_args['uploader']['file'] = args.otmp
        args.opt_args['uploader']['input'] = args.inobj_id
        args.opt_args['uploader']['jid'] = args.jid
        args.opt_args['uploader']['etype'] = args.etype
    uploader = Uploader(args)
Example #35
0
'''

if __name__ == "__main__":
    # Parse options.
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, prog='cbd-getmatrix', epilog=desc3)
    parser.add_argument('jobID', help='path to file with list of input sequence files', action='store', default=None)
    parser.add_argument('outputPath', help='path to output csv file', action='store', default=None)
    parser.add_argument('--show-times', help='show job start and end timestamps', action='store_true', dest='showTimes', default=False)
    parser.add_argument('--ujs-url', help='url for user and job state service', action='store', dest='ujsURL', default='https://kbase.us/services/userandjobstate')
    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()
    
    # Get the status of the specified job.
    ujsClient = UserAndJobState(args.ujsURL)
    try:
        info = job_info_dict(ujsClient.get_job_info(args.jobID))
    except JobStateServerError as e:
        print e.message
        exit(1)

    # Check if the job had an error.
    if info['error']:
        print "Job '%s' ended with error '%s' and no results are available." %(args.jobID, info['status'])
        print 'Error details:'
        print ujsClient.get_detailed_error(args.jobID)
        ujsClient.delete_job(args.jobID)
        exit(1)

    # Check if the job is complete.
Example #36
0
    # Parse options.
    parser = argparse.ArgumentParser(formatter_class=argparse.RawDescriptionHelpFormatter, prog='pa_checkjob', epilog=desc3)
    parser.add_argument('-j', '--jobID', help='job ID', action='store', dest='jobID', default=None)
    parser.add_argument('--ujs-url', help='url for user and job state service', action='store', dest='ujsURL', default='https://kbase.us/services/userandjobstate/')
    usage = parser.format_usage()
    parser.description = desc1 + '      ' + usage + desc2
    parser.usage = argparse.SUPPRESS
    args = parser.parse_args()
    
    # Get the list of jobs for the user.
    if 'KB_AUTH_USER_ID' in os.environ:
        userID = os.environ.get('KB_AUTH_USER_ID')
    else:
        auth = _read_inifile()
        userID = auth['user_id']
    ujsClient = UserAndJobState(args.ujsURL)
    try:
        jobList = ujsClient.list_jobs([ userID ], 'RCE')
    except JobStateServerError as e:
        print e.message
        exit(1)
    
    # See if the user has any jobs in the list.
    if len(jobList) == 0:
        print 'There are no jobs for you.'
        exit(1)

    # Print info about the specific job if requested.
    if args.jobID is not None:
        for job in jobList:
            info = job_info_dict(job)
Example #37
0
    if logger is None:
        raise Exception("A logger must be provided for status information.")

    kb_token = None
    try:            
        kb_token = script_utils.get_token()
        
        assert type(kb_token) == type(str())
    except Exception, e:
        logger.debug("Exception getting token!")
        raise

    ujs = None    
    try:
        if ujs_job_id is not None:    
            ujs = UserAndJobState(url=ujs_service_url, token=kb_token)
            ujs.get_job_status(ujs_job_id)
    except Exception, e:
        logger.debug("Exception talking to UJS!")
        raise

    # used for cleaning up the job if an exception occurs
    cleanup_details = {"keep_working_directory": keep_working_directory,
                       "working_directory": working_directory}

    # used for reporting a fatal condition
    error_object = {"ujs_client": ujs,
                    "ujs_job_id": ujs_job_id,
                    "token": kb_token}

    est = datetime.datetime.utcnow() + datetime.timedelta(hours=24)
if __name__ == "__main__":
    # Parse options.
    parser = argparse.ArgumentParser()
    parser.add_argument('--ujs_service_url', help='UJS url', action='store', default='https://kbase.us/services/userandjobstate')
    parser.add_argument('--ujs_job_id', help='UJS job id', action='store', default=None, required=False)
    parser.add_argument('--external_type', help='External object type', action='store', default=None, required=True)
    parser.add_argument('--optional_arguments', help='Optional argument json string', action='store')

    parser.add_argument('--working_directory', help='Support directory', action='store', default='lib')
    parser.add_argument('--delete_working_directory', help='Delete library directory', action='store_true')
    parser.add_argument('--in_tmp_file', help='Input temporary file name', action='store', default='infile')

    args = parser.parse_args()

    kb_token = os.environ.get('KB_AUTH_TOKEN')
    ujs = UserAndJobState(url=args.ujs_service_url, token=kb_token)

    est = datetime.datetime.utcnow() + datetime.timedelta(minutes=3)
    if args.ujs_job_id is not None:
        ujs.update_job_progress(args.ujs_job_id, kb_token, 'Dispatched', 1, est.strftime('%Y-%m-%dT%H:%M:%S+0000'))

    # main loop
    args.optional_arguments = json.loads(args.optional_arguments)

    validator = Validator(args)

    try:
        validator.validation_handler()
    except:
        e,v = sys.exc_info()[:2]
        if args.ujs_job_id is not None: