def test_write_narrative_shared_write_access(self):
    if self.test_token is None or self.private_token is None:
        self.skipTest("Missing auth token(s)")
    # login as private_user, grant test_user write access to unauth_nar, logout
    self.login(token=self.private_token)
    ws_client = clients.get("workspace")
    ws_client.set_permissions({
        "id": self.unauth_nar["ws"],
        "new_permission": "w",
        "users": [self.test_user],
    })
    self.logout()
    # login as test_user, re-save unauth_nar (should succeed), logout
    self.login(token=self.test_token)
    nar = self.mixin.read_narrative(self.unauth_nar["ref"])["data"]
    self.mixin.write_narrative(self.unauth_nar["ref"], nar, self.test_user)
    self.logout()
    # log back in as private_user, remove test_user's permissions, log back out
    self.login(token=self.private_token)
    ws_client = clients.get("workspace")
    ws_client.set_permissions({
        "id": self.unauth_nar["ws"],
        "new_permission": "n",
        "users": [self.test_user],
    })
    self.logout()
def delete_job(self, job_id, parent_job_id=None):
    """
    Attempts to delete a job, canceling it first if it is still running.
    If the job_id isn't known to this JobManager, sends a job_does_not_exist
    comm message instead of raising. If the job cannot be canceled, or can be
    canceled but not deleted, the underlying exception propagates to the caller.
    """
    if job_id is None:
        raise ValueError('Job id required for deletion!')
    if not parent_job_id and job_id not in self._running_jobs:
        self._send_comm_message('job_does_not_exist',
                                {'job_id': job_id, 'source': 'delete_job'})
        return
    # Cancel before deleting; any exception from either call propagates.
    self.cancel_job(job_id, parent_job_id=parent_job_id)
    clients.get('user_and_job_state').delete_job(job_id)
    if job_id in self._running_jobs:
        del self._running_jobs[job_id]
    if job_id in self._completed_job_states:
        del self._completed_job_states[job_id]
    self._send_comm_message('job_deleted', {'job_id': job_id})
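# Hypothetical usage sketch (not in the original source): assumes a live
# Narrative session with a JobManager instance `jm`. delete_job cancels the
# job first, then deletes it from UJS; failures in either step propagate.
try:
    jm.delete_job("5a1b2c3d4e5f67890abcdef0")  # illustrative job id
except Exception as err:
    print("Could not delete job:", err)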
def resolve_single_ref(workspace, value):
    ret = None
    if '/' in value:
        path_items = [item.strip() for item in value.split(';')]
        for path_item in path_items:
            if len(path_item.split('/')) > 3:
                raise ValueError(
                    'Object reference {} has too many slashes - should be '
                    'workspace/object/version(optional)'.format(value))
        info = clients.get('workspace').get_object_info_new(
            {'objects': [{'ref': value}]})[0]
        # Replace the last path item with its fully qualified numeric reference.
        path_items[-1] = "{}/{}/{}".format(info[6], info[0], info[4])
        ret = ';'.join(path_items)
    # Otherwise, assume it's a name, not a reference.
    else:
        info = clients.get('workspace').get_object_info_new(
            {'objects': [{'workspace': workspace, 'name': value}]})[0]
        ret = "{}/{}/{}".format(info[6], info[0], info[4])
    return ret
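# Illustrative calls (workspace and object names are hypothetical). A bare
# name is resolved against the given workspace; a reference path has its
# final item normalized to numeric ws/obj/ver form.
upa = resolve_single_ref("my_workspace", "my_object")       # e.g. -> "123/45/6"
path = resolve_single_ref("my_workspace", "12/3/4;56/7/8")  # last item normalized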
def _create_jobs(self, job_ids):
    """
    TODO: error handling
    Makes a bunch of Job objects from job_ids.
    Initially used to make child jobs from some parent, but will eventually
    be adapted to all jobs on startup. Just slaps them all into _running_jobs.
    """
    job_states = clients.get('job_service').check_jobs(
        {'job_ids': job_ids, 'with_job_params': 1})
    for job_id in job_ids:
        # Skip jobs we already track; only then pay for the UJS lookup.
        if job_id in self._running_jobs:
            continue
        ujs_info = clients.get('user_and_job_state').get_job_info2(job_id)
        job_info = job_states.get('job_params', {}).get(job_id, {})
        job_meta = ujs_info[10]
        job = Job.from_state(job_id,       # the id
                             job_info,     # params, etc.
                             ujs_info[2],  # owner id
                             app_id=job_info.get('app_id', job_info.get('method')),
                             tag=job_meta.get('tag', 'release'),
                             cell_id=job_meta.get('cell_id', None),
                             run_id=job_meta.get('run_id', None),
                             token_id=job_meta.get('token_id', None),
                             meta=job_meta)
        # Note that when jobs for this narrative are initially loaded,
        # they are set to not be refreshed. Rather, if a client requests
        # updates via the start_job_update message, the refresh flag will
        # be set to True.
        self._running_jobs[job_id] = {'refresh': 0, 'job': job}
def cancel_job(self, job_id, parent_job_id=None):
    """
    Cancels a running job, placing it in a canceled state.
    Does NOT delete the job.
    Raises an exception if the current user doesn't have permission to cancel the job.
    """
    if job_id is None:
        raise ValueError('Job id required for cancellation!')
    if not parent_job_id and job_id not in self._running_jobs:
        self._send_comm_message('job_does_not_exist',
                                {'job_id': job_id, 'source': 'cancel_job'})
        return

    try:
        state = self._get_job_state(job_id, parent_job_id=parent_job_id)
        if state.get('canceled', 0) == 1 or state.get('finished', 0) == 1:
            # It's already finished, don't try to cancel it again.
            return
    except Exception as e:
        raise ValueError('Unable to get Job state') from e

    # Stop updating the job status while we try to cancel.
    # Also, give it a special state of 'canceling' while the cancel is in flight.
    if not parent_job_id:
        is_refreshing = self._running_jobs[job_id].get('refresh', 0)
        self._running_jobs[job_id]['refresh'] = 0
        self._running_jobs[job_id]['canceling'] = True
    try:
        clients.get('job_service').cancel_job({'job_id': job_id})
    except Exception as e:
        new_e = transform_job_exception(e)
        error = {
            'error': 'Unable to cancel job',
            'message': getattr(new_e, 'message', 'Unknown reason'),
            'code': getattr(new_e, 'code', -1),
            'source': getattr(new_e, 'source', 'jobmanager'),
            'name': getattr(new_e, 'name', type(e).__name__),
            'request_type': 'cancel_job',
            'job_id': job_id
        }
        self._send_comm_message('job_comm_error', error)
        raise
    finally:
        if not parent_job_id:
            self._running_jobs[job_id]['refresh'] = is_refreshing
            del self._running_jobs[job_id]['canceling']

    # Rather than a separate message, trigger a job-status message:
    self._lookup_job_status(job_id, parent_job_id=parent_job_id)
def _cancel_job(self, job_id: str) -> Optional[Exception]:
    """
    Attempts to cancel a job via EE2. Returns None on success, or the
    transformed exception on failure, rather than raising. (The original
    annotation said -> None, which didn't match the return value.)
    """
    # Stop updating the job status while we try to cancel.
    # Set the job to a special state of 'canceling' while we're doing the cancel.
    is_refreshing = self._running_jobs[job_id].get("refresh", False)
    self._running_jobs[job_id]["refresh"] = False
    self._running_jobs[job_id]["canceling"] = True
    error = None
    try:
        clients.get("execution_engine2").cancel_job({"job_id": job_id})
    except Exception as e:
        error = transform_job_exception(e, "Unable to cancel job")
    self._running_jobs[job_id]["refresh"] = is_refreshing
    del self._running_jobs[job_id]["canceling"]
    return error
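# Caller-side sketch (hypothetical JobManager instance `jm`): unlike the
# public cancel_job, _cancel_job reports failure by returning the transformed
# error instead of raising.
err = jm._cancel_job("5a1b2c3d4e5f67890abcdef0")  # illustrative job id
if err is not None:
    print("cancel failed:", err)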
def cancel_job(self, job_id):
    """
    Cancels a running job, placing it in a canceled state.
    Does NOT delete the job.
    Raises an exception if the current user doesn't have permission to cancel the job.
    """
    if job_id is None:
        raise ValueError('Job id required for cancellation!')
    if job_id not in self._running_jobs:
        self._send_comm_message('job_does_not_exist',
                                {'job_id': job_id, 'source': 'cancel_job'})
        return

    try:
        job = self.get_job(job_id)
        state = job.state()
        if state.get('canceled', 0) == 1 or state.get('finished', 0) == 1:
            # It's already finished, don't try to cancel it again.
            return
    except Exception as e:
        raise ValueError('Unable to get Job state') from e

    # Stop updating the job status while we try to cancel.
    # Also, give it a special state of 'canceling' while the cancel is in flight.
    is_refreshing = self._running_jobs[job_id].get('refresh', False)
    self._running_jobs[job_id]['refresh'] = False
    self._running_jobs[job_id]['canceling'] = True
    try:
        clients.get('job_service').cancel_job({'job_id': job_id})
    except Exception as e:
        new_e = transform_job_exception(e)
        error = {
            'error': 'Unable to cancel job',
            'message': getattr(new_e, 'message', 'Unknown reason'),
            'code': getattr(new_e, 'code', -1),
            'source': getattr(new_e, 'source', 'jobmanager'),
            'name': getattr(new_e, 'name', type(e).__name__),
            'request_type': 'cancel_job',
            'job_id': job_id
        }
        self._send_comm_message('job_comm_error', error)
        raise
    finally:
        self._running_jobs[job_id]['refresh'] = is_refreshing
        del self._running_jobs[job_id]['canceling']

    # Rather than a separate 'job_canceled' message, trigger a job-status message:
    self._lookup_job_status(job_id)
def _get_narrative_objid(self):
    """
    Attempts to find the Narrative object id given a workspace id. This is
    only called on the internal wsid, which must be an int.
    Can raise:
    - PermissionsError - if the current user doesn't have access to that workspace
    - RuntimeError - if there's anything wrong with the workspace metadata that's
      supposed to contain the narrative object id (either missing or not an int)
    - ServerError - if anything else bad happens from the Workspace
    """
    objid = None
    try:
        ws_meta = clients.get("workspace").get_workspace_info({"id": self.wsid})[8]
        objid = ws_meta.get("narrative")
        return int(objid)
    except (ValueError, TypeError):
        if objid is None:
            err = "Couldn't find Narrative object id in Workspace metadata."
        else:
            err = ("Expected an integer while looking up the Narrative object id, "
                   "got '{}'".format(objid))
        raise RuntimeError(err)
    except ServerError as err:
        raise WorkspaceError(err, self.wsid)
def list_objects(obj_type=None, name=None, fuzzy_name=True):
    """
    Returns a list of all objects in the current workspace with type=obj_type.

    obj_type is a string. If None, returns all visible objects (no reports,
    data palettes, etc.).

    name is a string. If None, returns everything; otherwise it is used to
    filter the search. If fuzzy_name is True, name is used as a substring
    filter: e.g. "foo" would match "Foobar" and "Bazfoo". It doesn't go the
    other way - name="Foobar" will not match an object named "foo". If
    fuzzy_name is False, only exact (case-insensitive) matches are allowed.
    This has limited use, but it's handy for fetching UPAs for objects you
    know, or names you're iterating over another way.

    This first prototype returns a list of dictionaries, where each dict
    contains 'type', 'upa', and 'name' keys for each object.
    """
    ws_name = system_variable('workspace')
    service = clients.get('service')
    service_params = {'ws_name': ws_name}
    if obj_type is not None:
        # matches:       foo.bar, foo.bar-1.0
        # doesn't match: foo, foo.bar-, foobar-, foo.bar-1.2.0
        if not re.match(r"[A-Za-z]+\.[A-Za-z]+(-\d+\.\d+)?$", obj_type):
            raise ValueError(
                '{} is not a valid type. Valid types are of the format '
                '"Module.Type" or "Module.Type-Version"'.format(obj_type))
        service_params['types'] = [obj_type]
    all_obj = service.sync_call('NarrativeService.list_objects_with_sets',
                                [service_params])[0]
    obj_list = list()
    for obj in all_obj['data']:
        # filtration!
        # 1. ignore narratives
        if 'KBaseNarrative.Narrative' in obj['object_info'][2]:
            continue
        # 2. name filter
        if name is not None:
            name = str(name).lower()
            # if we're not strict, just search for the string
            if fuzzy_name is True and name not in obj['object_info'][1].lower():
                continue
            elif fuzzy_name is False and name != obj['object_info'][1].lower():
                continue
        upa_prefix = ''  # gavin's gonna wreck me.
        if 'dp_info' in obj:  # seriously.
            upa_prefix = obj['dp_info']['ref'] + ';'  # not like I want to support this, either...
        info = obj['object_info']
        obj_list.append({
            "upa": "{}{}/{}/{}".format(upa_prefix, info[6], info[0], info[4]),
            "name": info[1],
            "type": info[2]
        })
    return obj_list
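# Usage sketch (type and name values are hypothetical), assuming a running
# Narrative session where system_variable('workspace') resolves:
genomes = list_objects(obj_type="KBaseGenomes.Genome", name="coli", fuzzy_name=True)
for g in genomes:
    print(g["upa"], g["name"], g["type"])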
def state(self):
    """
    Queries the job service to see the status of the current job.
    Returns the raw state dict from the service, cached once the job
    reaches a terminal state (completed, terminated, or error).
    """
    if self._last_state is not None and self._last_state.get('status') in [
        'completed', 'terminated', 'error'
    ]:
        return self._last_state
    try:
        state = clients.get('execution_engine2').check_job({
            'job_id': self.job_id,
            'exclude_fields': EXCLUDED_JOB_STATE_FIELDS
        })
        state['job_output'] = state.get('job_output', {})
        state['cell_id'] = self.cell_id
        state['run_id'] = self.run_id
        state['token_id'] = self.token_id
        self._last_state = state
        return dict(state)
    except Exception as e:
        raise Exception(f"Unable to fetch info for job {self.job_id} - {e}")
def get_df(ws_ref, col_attributes=(), row_attributes=(), clustergrammer=False):
    """
    Gets a dataframe from the WS object.

    :param ws_ref: The Workspace reference of the 2DMatrix containing object
    :param col_attributes: Which column attributes should appear in the resulting
        DataFrame as a multiIndex. Defaults to all attributes; pass None to use a
        simple index of only ID.
    :param row_attributes: Which row attributes should appear in the resulting
        DataFrame as a multiIndex. Defaults to all attributes; pass None to use a
        simple index of only ID.
    :param clustergrammer: Returns a DataFrame with Clustergrammer compatible
        indices and columns. Defaults to False.
    :return: A Pandas DataFrame
    """
    ws = clients.get('workspace')
    if "/" not in ws_ref:
        ws_ref = "{}/{}".format(system_variable('workspace'), ws_ref)
    generic_data = ws.get_objects2({'objects': [{'ref': ws_ref}]})['data'][0]['data']
    if not _is_compatible_matrix(generic_data):
        raise ValueError("{} is not a compatible data type for this viewer. Data type "
                         "must contain a 'data' key with a FloatMatrix2D type value"
                         .format(ws_ref))
    cols = _get_categories(generic_data['data']['col_ids'],
                           ws_ref,
                           generic_data.get('col_attributemapping_ref'),
                           generic_data.get('col_mapping'),
                           col_attributes,
                           clustergrammer)
    rows = _get_categories(generic_data['data']['row_ids'],
                           ws_ref,
                           generic_data.get('row_attributemapping_ref'),
                           generic_data.get('row_mapping'),
                           row_attributes,
                           clustergrammer)
    return pd.DataFrame(data=generic_data['data']['values'], columns=cols, index=rows)
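# Usage sketch (object name "my_matrix" and attribute names are hypothetical):
# pull the matrix with a plain ID index, then with two row attributes as a
# MultiIndex.
df_plain = get_df("my_matrix", col_attributes=None, row_attributes=None)
df_multi = get_df("my_matrix", row_attributes=("strain", "condition"))
print(df_multi.shape)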
def _run_dynamic_service_internal(self, app_id, params, tag, version,
                                  cell_id, run_id, **kwargs):
    # Intro tests:
    self.spec_manager.check_app(app_id, tag, raise_exception=True)
    if version is not None and tag != "release":
        raise ValueError("App versions only apply to released app modules!")

    # Get the spec & params
    spec = self.spec_manager.get_spec(app_id, tag)
    if 'behavior' not in spec:
        raise ValueError("This app appears invalid - it has no defined behavior")
    behavior = spec['behavior']
    if 'script_module' in behavior or 'script_name' in behavior:
        # It's an old NJS script. These don't work anymore.
        raise ValueError('This app relies on a service that is now obsolete. '
                         'Please contact the administrator.')

    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': tag,
        'username': system_variable('user_id'),
        'ws': system_variable('workspace')
    }
    kblogging.log_event(self._log, "run_dynamic_service", log_info)

    # Silly to keep this here, but we do not validate the incoming parameters.
    # If they are provided by the UI (we have cell_id), they are constructed
    # according to the spec, so are trusted.
    # Otherwise, if they are the product of direct code cell entry, this is a
    # mode we do not "support", so we can let it fail hard.
    # In the future, when code cell interaction is supported for users, we will
    # need to provide robust validation and error reporting, but this may end
    # up being (should be) provided by the sdk execution infrastructure anyway.
    input_vals = params
    function_name = (spec['behavior']['kb_service_name'] + '.' +
                     spec['behavior']['kb_service_method'])
    result = clients.get("service").sync_call(function_name, input_vals,
                                              service_version=tag)[0]
    # If a UI call (a cell_id is defined), send a result message; otherwise
    # return the raw result for display in a code cell. This is how we
    # "support" code cells for internal usage.
    if cell_id:
        self.send_cell_message('result', cell_id, run_id, {'result': result})
    else:
        return result
def _update_log(self):
    log_update = clients.get("execution_engine2").get_job_logs({
        "job_id": self.job_id,
        "skip_lines": len(self._job_logs)
    })
    if log_update["lines"]:
        self._job_logs = self._job_logs + log_update["lines"]
def _get_all_job_states(self, job_ids=None):
    """
    Returns the state for all running jobs, as a dict keyed by job id,
    where each value has this structure:
    {
        cell_id: (optional) id of the cell that spawned the job
        run_id: (optional) id of the job run
        awe_job_state: string
        creation_time: timestamp (ms since epoch)
        finished: 0/1
        job_id: string
        job_state: string
        status: [ timestamp, _, _, _, _, _, _ ], (7-tuple)
        sub_jobs: [],
        ujs_url: string,
        child_jobs: []
    }
    """
    # 1. Get list of ids
    if job_ids is None:
        job_ids = self._running_jobs.keys()
    # 1.5 Go through job ids and remove ones that aren't found.
    job_ids = [j for j in job_ids if j in self._running_jobs]
    # 2. For each, check the completed cache. If present, grab the status;
    # otherwise, enqueue the id for batch lookup.
    job_states = dict()
    jobs_to_lookup = list()
    for job_id in job_ids:
        if job_id in self._completed_job_states:
            job_states[job_id] = dict(self._completed_job_states[job_id])
        else:
            jobs_to_lookup.append(job_id)
    # 3. Look up the jobs that need it. Cache them as we go, if finished.
    try:
        fetched_states = clients.get('job_service').check_jobs(
            {'job_ids': jobs_to_lookup})
        fetched_states = sanitize_all_states(fetched_states)
    except Exception as e:
        kblogging.log_event(self._log, 'get_all_job_states_error', {'err': str(e)})
        return {}
    error_states = fetched_states.get('check_error', {})
    fetched_states = fetched_states.get('job_states', {})
    for job_id in jobs_to_lookup:
        if job_id in fetched_states:
            state = fetched_states[job_id]
            state['cell_id'] = self._running_jobs[job_id]['job'].cell_id
            state['run_id'] = self._running_jobs[job_id]['job'].run_id
            if state.get('finished', 0) == 1:
                self._completed_job_states[state['job_id']] = dict(state)
            job_states[state['job_id']] = state
        elif job_id in error_states:
            # Key by the loop's job_id; 'state' from a previous iteration
            # would be stale (or unbound) here.
            job_states[job_id] = {'lookup_error': error_states[job_id]}
    return job_states
def _update_log(self):
    log_update = clients.get("job_service").get_job_logs({
        'job_id': self.job_id,
        'skip_lines': len(self._job_logs)
    })
    if log_update['lines']:
        self._job_logs = self._job_logs + log_update['lines']
def generate_app_cell(validated_spec=None, spec_tuple=None):
    """Produces an invisible blob of JavaScript that inserts a new cell in the
    notebook and crams the validated_spec into it. It then removes itself, so
    it won't happen again on page reload.

    For the inputs, validated_spec > spec_tuple. That is, if validated_spec is
    present, that's always used. If spec_tuple is there and validated_spec is
    not, then the tuple's used. The tuple should be (spec_json, display_yaml),
    both as strings.
    """
    if spec_tuple is not None and validated_spec is None:
        nms = clients.get("narrative_method_store")
        validated = nms.validate_method({
            "id": "some_test_app",
            "spec_json": spec_tuple[0],
            "display_yaml": spec_tuple[1],
        })
        if validated.get("is_valid", 0) == 1:
            validated_spec = validated["method_spec"]
        elif "errors" in validated and validated["errors"]:
            raise Exception(validated["errors"])

    # Each of the values of the validated spec needs to be escaped for JS.
    # Specifically, double and single quotes get converted to their HTML
    # entities by _fix_quotes. This isn't done so much on the frontend because
    # of how it's already interpreted and injected into the cell metadata, but
    # it's necessary for this little function.
    if "info" in validated_spec:
        for key in ["name", "subtitle", "tooltip"]:
            validated_spec["info"][key] = _fix_quotes(
                validated_spec["info"].get(key, ""))
    if "parameters" in validated_spec:
        for i in range(len(validated_spec["parameters"])):
            p = validated_spec["parameters"][i]
            for key in ["ui_name", "short_hint", "description"]:
                p[key] = _fix_quotes(p.get(key, ""))

    js_template = """
    var outputArea = this,
        cellElement = outputArea.element.parents('.cell'),
        cellIdx = Jupyter.notebook.get_cell_elements().index(cellElement),
        thisCell = Jupyter.notebook.get_cell(cellIdx),
        spec_json = '{{spec}}',
        cellData = {
            type: 'devapp',
            appTag: 'dev',
            appSpec: JSON.parse(spec_json)
        };
    Jupyter.narrative.insertAndSelectCell('code', 'below', cellIdx, cellData);
    """
    js_code = Template(js_template).render(spec=json.dumps(validated_spec))
    return Javascript(data=js_code, lib=None, css=None)
def query_ee2_state(
    job_id: str,
    init: bool = True,
) -> dict:
    return clients.get("execution_engine2").check_job({
        "job_id": job_id,
        "exclude_fields": (JOB_INIT_EXCLUDED_JOB_STATE_FIELDS
                           if init else EXCLUDED_JOB_STATE_FIELDS),
    })
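# Usage sketch (illustrative job id): init=True applies the larger exclusion
# list used for a first lookup; init=False is for subsequent refreshes.
first = query_ee2_state("5a1b2c3d4e5f67890abcdef0", init=True)
later = query_ee2_state("5a1b2c3d4e5f67890abcdef0", init=False)
print(later.get("status"))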
def _get_categories(ids, matrix_ref, attributemapping_ref=None, mapping=None,
                    whitelist=(), clustergrammer=False):
    """Creates the correct kind of multi-factor index for clustergrammer display."""
    if not attributemapping_ref or whitelist is None:
        return ids
    cat_list = []
    ws = clients.get("workspace")
    attribute_data = ws.get_objects2(
        {"objects": [{"ref": matrix_ref + ";" + attributemapping_ref}]}
    )["data"][0]["data"]
    if not mapping:
        mapping = {x: x for x in ids}
    whitelist = set(whitelist)
    for _id in ids:
        try:
            attribute_values = attribute_data["instances"][mapping[_id]]
        except KeyError:
            if _id not in mapping:
                raise ValueError(
                    "Row or column id {} is not in the provided mapping".format(_id))
            raise ValueError(
                "AttributeMapping {} has no attribute {} which corresponds to row "
                "or column id {} in the provided object.".format(
                    attributemapping_ref, mapping[_id], _id))
        cats = [_id]
        for i, val in enumerate(attribute_values):
            cat_name = attribute_data["attributes"][i]["attribute"]
            if whitelist and cat_name not in whitelist:
                continue
            if clustergrammer:
                cats.append("{}: {}".format(cat_name, val))
            else:
                cats.append(val)
        cat_list.append(tuple(cats))
    if clustergrammer:
        return cat_list
    attribute_names = [
        x["attribute"] for x in attribute_data["attributes"]
        if not whitelist or x["attribute"] in whitelist
    ]
    return pd.MultiIndex.from_tuples(cat_list, names=["ID"] + attribute_names)
def reload(self):
    """
    Reloads all app specs into memory from the latest update.
    """
    for tag in app_version_tags:
        specs = clients.get('narrative_method_store').list_methods_spec({'tag': tag})
        spec_dict = dict()
        for spec in specs:
            spec_dict[spec['info']['id']] = spec
        self.app_specs[tag] = spec_dict
def test_valid_clients(self):
    name_to_type = {
        "workspace": WS_Client,
        "execution_engine2": EE2_Client,
        "narrative_method_store": NMS_Client,
        "service": Service_Client,
        "catalog": Catalog_Client,
    }
    for client_name, client_type in name_to_type.items():
        client = clients.get(client_name)
        self.assertIsInstance(client, client_type)
def retry_jobs(self, job_id_list: List[str]) -> dict:
    """
    Returns
    [
        {
            "job_id": job_id,
            "job": {"state": {"job_id": job_id, "status": status, ...} ...},
            "retry_id": retry_id,
            "retry": {"state": {"job_id": retry_id, "status": status, ...} ...}
        },
        {
            "job": {"state": {"job_id": job_id, "status": status, ...} ...},
            "error": "..."
        }
        ...
        {
            "job": {"state": {"job_id": job_id, "status": DOES_NOT_EXIST}},
            "error": f"Cannot find job with ID {job_id}",
        }
    ]
    where the innermost dictionaries are job states from ee2 and are within
    the job states from job.output_state()
    """
    job_ids, error_ids = self._check_job_list(job_id_list)
    try:
        retry_results = clients.get("execution_engine2").retry_jobs(
            {"job_ids": job_ids}
        )
    except Exception as e:
        raise transform_job_exception(e, "Unable to retry job(s)")

    # for each retry result, refresh the state of the retried and new jobs
    orig_ids = [result["job_id"] for result in retry_results]
    retry_ids = [
        result["retry_id"] for result in retry_results if "retry_id" in result
    ]
    orig_states = self._construct_job_output_state_set(orig_ids)
    retry_states = self._construct_job_output_state_set(
        retry_ids, self._create_jobs(retry_ids)  # add to self._running_jobs index
    )
    job_states = {**orig_states, **retry_states}

    results_by_job_id = {}
    # fill in the job state details
    for result in retry_results:
        job_id = result["job_id"]
        results_by_job_id[job_id] = {"job_id": job_id, "job": job_states[job_id]}
        if "retry_id" in result:
            retry_id = result["retry_id"]
            results_by_job_id[job_id]["retry_id"] = retry_id
            results_by_job_id[job_id]["retry"] = job_states[retry_id]
        if "error" in result:
            results_by_job_id[job_id]["error"] = result["error"]

    return self.add_errors_to_results(results_by_job_id, error_ids)
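# Consumer sketch (hypothetical JobManager instance `jm` and job ids): each
# entry is keyed by the original job id; "retry_id"/"retry" appear only when
# the retry was accepted by ee2.
results = jm.retry_jobs(["5a1b2c3d4e5f67890abcdef0", "5a1b2c3d4e5f67890abcdef1"])
for job_id, entry in results.items():
    if "error" in entry:
        print(job_id, "retry failed:", entry["error"])
    else:
        print(job_id, "retried as", entry["retry_id"])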
def cancel_job(self, job_id: str, parent_job_id: str = None) -> None:
    """
    Cancels a running job, placing it in a canceled state.
    Does NOT delete the job.

    If the job_id is None or not found in this Narrative, a ValueError is
    raised. This then checks the job to see if it is already
    canceled/finished, then attempts to cancel it. If either of those steps
    fail, a NarrativeException is raised.
    """
    if job_id is None:
        raise ValueError('Job id required for cancellation!')
    if not parent_job_id and job_id not in self._running_jobs:
        raise ValueError(f"No job present with id {job_id}")

    try:
        cancel_status = clients.get("execution_engine2").check_job_canceled(
            {"job_id": job_id})
        if (cancel_status.get("finished", 0) == 1
                or cancel_status.get("canceled", 0) == 1):
            # It's already finished, don't try to cancel it again.
            return
    except Exception as e:
        raise transform_job_exception(e)

    # Stop updating the job status while we try to cancel.
    # Also, give it a special state of 'canceling' while the cancel is in flight.
    if not parent_job_id:
        is_refreshing = self._running_jobs[job_id].get('refresh', 0)
        self._running_jobs[job_id]['refresh'] = 0
        self._running_jobs[job_id]['canceling'] = True
    try:
        clients.get('execution_engine2').cancel_job({'job_id': job_id})
    except Exception as e:
        raise transform_job_exception(e)
    finally:
        if not parent_job_id:
            self._running_jobs[job_id]['refresh'] = is_refreshing
            del self._running_jobs[job_id]['canceling']
def reload(self):
    """
    Reloads all app specs into memory from the latest update.
    """
    client = clients.get("narrative_method_store")
    for tag in app_version_tags:
        specs = client.list_methods_spec({"tag": tag})
        spec_dict = dict()
        for spec in specs:
            spec_dict[spec["info"]["id"]] = spec
        self.app_specs[tag] = spec_dict
    # And let's load all types from the beginning and cache them
    self.type_specs = client.list_categories({"load_types": 1})[3]
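# Usage sketch (hypothetical SpecManager instance `sm` and app id): after
# reload(), specs are cached per tag and keyed by app id.
sm.reload()
spec = sm.app_specs["release"].get("MEGAHIT/run_megahit")
print(spec["info"]["id"] if spec else "app not found")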
def query_ee2_states(
    job_ids: List[str],
    init: bool = True,
) -> dict:
    if not job_ids:
        return {}
    return clients.get("execution_engine2").check_jobs({
        "job_ids": job_ids,
        "exclude_fields": (JOB_INIT_EXCLUDED_JOB_STATE_FIELDS
                           if init else EXCLUDED_JOB_STATE_FIELDS),
        "return_list": 0,
    })
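# Usage sketch (illustrative job ids): the batch version of query_ee2_state;
# an empty id list short-circuits to {} without calling EE2.
states = query_ee2_states(["5a1b2c3d4e5f67890abcdef0"], init=False)
for job_id, state in states.items():
    print(job_id, state.get("status"))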
def __init__(self, job_id, app_id, inputs, owner, tag='release',
             app_version=None, cell_id=None, run_id=None):
    """
    Initializes a new Job with a given id, app id, and app version.
    The app_id and app_version should both align with what's available in
    the Narrative Method Store service.
    """
    self.job_id = job_id
    self.app_id = app_id
    self.app_version = app_version
    self.tag = tag
    self.cell_id = cell_id
    self.run_id = run_id
    self.inputs = inputs
    self.owner = owner
    self._njs = clients.get('job_service')
def initialize_jobs(self):
    """
    Initializes this JobManager. This is expected to be run by a running
    Narrative, and naturally linked to a workspace, so it does the following
    steps:
    1. app_util.system_variable('workspace_id')
    2. get the list of jobs with that ws id from ee2 (which also provides
       tag, cell_id, and run_id via narrative_cell_info)
    3. initialize the Job objects from the returned states
    4. start the status lookup loop.
    """
    ws_id = system_variable("workspace_id")
    job_states = dict()
    kblogging.log_event(self._log, "JobManager.initialize_jobs", {"ws_id": ws_id})
    try:
        job_states = clients.get("execution_engine2").check_workspace_jobs({
            "workspace_id": ws_id,
            "return_list": 0
        })
        self._running_jobs = dict()
    except Exception as e:
        kblogging.log_event(self._log, "init_error", {"err": str(e)})
        new_e = transform_job_exception(e)
        raise new_e

    for job_id, job_state in job_states.items():
        job_input = job_state.get("job_input", {})
        job_meta = job_input.get("narrative_cell_info", {})
        status = job_state.get("status")
        job = Job.from_state(
            job_id,
            job_input,
            job_state.get("user"),
            app_id=job_input.get("app_id"),
            tag=job_meta.get("tag", "release"),
            cell_id=job_meta.get("cell_id", None),
            run_id=job_meta.get("run_id", None),
            token_id=job_meta.get("token_id", None),
            meta=job_meta,
        )
        self._running_jobs[job_id] = {
            "refresh": 1 if status not in ["completed", "errored", "terminated"] else 0,
            "job": job,
        }
def parameters(self):
    """
    Returns the parameters used to start the job. Job tries to use its
    inputs field, but if that's None, it makes a call to NJS.

    If no exception is raised, this only returns the list of parameters,
    NOT the whole object fetched from NJS.get_job_params.
    """
    if self.inputs is not None:
        return self.inputs
    try:
        self.inputs = clients.get("job_service").get_job_params(self.job_id)[0]['params']
        return self.inputs
    except Exception as e:
        raise Exception("Unable to fetch parameters for job {} - {}".format(
            self.job_id, e))
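# Usage sketch (hypothetical Job instance `job`): the first call may hit the
# job service; the result is then memoized on job.inputs.
params = job.parameters()
print(len(params), "parameter entries for job", job.job_id)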
def state(self):
    """
    Queries the job service to see the status of the current job.
    Returns the raw state dict from the service, cached once the job
    has finished.
    """
    if self._last_state is not None and self._last_state.get('finished', 0) == 1:
        return self._last_state
    try:
        state = sanitize_state(clients.get("job_service").check_job(self.job_id))
        state['cell_id'] = self.cell_id
        state['run_id'] = self.run_id
        state['token_id'] = self.token_id
        self._last_state = state
        return dict(state)
    except Exception as e:
        raise Exception("Unable to fetch info for job {} - {}".format(self.job_id, e))
def _construct_job_status_set(self, job_ids: list) -> dict:
    """
    Builds a set of job states for the list of job ids.
    Cached terminal states are returned directly; the rest are fetched from
    EE2 in one batch, transformed, and terminal ones are cached as we go.
    """
    # Default to all known jobs *before* the type check, so a None input
    # doesn't trip the "must be a list" error.
    if job_ids is None:
        job_ids = list(self._running_jobs.keys())
    if not isinstance(job_ids, list):
        raise ValueError("job_ids must be a list")

    job_states = dict()
    jobs_to_lookup = list()

    # Fetch from cache of terminated jobs, where available.
    # These are already post-processed and ready to return.
    for job_id in job_ids:
        if job_id in self._completed_job_states:
            job_states[job_id] = self._completed_job_states[job_id]
        else:
            jobs_to_lookup.append(job_id)

    fetched_states = dict()
    # Get the rest of the states directly from EE2.
    if len(jobs_to_lookup):
        try:
            fetched_states = clients.get("execution_engine2").check_jobs({
                "job_ids": jobs_to_lookup,
                "exclude_fields": EXCLUDED_JOB_STATE_FIELDS,
                "return_list": 0
            })
        except Exception as e:
            kblogging.log_event(self._log, "construct_job_status_set", {"err": str(e)})

    for job_id, state in fetched_states.items():
        revised_state = self._construct_job_status(self.get_job(job_id), state)
        if revised_state["state"]["status"] in TERMINAL_STATES:
            self._completed_job_states[job_id] = revised_state
        job_states[job_id] = revised_state

    return job_states
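# Internal-usage sketch (hypothetical JobManager instance `jm`): terminal
# states come from the cache; everything else goes out in one batched
# check_jobs call to EE2.
states = jm._construct_job_status_set(["5a1b2c3d4e5f67890abcdef0"])
for job_id, s in states.items():
    print(job_id, s["state"]["status"])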
def app_description(self, app_id, tag='release'):
    """
    Returns the app description as a rendered IPython HTML object, so it
    displays inline in a notebook cell.
    """
    self.check_app(app_id, tag, raise_exception=True)
    info = clients.get('narrative_method_store').get_method_full_info(
        {'ids': [app_id], 'tag': tag})[0]
    tmpl = """
    <div class="bg-info" style="padding:15px">
        <h1>{{info.name}} <small>{{info.module_name}}</small></h1>
        <p class='lead'>{{info.id}} - v{{info.ver}}</p>
    </div>
    <p class='lead'>{{info.subtitle}}</p>
    <hr>
    {{info.description}}
    """
    return HTML(Template(tmpl).render(info=info))
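# Notebook-cell sketch (hypothetical SpecManager instance `sm` and app id):
# the returned IPython HTML object renders inline when it's the last value
# in a code cell.
sm.app_description("MEGAHIT/run_megahit", tag="release")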
def _child_job_states(self, sub_job_list, app_id, app_tag):
    """
    Fetches state for all jobs in the list. These are expected to be child
    jobs, with no actual Job object associated - so if they're done, we need
    to do the output mapping out of band. The check_jobs call with params
    returns the app id, which helps.

    app_id = the id of the app that all the child jobs are running
        (format: module/method, like "MEGAHIT/run_megahit")
    app_tag = one of "release", "beta", "dev" (the above two aren't stored
        with the subjob metadata, and won't be until we work some more on
        KBParallel - I want to lobby for pushing toward just starting
        everything up at once from here and letting HTCondor deal with
        allocation)
    sub_job_list = list of ids of jobs to look up
    """
    if not sub_job_list:
        return []

    sub_job_list = sorted(sub_job_list)
    job_info = clients.get('job_service').check_jobs(
        {'job_ids': sub_job_list, 'with_job_params': 1})
    job_info = sanitize_all_states(job_info)
    child_job_states = list()

    for job_id in sub_job_list:
        params = job_info['job_params'][job_id]
        # if it errored, collect the error.
        if job_id in job_info['check_error']:
            error = job_info['check_error'][job_id]
            error.update({'job_id': job_id})
            child_job_states.append(error)
            continue
        # if it's done, get the output mapping.
        state = job_info['job_states'][job_id]
        if state.get('finished', 0) == 1:
            try:
                widget_info = Job.map_viewer_params(
                    state, params['params'], app_id, app_tag)
            except ValueError:
                widget_info = {}
            state.update({'widget_info': widget_info})
        child_job_states.append(state)
    return child_job_states
def _run_dynamic_service_internal(self, app_id, params, tag, version, cell_id, run_id):
    spec = self._get_validated_app_spec(app_id, tag, False, version=version)

    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': tag,
        'username': system_variable('user_id'),
        'ws': system_variable('workspace')
    }
    kblogging.log_event(self._log, "run_dynamic_service", log_info)

    # Silly to keep this here, but we do not validate the incoming parameters.
    # If they are provided by the UI (we have cell_id), they are constructed
    # according to the spec, so are trusted.
    # Otherwise, if they are the product of direct code cell entry, this is a
    # mode we do not "support", so we can let it fail hard.
    # In the future, when code cell interaction is supported for users, we will
    # need to provide robust validation and error reporting, but this may end
    # up being (should be) provided by the sdk execution infrastructure anyway.
    input_vals = params
    function_name = (spec['behavior']['kb_service_name'] + '.' +
                     spec['behavior']['kb_service_method'])
    result = clients.get("service").sync_call(
        function_name, input_vals, service_version=tag)[0]
    # If a UI call (a cell_id is defined), send a result message; otherwise
    # return the raw result for display in a code cell. This is how we
    # "support" code cells for internal usage.
    if cell_id:
        self.send_cell_message('result', cell_id, run_id, {'result': result})
    else:
        return result
def _get_all_job_states(self, job_ids=None):
    """
    Returns the state for all running jobs, as a dict keyed by job id.
    """
    # 1. Get list of ids
    if job_ids is None:
        job_ids = self._running_jobs.keys()
    # 1.5 Go through job ids and remove ones that aren't found.
    job_ids = [j for j in job_ids if j in self._running_jobs]
    # 2. For each, check the completed cache. If present, grab the status;
    # otherwise, enqueue the id for batch lookup.
    job_states = dict()
    jobs_to_lookup = list()
    for job_id in job_ids:
        if job_id in self._completed_job_states:
            job_states[job_id] = dict(self._completed_job_states[job_id])
        else:
            jobs_to_lookup.append(job_id)
    # 3. Look up the jobs that need it. Cache them as we go, if finished.
    try:
        fetched_states = clients.get('job_service').check_jobs({'job_ids': jobs_to_lookup})
    except Exception as e:
        kblogging.log_event(self._log, 'get_all_job_states_error', {'err': str(e)})
        return {}
    error_states = fetched_states.get('check_errors', {})
    fetched_states = fetched_states.get('job_states', {})
    for job_id in jobs_to_lookup:
        if job_id in fetched_states:
            state = fetched_states[job_id]
            state['cell_id'] = self._running_jobs[job_id]['job'].cell_id
            state['run_id'] = self._running_jobs[job_id]['job'].run_id
            if state.get('finished', 0) == 1:
                self._completed_job_states[state['job_id']] = dict(state)
            job_states[state['job_id']] = state
        elif job_id in error_states:
            # Key by the loop's job_id; 'state' from a previous iteration
            # would be stale (or unbound) here.
            job_states[job_id] = {'lookup_error': error_states[job_id]}
    return job_states
def show_data_widget(self, upa, title=None, cell_id=None, tag="release"):
    """
    Renders a widget using the generic kbaseNarrativeOutputCell container.

    First, it looks up the UPA to get its object type. It then uses that type
    to look up what the viewer app should be. This contains the widget and
    the parameter mapping to view that widget. It then maps all of these
    together to run show_output_widget against a widget with a set of
    parameters for it.

    If there's an error at any step, it still renders a widget, but it makes
    a kbaseNarrativeError widget instead, which should be informative.

    Parameters
    ----------
    upa : string
        UPA defining a workspace object. Used to translate that object into
        parameters for the mapping to the data object used in the output
        cell. This may also be a Workspace reference path.
    title=None : string
        A title for the cell. If None, this just gets replaced with an empty string.
    cell_id=None : string
        If not None, this should be the id of the cell where the widget will
        live. Generated by the Narrative frontend.
    tag="release" : string
        All objects are related to their viewers by an app. This is the tag
        for that app's release state (should be one of release, beta, or dev).
    """
    widget_name = 'widgets/function_output/kbaseDefaultObjectView'  # default, overridden below
    widget_data = dict()
    upas = dict()

    info_tuple = clients.get('workspace').get_object_info_new(
        {'objects': [{'ref': upa}], 'includeMetadata': 1})[0]
    bare_type = info_tuple[2].split('-')[0]
    type_module = bare_type.split(".")[0]
    type_spec = self._sm.get_type_spec(bare_type, raise_exception=False)

    if type_spec is None:
        widget_data = {
            "error": {
                "msg": "Unable to find viewer specification for objects of type {}.".format(bare_type),
                "method_name": "WidgetManager.show_data_widget",
                "traceback": "Can't find type spec info for type {}".format(bare_type)
            }
        }
        upas['upas'] = [upa]  # doompety-doo
    else:
        if not type_spec.get('view_method_ids'):
            return "No viewer found for objects of type {}".format(bare_type)
        app_id = type_spec['view_method_ids'][0]
        app_spec = None
        try:
            app_spec = self._sm.get_spec(app_id, tag=tag)
        except Exception as e:
            widget_data = {
                "error": {
                    "msg": "Unable to find specification for viewer app {}".format(app_id),
                    "method_name": "WidgetManager.show_data_widget",
                    "traceback": str(e)
                }
            }
        if app_spec is not None:
            spec_params = self._sm.app_params(app_spec)
            input_params = {}
            is_ref_path = ';' in upa
            is_external = info_tuple[7] != os.environ['KB_WORKSPACE_ID']
            # It's not safe to use references yet (until we switch to them
            # all over the Apps), but for ref-paths we have to anyway:
            obj_param_value = upa if (is_ref_path or is_external) else info_tuple[1]
            upa_params = list()
            for param in spec_params:
                if param.get('allowed_types') is None or any(
                        (t == bare_type or t == type_module)
                        for t in param.get('allowed_types', [])):
                    input_params[param['id']] = obj_param_value
                    upa_params.append(param['id'])
            (input_params, ws_refs) = validate_parameters(
                app_id, tag, spec_params, input_params)
            (widget_name, widget_data) = map_outputs_from_state(
                [], input_params, app_spec)
            # Figure out params for upas.
            for mapping in app_spec.get('behavior', {}).get('output_mapping', []):
                if (mapping.get('input_parameter', '') in upa_params
                        and 'target_property' in mapping):
                    upas[mapping['target_property']] = upa

    return self.show_output_widget(
        widget_name,
        widget_data,
        upas=upas,
        title=title,
        type="viewer",
        cell_id=cell_id
    )
def _run_app_internal(self, app_id, params, tag, version, cell_id, run_id, dry_run):
    """
    Attempts to run the app, returns a Job with the running app info.
    Should *hopefully* also inject that app into the Narrative's metadata.
    Probably need some kind of JavaScript-foo to get that to work.

    Parameters:
    -----------
    app_id - should be from the app spec, e.g. 'build_a_metabolic_model'
        or 'MegaHit/run_megahit'.
    params - a dictionary of parameters. These are the set of parameters to
        be used with the app; they can be found by using the app_usage
        function. If any non-optional parameters are missing, a ValueError
        will be raised.
    tag - optional, one of [release|beta|dev] (default=release)
    version - optional, a semantic version string. Only released modules
        have versions, so if the tag is not 'release' and a version is
        given, a ValueError will be raised.
    dry_run - if True, return the job inputs that would be submitted
        instead of starting the job.
    """
    ws_id = strict_system_variable('workspace_id')
    spec = self._get_validated_app_spec(app_id, tag, True, version=version)

    # Preflight check the params - all required ones are present, all
    # values are the right type, all numerical values are in given ranges
    spec_params = self.spec_manager.app_params(spec)
    spec_params_map = dict((spec_params[i]['id'], spec_params[i])
                           for i in range(len(spec_params)))
    ws_input_refs = extract_ws_refs(app_id, tag, spec_params, params)
    input_vals = self._map_inputs(
        spec['behavior']['kb_service_input_mapping'], params, spec_params_map)

    service_method = spec['behavior']['kb_service_method']
    service_name = spec['behavior']['kb_service_name']
    service_ver = spec['behavior'].get('kb_service_version', None)
    # Let the given version override the spec's version.
    if version is not None:
        service_ver = version

    # This is what calls the function in the back end - Module.method.
    # This isn't the same as the app spec id.
    function_name = service_name + '.' + service_method
    job_meta = {'tag': tag}
    if cell_id is not None:
        job_meta['cell_id'] = cell_id
    if run_id is not None:
        job_meta['run_id'] = run_id

    # This is the input set for NJSW.run_job. Now we need the workspace id
    # and whatever fits in the metadata.
    job_runner_inputs = {
        'method': function_name,
        'service_ver': service_ver,
        'params': input_vals,
        'app_id': app_id,
        'wsid': ws_id,
        'meta': job_meta
    }
    if len(ws_input_refs) > 0:
        job_runner_inputs['source_ws_objects'] = ws_input_refs
    if dry_run:
        return job_runner_inputs

    # We're now almost ready to run the job. Last, we need an agent token.
    token_name = 'KBApp_{}'.format(app_id)
    token_name = token_name[:self.__MAX_TOKEN_NAME_LEN]
    agent_token = auth.get_agent_token(auth.get_auth_token(), token_name=token_name)
    job_runner_inputs['meta']['token_id'] = agent_token['id']

    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': tag,
        'version': service_ver,
        'username': system_variable('user_id'),
        'wsid': ws_id
    }
    kblogging.log_event(self._log, "run_app", log_info)

    try:
        job_id = clients.get("job_service",
                             token=agent_token['token']).run_job(job_runner_inputs)
    except Exception as e:
        log_info.update({'err': str(e)})
        kblogging.log_event(self._log, "run_app_error", log_info)
        raise transform_job_exception(e)

    new_job = Job(job_id,
                  app_id,
                  input_vals,
                  system_variable('user_id'),
                  tag=tag,
                  app_version=service_ver,
                  cell_id=cell_id,
                  run_id=run_id,
                  token_id=agent_token['id'])

    self._send_comm_message('run_status', {
        'event': 'launched_job',
        'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
        'cell_id': cell_id,
        'run_id': run_id,
        'job_id': job_id
    })
    JobManager().register_new_job(new_job)
    if cell_id is not None:
        return
    return new_job
def _run_app_batch_internal(self, app_id, params, tag, version, cell_id, run_id, dry_run):
    batch_method = "kb_BatchApp.run_batch"
    batch_app_id = "kb_BatchApp/run_batch"
    batch_method_ver = "dev"
    batch_method_tag = "dev"
    ws_id = strict_system_variable('workspace_id')
    spec = self._get_validated_app_spec(app_id, tag, True, version=version)

    # Preflight check the params - all required ones are present, all
    # values are the right type, all numerical values are in given ranges
    spec_params = self.spec_manager.app_params(spec)

    # A list of lists of UPAs, used for each subjob.
    batch_ws_upas = list()
    # The list of actual input values, post-mapping.
    batch_run_inputs = list()

    for param_set in params:
        spec_params_map = dict((spec_params[i]['id'], spec_params[i])
                               for i in range(len(spec_params)))
        batch_ws_upas.append(extract_ws_refs(app_id, tag, spec_params, param_set))
        batch_run_inputs.append(self._map_inputs(
            spec['behavior']['kb_service_input_mapping'], param_set, spec_params_map))

    service_method = spec['behavior']['kb_service_method']
    service_name = spec['behavior']['kb_service_name']
    service_ver = spec['behavior'].get('kb_service_version', None)
    # Let the given version override the spec's version.
    if version is not None:
        service_ver = version

    job_meta = {
        'tag': batch_method_tag,
        'batch_app': app_id,
        'batch_tag': tag,
        'batch_size': len(params),
    }
    if cell_id is not None:
        job_meta['cell_id'] = cell_id
    if run_id is not None:
        job_meta['run_id'] = run_id

    # Now put these all together in a way that can be sent to the batch processing app.
    batch_params = [{
        "module_name": service_name,
        "method_name": service_method,
        "service_ver": service_ver,
        "wsid": ws_id,
        "meta": job_meta,
        "batch_params": [{
            "params": batch_run_inputs[i],
            "source_ws_objects": batch_ws_upas[i]
        } for i in range(len(batch_run_inputs))],
    }]

    # We're now almost ready to run the job. Last, we need an agent token.
    token_name = 'KBApp_{}'.format(app_id)
    token_name = token_name[:self.__MAX_TOKEN_NAME_LEN]
    agent_token = auth.get_agent_token(auth.get_auth_token(), token_name=token_name)
    job_meta['token_id'] = agent_token['id']

    # This is the input set for NJSW.run_job.
    job_runner_inputs = {
        'method': batch_method,
        'service_ver': batch_method_ver,
        'params': batch_params,
        'app_id': batch_app_id,
        'wsid': ws_id,
        'meta': job_meta
    }

    # If we're doing a dry run, just return the inputs that we made.
    if dry_run:
        return job_runner_inputs

    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': batch_method_tag,
        'version': service_ver,
        'username': system_variable('user_id'),
        'wsid': ws_id
    }
    kblogging.log_event(self._log, "run_batch_app", log_info)

    try:
        job_id = clients.get("job_service",
                             token=agent_token['token']).run_job(job_runner_inputs)
    except Exception as e:
        log_info.update({'err': str(e)})
        kblogging.log_event(self._log, "run_batch_app_error", log_info)
        raise transform_job_exception(e)

    new_job = Job(job_id,
                  batch_app_id,
                  batch_params,
                  system_variable('user_id'),
                  tag=batch_method_tag,
                  app_version=batch_method_ver,
                  cell_id=cell_id,
                  run_id=run_id,
                  token_id=agent_token['id'],
                  meta=job_meta)

    self._send_comm_message('run_status', {
        'event': 'launched_job',
        'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
        'cell_id': cell_id,
        'run_id': run_id,
        'job_id': job_id
    })
    JobManager().register_new_job(new_job)
    if cell_id is not None:
        return
    return new_job
""" Some utility functions for running KBase Apps or Methods or whatever they are this week. """ __author__ = "Bill Riehl <*****@*****.**>, Roman Sutormin <*****@*****.**>" import os import re import biokbase.narrative.clients as clients app_version_tags = ['release', 'beta', 'dev'] _ws_client = clients.get('workspace') def check_tag(tag, raise_exception=False): """ Checks if the given tag is one of "release", "beta", or "dev". Returns a boolean. if raise_exception == True and the tag is bad, raises a ValueError """ tag_exists = tag in app_version_tags if not tag_exists and raise_exception: raise ValueError("Can't find tag %s - allowed tags are %s" % (tag, ", ".join(app_version_tags))) else: return tag_exists def system_variable(var): """ Returns a KBase system variable. Just a little wrapper. Parameters ---------- var: string, one of "workspace", "workspace_id", "token", "user_id"
def initialize_jobs(self, start_lookup_thread=True): """ Initializes this JobManager. This is expected to be run by a running Narrative, and naturally linked to a workspace. So it does the following steps. 1. app_util.system_variable('workspace_id') 2. get list of jobs with that ws id from UJS (also gets tag, cell_id, run_id) 3. initialize the Job objects by running NJS.get_job_params (also gets app_id) 4. start the status lookup loop. """ the_time = int(round(time.time() * 1000)) self._send_comm_message('start', {'time': the_time}) ws_id = system_variable('workspace_id') try: nar_jobs = clients.get('user_and_job_state').list_jobs2({ 'authstrat': 'kbaseworkspace', 'authparams': [str(ws_id)] }) except Exception as e: kblogging.log_event(self._log, 'init_error', {'err': str(e)}) new_e = transform_job_exception(e) error = { 'error': 'Unable to get initial jobs list', 'message': getattr(new_e, 'message', 'Unknown reason'), 'code': getattr(new_e, 'code', -1), 'source': getattr(new_e, 'source', 'jobmanager'), 'name': getattr(new_e, 'name', type(e).__name__), 'service': 'user_and_job_state' } self._send_comm_message('job_init_err', error) raise new_e job_ids = [j[0] for j in nar_jobs] job_states = clients.get('job_service').check_jobs({ 'job_ids': job_ids, 'with_job_params': 1 }) job_param_info = job_states.get('job_params', {}) job_check_error = job_states.get('check_error', {}) error_jobs = dict() for info in nar_jobs: job_id = info[0] user_info = info[1] job_meta = info[10] try: if job_id in job_param_info: job_info = job_param_info[job_id] job = Job.from_state(job_id, job_info, user_info[0], app_id=job_info.get('app_id'), tag=job_meta.get('tag', 'release'), cell_id=job_meta.get('cell_id', None), run_id=job_meta.get('run_id', None), token_id=job_meta.get('token_id', None), meta=job_meta) # Note that when jobs for this narrative are initially loaded, # they are set to not be refreshed. Rather, if a client requests # updates via the start_job_update message, the refresh flag will # be set to True. self._running_jobs[job_id] = { 'refresh': 0, 'job': job } elif job_id in job_check_error: job_err_state = { 'job_state': 'error', 'error': { 'error': 'KBase execution engine returned an error while looking up this job.', 'message': job_check_error[job_id].get('message', 'No error message available'), 'name': 'Job Error', 'code': job_check_error[job_id].get('code', -999), 'exception': { 'error_message': 'Job lookup in execution engine failed', 'error_type': job_check_error[job_id].get('name', 'unknown'), 'error_stacktrace': job_check_error[job_id].get('error', '') } }, 'cell_id': job_meta.get('cell_id', None), 'run_id': job_meta.get('run_id', None), } error_jobs[job_id] = job_err_state except Exception as e: kblogging.log_event(self._log, 'init_error', {'err': str(e)}) new_e = transform_job_exception(e) error = { 'error': 'Unable to get job info on initial lookup', 'job_id': job_id, 'message': getattr(new_e, 'message', 'Unknown reason'), 'code': getattr(new_e, 'code', -1), 'source': getattr(new_e, 'source', 'jobmanager'), 'name': getattr(new_e, 'name', type(e).__name__), 'service': 'job_service' } self._send_comm_message('job_init_lookup_err', error) raise new_e # should crash and burn on any of these. 
    if job_check_error:
        err_str = 'Unable to find info for some jobs on initial lookup'
        err_type = 'job_init_partial_err'
        if len(job_check_error) == len(nar_jobs):
            err_str = 'Unable to get info for any job on initial lookup'
            err_type = 'job_init_lookup_err'
        error = {
            'error': err_str,
            'job_errors': error_jobs,
            'message': 'Job information was unavailable from the server',
            'code': -2,
            'source': 'jobmanager',
            'name': 'jobmanager',
            'service': 'job_service',
        }
        self._send_comm_message(err_type, error)

    if not self._running_lookup_loop and start_lookup_thread:
        # only keep one loop at a time in case this gets called again!
        if self._lookup_timer is not None:
            self._lookup_timer.cancel()
        self._running_lookup_loop = True
        self._lookup_job_status_loop()
    else:
        self._lookup_all_job_status()
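# A hypothetical helper (not in the codebase) that isolates the partial-vs-total
# failure decision made at the end of initialize_jobs above: if every job lookup
# failed, escalate to a full job_init_lookup_err; otherwise report a partial error.
def classify_init_errors(job_check_error, nar_jobs):
    if not job_check_error:
        return None  # nothing to report
    if len(job_check_error) == len(nar_jobs):
        return ('job_init_lookup_err', 'Unable to get info for any job on initial lookup')
    return ('job_init_partial_err', 'Unable to find info for some jobs on initial lookup')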
def _update_log(self):
    log_update = clients.get("job_service").get_job_logs(
        {'job_id': self.job_id, 'skip_lines': len(self._job_logs)})
    if log_update['lines']:
        self._job_logs = self._job_logs + log_update['lines']
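# Usage sketch: because _update_log passes skip_lines=len(self._job_logs), each call
# asks the job service only for lines the Job hasn't cached yet, so repeated calls
# append output incrementally. tail_job_log is a hypothetical polling driver for
# illustration, not part of the Job class.
import time

def tail_job_log(job, polls=5, delay=2.0):
    for _ in range(polls):
        job._update_log()   # fetch and append any new lines
        time.sleep(delay)
    return job._job_logs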
def infer_upas(self, widget_name, params):
    """
    Use the given widget_name and parameters (to be passed to the widget) to infer any UPAs.
    This will generally mean using the workspace object name and workspace name to do a
    lookup in the Workspace and constructing the UPA or UPA path from there.

    widget_name - string - name of the widget to be used; this gets looked up in the
        widget param map, which maps all widget input parameters onto some sensible language.
    params - dict - keys = id of parameter, values = value of parameter.

    The general flow is something like this. We go through all parameters, see what context
    those map onto, and infer from that which values are workspace objects. We can then look
    up those objects by workspace and object name, and use the info to construct UPAs.

    Example:
    wm.infer_upas("kbasePanGenome", {
        "ws": "my_workspace",
        "name": "my_pangenome"
    })
    The widget parameter map has this entry:
    "kbasePanGenome": {
        "ws": "ws_name",
        "name": "obj_name"
    }
    So we know, by inference, that "my_workspace" is a workspace name, and "my_pangenome"
    is an object name. We can use this info to look up the object info from the Workspace;
    let's say it's 3/4/5. This then gets returned as another dict:
    {
        "name": "3/4/5"
    }

    This applies to lists, too. If, above, the value for the "name" parameter was a list of
    strings, this would treat all of those as objects, and try to return a list of UPAs instead.
    """
    param_to_context = self.widget_param_map.get(widget_name, {})
    obj_names = list()      # list of tuples - first = param id, second = object name
    obj_refs = list()       # list of tuples - first = param id, second = reference or UPA
    obj_name_list = list()  # list of tuples, but the second is a list of names
    obj_ref_list = list()   # list of tuples, but the second is a list of UPAs
    ws = None
    for param in params.keys():
        if param in param_to_context:
            context = param_to_context[param]
            if context == "ws_id" or context == "ws_name":
                ws = params[param]
            elif context == "obj_name" or context == "obj_id":
                obj_names.append((param, params[param]))
            elif context == "obj_name_list":
                obj_name_list.append((param, params[param]))
            elif context == "obj_ref":
                obj_refs.append((param, params[param]))
            elif context == "obj_ref_list":
                obj_ref_list.append((param, params[param]))

    # return value will look like this:
    # {
    #     param1: upa,
    #     param2: upa,
    #     param3: [upa1, upa2],
    #     ... etc.
    # }
    upas = dict()

    # fetched up front - both the scalar and list lookups below need it
    ws_client = clients.get('workspace')

    # First, test obj_refs and obj_ref_list.
    # These might be references of the form ws_name/obj_name, which are not proper UPAs and
    # need to be resolved. Gotta test 'em all.
    lookup_params = list()  # params whose refs need resolving
    info_params = list()    # inputs for get_object_info3
    for (param, ref) in obj_refs:
        if is_upa(str(ref)):
            upas[param] = ref
        elif is_ref(str(ref)):
            info_params.append({"ref": ref})
            lookup_params.append(param)
        else:
            raise ValueError('Parameter {} has value {} which was expected to refer to an object'.format(param, ref))

    for (param, name) in obj_names:
        # it's possible that these are misnamed and are actually UPAs already. Test and add
        # to the upas dictionary if so.
        if is_upa(str(name)):
            upas[param] = name
        elif is_ref(str(name)):
            info_params.append({"ref": name})
            lookup_params.append(param)
        else:
            info_params.append({"ref": "{}/{}".format(ws, name)})
            lookup_params.append(param)

    if lookup_params:
        ws_info = ws_client.get_object_info3({'objects': info_params})
        for (idx, path) in enumerate(ws_info['paths']):
            upas[lookup_params[idx]] = ';'.join(path)

    # obj_refs and obj_names are done. Do the list versions now.
    lookup_params = list()
    info_params = list()
    for (param, ref_list) in obj_ref_list:
        # Fail fast if any member of a list isn't actually a ref. This might be lazy,
        # but there's likely a problem if the inputs aren't uniform.
        for ref in ref_list:
            if not is_ref(str(ref)):
                raise ValueError('Parameter {} has value {} which contains an item that is not a valid object reference'.format(param, ref_list))
        lookup_params.append(param)
        info_params.append([{'ref': ref} for ref in ref_list])

    for (param, name_list) in obj_name_list:
        info_param = list()
        for name in name_list:
            if is_ref(str(name)):
                info_param.append({'ref': name})
            else:
                info_param.append({'ref': "{}/{}".format(ws, name)})
        info_params.append(info_param)
        lookup_params.append(param)

    # This time we have a one->many mapping from params to each list, so run the
    # workspace lookup in a loop.
    for (idx, param) in enumerate(lookup_params):
        ws_info = ws_client.get_object_info3({'objects': info_params[idx]})
        upas[param] = [';'.join(path) for path in ws_info['paths']]

    return upas
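# is_upa and is_ref are used above but defined elsewhere in the package. Below is a
# minimal sketch of plausible implementations, assuming a UPA is a strictly numeric
# ws_id/obj_id/version triple (optionally chained with ';' into a reference path),
# and a ref also admits name-based ws/name or ws/name/version forms.
import re

UPA_PATTERN = re.compile(r'^\d+/\d+/\d+(;\d+/\d+/\d+)*$')

def is_upa(ref):
    # every step in the path must be a fully numeric ws/obj/ver triple
    return bool(UPA_PATTERN.match(ref))

def is_ref(ref):
    # each ';'-separated step needs 2 or 3 non-empty '/'-separated parts;
    # a bare object name (no slash) is deliberately not a ref, so callers
    # fall back to building "{ws}/{name}" as infer_upas does above.
    steps = [step.split('/') for step in ref.split(';')]
    return all(2 <= len(parts) <= 3 and all(parts) for parts in steps)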
def initialize_jobs(self):
    """
    Initializes this JobManager. This is expected to be run by a running Narrative, and
    naturally linked to a workspace. So it does the following steps.
    1. app_util.system_variable('workspace_id')
    2. get list of jobs with that ws id from UJS (also gets tag, cell_id, run_id)
    3. initialize the Job objects by running NJS.get_job_params on each of those (also gets app_id)
    4. start the status lookup loop.
    """
    ws_id = system_variable('workspace_id')
    try:
        nar_jobs = clients.get('user_and_job_state').list_jobs2({
            'authstrat': 'kbaseworkspace',
            'authparams': [str(ws_id)]
        })
    except Exception as e:
        kblogging.log_event(self._log, 'init_error', {'err': str(e)})
        new_e = transform_job_exception(e)
        error = {
            'error': 'Unable to get initial jobs list',
            'message': getattr(new_e, 'message', 'Unknown reason'),
            'code': getattr(new_e, 'code', -1),
            'source': getattr(new_e, 'source', 'jobmanager'),
            'name': getattr(new_e, 'name', type(e).__name__),
            'service': 'user_and_job_state'
        }
        self._send_comm_message('job_init_err', error)
        raise new_e

    for info in nar_jobs:
        job_id = info[0]
        user_info = info[1]
        job_meta = info[10]
        try:
            job_info = clients.get('job_service').get_job_params(job_id)[0]
            self._running_jobs[job_id] = {
                'refresh': True,
                'job': Job.from_state(job_id,
                                      job_info,
                                      user_info[0],
                                      app_id=job_info.get('app_id'),
                                      tag=job_meta.get('tag', 'release'),
                                      cell_id=job_meta.get('cell_id', None),
                                      run_id=job_meta.get('run_id', None))
            }
        except Exception as e:
            kblogging.log_event(self._log, 'init_error', {'err': str(e)})
            new_e = transform_job_exception(e)
            error = {
                'error': 'Unable to get job info on initial lookup',
                'job_id': job_id,
                'message': getattr(new_e, 'message', 'Unknown reason'),
                'code': getattr(new_e, 'code', -1),
                'source': getattr(new_e, 'source', 'jobmanager'),
                'name': getattr(new_e, 'name', type(e).__name__),
                'service': 'job_service'
            }
            self._send_comm_message('job_init_lookup_err', error)
            raise new_e  # should crash and burn on any of these.

    if not self._running_lookup_loop:
        # only keep one loop at a time in case this gets called again!
        if self._lookup_timer is not None:
            self._lookup_timer.cancel()
        self._running_lookup_loop = True
        self._lookup_job_status_loop()
    else:
        self._lookup_all_job_status()
def update_method_cell(cell):
    """
    Updates a single method cell to satisfy these constraints:
    1. Become a code cell, NOT a markdown cell.
    2. Translate the cell's metadata to the right structure.
    3. Remove the MD code from the source area.

    Some assumptions made here:
    1. Jobs associated with the cell are not available. So the only states are
       either editing or complete (default to editing).
    2. We don't know what tag the methods came from, so go with 'release'.
    """
    # 1. Get its metadata and update it to be new cell-ish
    meta = cell['metadata']['kb-cell']
    if 'method' not in meta:
        # throw an error?
        return cell

    # try to find cell_id; if not, make up a new one.
    method_info = meta['method'].get('info', {})
    method_behavior = meta['method'].get('behavior', {})
    widget_state = meta.get('widget_state', [])
    if len(widget_state):
        widget_state = widget_state[0]
    else:
        widget_state = {}
    # guard against a missing 'state' key - the original code assumed it was present
    runtime_state = widget_state.get('state', {})
    method_params = runtime_state.get('params', None)
    if not method_params:
        method_params = {}

    # guess at the FSM state for the method cell from runtime_state.runningState
    cur_state = runtime_state.get('runningState', 'input')
    if cur_state == 'input':
        fsm_state = {
            'mode': 'editing',
            'params': 'incomplete'
        }
    elif cur_state in ['submitted', 'queued', 'running', 'error']:
        # no longer any access to the job, so just reset to input
        fsm_state = {
            'mode': 'editing',
            'params': 'complete'
        }
    else:
        # the only state left is complete...
        fsm_state = {
            'mode': 'success',
            'params': 'complete'
        }

    ts = widget_state.get('time', None)
    if ts:
        ts = datetime.datetime.utcfromtimestamp(ts / 1000.0).strftime('%a, %d %b %Y %H:%M:%S GMT')

    git_hash = method_info.get('git_commit_hash', None)
    app_name = method_info.get('id', '')
    # the app_name, in this case, is everything after the slash. So MegaHit/run_megahit
    # would just be 'run_megahit'
    app_name = app_name[app_name.find('/') + 1:]
    module_name = method_behavior.get('kb_service_name', None)
    tag = None

    # Now we get the version, if it exists. This is suddenly very complex:
    # We need git_hash and module_name to look up the version.
    # If the lookup succeeds -
    #     if it has a release tag, use it.
    #     if not, look up the module's info (get_module_info), use the most released
    #     one (release > beta > dev) and change the hash.
    # If the lookup fails -
    #     try again with just the module info.
    #     if THAT fails, the cell can't be updated.
    # If there's no git_hash or module_name, it's not an SDK-based cell and can't be looked up.
    if git_hash and module_name:
        cat = clients.get('catalog')
        tag_pref_order = ['release', 'beta', 'dev']
        try:
            version_info = cat.get_module_version({'module_name': module_name, 'version': git_hash})
            if 'release_tags' in version_info:
                tags = version_info['release_tags']
                if len(tags) > 0:
                    tags = [t.lower() for t in tags]
                    for tag_pref in tag_pref_order:
                        if tag_pref in tags:
                            tag = tag_pref
                            break  # stop at the most-preferred matching tag
            if tag is None:
                raise Exception("No release tag found!")
        except Exception:
            try:
                mod_info = cat.get_module_info({'module_name': module_name})
                # look for the most recent (release > beta > dev) tag that has the app.
                for tag_pref in tag_pref_order:
                    tag_info = mod_info.get(tag_pref, None)
                    if tag_info is not None and app_name in tag_info.get('narrative_methods', []):
                        tag = tag_pref
                        break
            except Exception as e2:
                print("Exception found: {}".format(e2))
    else:
        # it's not an SDK method! do something else!
        return obsolete_method_cell(cell,
                                    method_info.get('id'),
                                    method_info.get('name'),
                                    meta['method'],
                                    method_params)

    new_meta = {
        'type': 'app',
        'attributes': {
            'title': method_info.get('name', 'Unnamed App'),
            'id': unicode(uuid.uuid4()),
            'status': 'new',
            'created': ts,  # default to last saved time
            'lastLoaded': ts,
        },
        'appCell': {
            'app': {
                'id': method_info.get('id', 'unknown'),
                'gitCommitHash': git_hash,
                'version': method_info.get('ver', None),
                'tag': tag
            },
            'state': {
                'edit': 'editing',
                'params': None,
                'code': None,
                'request': None,
                'result': None
            },
            'params': method_params,
            'user-settings': {
                'showCodeInputArea': False,
                'showDeveloperOptions': False
            }
            # 'fsm': {
            #     'currentState': fsm_state
            # }
        }
    }

    # Finally, turn it into a code cell.
    cell['cell_type'] = u'code'
    cell['execution_count'] = None
    cell['outputs'] = []
    cell['metadata']['kbase'] = new_meta
    del cell['metadata']['kb-cell']
    cell['source'] = u''
    return cell
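# Usage sketch: update_method_cell is meant to run per cell during a narrative
# upgrade pass. A hypothetical driver over a loaded notebook dict might look like
# this; the 'cells' / 'metadata' layout is assumed from the accessors used above.
def upgrade_narrative_cells(notebook):
    cells = notebook.get('cells', [])
    for (idx, cell) in enumerate(cells):
        kb_cell = cell.get('metadata', {}).get('kb-cell', {})
        if 'method' in kb_cell:
            # replace the markdown method cell with its code-cell equivalent
            cells[idx] = update_method_cell(cell)
    return notebook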