def get(self, notebook_id):
    """
    Open a narrative notebook.

    Pushes the KBase auth cookie (if one is present) into the notebook
    manager's environment, records an 'open' log event, then delegates to
    the original (wrapped) ``get`` handler.

    :param notebook_id: id of the narrative notebook being opened
    """
    client_ip = self.request.remote_ip
    http_headers = self.request.headers
    ua = http_headers.get('User-Agent', 'unknown')
    app_log.info("Http-Headers={}".format(
        list(self.request.headers.get_all())))
    # save client ip in environ for later logging
    kbase_env.client_ip = client_ip
    app_log.debug("notebook_id = " + notebook_id)
    # all_cookies is the module-level list of recognized KBase cookie names.
    found_cookies = [
        self.cookies[c] for c in all_cookies if c in self.cookies
    ]
    if found_cookies:
        # Push the cookie
        cookie_val = urllib.unquote(found_cookies[0].value)
        app_log.debug("kbase cookie = {}".format(cookie_val))
        cookie_obj = cookie_pusher(cookie_val, getattr(self, 'notebook_manager'))
        # Log the event
        user = cookie_obj.get('user_id', '')
        session = cookie_obj.get('kbase_sessionid', '')
        # Stash narrative/session details in the shared environment so later
        # log records can be correlated with this narrative open.
        kbase_env.narrative = notebook_id
        kbase_env.session = session
        kbase_env.client_ip = client_ip
        log_event(g_log, 'open', {'user': user, 'user_agent': ua})
    app_log.info("After get(): KB_NARRATIVE={}".format(
        os.environ.get('KB_NARRATIVE', 'none')))
    return old_get(self, notebook_id)
def read_narrative(self, obj_ref, content=True, include_metadata=True):
    """
    Fetches a Narrative and its object info from the Workspace
    If content is False, this only returns the Narrative's info
    and metadata, otherwise, it returns the whole workspace object.

    This is mainly a wrapper around Workspace.get_objects(), except that
    it always returns a dict. If content is False, it returns a dict
    containing a single key: 'info', with the object info and, optionally,
    metadata.

    obj_ref: expected to be in the format "wsid/objid", e.g. "4337/1"
    or even "4337/1/1" to include version.

    Returns None when the Workspace returns no data for the reference.
    Raises the error produced by _ws_err_to_perm_err on Workspace
    ServerError.
    """
    self._test_obj_ref(obj_ref)
    try:
        if content:
            nar_data = self.ws_client().get_objects([{'ref': obj_ref}])
            if nar_data:
                nar = nar_data[0]
                # Migrate the stored document to the current narrative format.
                nar['data'] = update_narrative(nar['data'])
                return nar
        else:
            log_event(g_log, 'read_narrative testing existence', {'ref': obj_ref})
            nar_data = self.ws_client().get_object_info_new({
                u'objects': [{'ref': obj_ref}],
                u'includeMetadata': 1 if include_metadata else 0
            })
            if nar_data:
                return {'info': nar_data[0]}
    # Fixed: Py2-only "except ServerError, err" replaced with the
    # "except ... as err" form, valid on both Python 2.6+ and Python 3.
    except ServerError as err:
        raise self._ws_err_to_perm_err(err)
    # Workspace returned no data for this reference.
    return None
def list_narratives(self, ws_id=None):
    # self.log.debug("Listing Narratives")
    # self.log.debug("kbase_session = %s" % str(self.kbase_session))
    """
    By default, this searches for Narrative types in any workspace that
    the current token has read access to. Works anonymously as well.

    If the ws_id field is not None, it will only look up Narratives in
    that particular workspace (by its numerical id).

    Returns a list of dictionaries of object descriptions, one for each
    Narrative. The keys in each dictionary are those from the
    list_objects_fields list above.

    This is just a wrapper around the Workspace list_objects command.

    Raises: PermissionsError, if access is denied; ValueError if ws_id
    is not numeric.
    """
    log_event(g_log, 'list_narratives start', {'ws_id': ws_id})
    list_obj_params = {'type': self.nar_type,
                      'includeMetadata': 1}
    if ws_id:
        try:
            int(ws_id)  # will throw an exception if ws_id isn't an int
            list_obj_params['ids'] = [ws_id]
        except ValueError:
            raise
    try:
        ws = self.ws_client()
        res = ws.list_objects(list_obj_params)
    # Fixed: Py2-only "except ServerError, err" replaced with the
    # "except ... as err" form, valid on both Python 2.6+ and Python 3.
    except ServerError as err:
        raise self._ws_err_to_perm_err(err)
def get(self, path):
    """
    Inject the user's KBase cookie before trying to look up a file.
    One of our big use cases bypasses the typical Jupyter login mechanism.

    :param path: URL path of the notebook (or other resource) requested
    """
    # KBase cookie payload is "key=value|key=value|..." with '=' and '|'
    # escaped inside values; this regex splits the pairs.
    cookie_regex = re.compile('([^ =|]+)=([^\|]*)')
    client_ip = self.request.remote_ip
    http_headers = self.request.headers
    ua = http_headers.get('User-Agent', 'unknown')
    found_cookies = [self.cookies[c] for c in all_cookies if c in self.cookies]
    if found_cookies:
        cookie_val = urllib.unquote(found_cookies[0].value)
        # Undo the KBase escaping of '=' and '|' inside cookie values.
        cookie_obj = {
            k: v.replace('EQUALSSIGN', '=').replace('PIPESIGN', '|')
            for k, v in cookie_regex.findall(cookie_val)
        }
        # if app_log.isEnabledFor(logging.DEBUG):
        #     app_log.debug("kbase cookie = {}".format(cookie_val))
        #     app_log.debug("KBaseLoginHandler.get: user_id={uid} token={tok}"
        #                   .format(uid=sess.get('token', 'none'),
        #                           tok=sess.get('token', 'none')))
        biokbase.auth.set_environ_token(cookie_obj.get('token', None))
        kbase_env.session = cookie_obj.get('kbase_sessionid', '')
        kbase_env.client_ip = client_ip
        kbase_env.user = cookie_obj.get('user_id', '')
        log_event(g_log, 'session_start',
                  {'user': kbase_env.user, 'user_agent': ua})
    """get renders the notebook template if a name is given, or
    redirects to the '/files/' handler if the name is not given."""
    path = path.strip('/')
    cm = self.contents_manager
    # will raise 404 on not found
    try:
        model = cm.get(path, content=False)
    except web.HTTPError as e:
        raise
        # if e.status_code == 404 and 'files' in path.split('/'):
        #     # 404, but '/files/' in URL, let FilesRedirect take care of it
        #     return FilesRedirectHandler.redirect_to_files(self, path)
        # else:
        #     raise
    if model['type'] != 'notebook':
        # not a notebook, redirect to files
        return FilesRedirectHandler.redirect_to_files(self, path)
    # NOTE(review): 'name' is computed but the template below is passed
    # 'path' for notebook_name — presumably intentional (later versions
    # drop 'name' entirely); confirm before changing.
    name = url_escape(path.rsplit('/', 1)[-1])
    path = url_escape(path)
    self.write(self.render_template('notebook.html',
                                    notebook_path=path,
                                    notebook_name=path,
                                    kill_kernel=False,
                                    mathjax_url=self.mathjax_url,
                                    )
               )
def _handle_comm_message(self, msg: dict) -> None:
    """
    Handle an incoming message on the KBaseJobs comm channel.

    The raw message is wrapped in a JobRequest and dispatched through
    self._msg_map (populated at JobComm creation). A request type with no
    registered handler is reported back over the channel as a
    job_comm_error, and a ValueError is raised.
    """
    request = JobRequest(msg)
    kblogging.log_event(self._log, "handle_comm_message", {"msg": request.request})
    handler = self._msg_map.get(request.request)
    if handler is None:
        # Tell the front end about the unrecognized request, then fail loudly.
        self.send_comm_message(
            "job_comm_error",
            {"message": "Unknown message", "request_type": request.request},
        )
        raise ValueError(f"Unknown KBaseJobs message '{request.request}'")
    handler(request)
def read_narrative(self, ref, content=True, include_metadata=True):
    """
    Fetches a Narrative and its object info from the Workspace
    If content is False, this only returns the Narrative's info
    and metadata, otherwise, it returns the whole workspace object.

    This is mainly a wrapper around Workspace.get_objects2(), except that it
    always returns a dict. If content is False, it returns a dict containing
    a single key: 'info', with the object info and, optionally, metadata.

    :param ref: a NarrativeRef
    :param content: if True, returns the narrative document, otherwise just
        the metadata
    :param include_metadata: if True, includes the object metadata when
        returning
    :raises WorkspaceError: when the Workspace call fails
    """
    log_event(g_log, "reading narrative", {'ref': str(ref),
                                           'content': content,
                                           'include_meta': include_metadata})
    assert isinstance(ref, NarrativeRef), "read_narrative must use a NarrativeRef as input!"
    try:
        if content:
            nar_data = self.ws_client().get_objects2({'objects': [{'ref': str(ref)}]})
            nar = nar_data['data'][0]
            # Make sure the fetched object really is a Narrative type.
            self._validate_nar_type(nar['info'][2], ref)
            # Migrate the stored document to the current narrative format.
            nar['data'] = update_narrative(nar['data'])
            return nar
        else:
            log_event(g_log, 'read_narrative testing existence', {'ref': str(ref)})
            nar_data = self.ws_client().get_object_info3({
                'objects': [{'ref': str(ref)}],
                'includeMetadata': 1 if include_metadata else 0
            })
            nar_info = nar_data['infos'][0]
            self._validate_nar_type(nar_info[2], ref)
            return {'info': nar_info}
    except ServerError as err:
        # Wrap Workspace server errors together with the workspace id.
        raise WorkspaceError(err, ref.wsid)
def _run_dynamic_service_internal(self, app_id, params, tag, version, cell_id, run_id, **kwargs):
    """
    Run a dynamic-service app synchronously.

    Validates the app spec, logs the run, calls the service, then either
    sends the result to the originating cell (when cell_id is given) or
    returns it for display in a code cell.

    :param app_id: id of the app to run
    :param params: parameter values passed straight through (NOT validated;
        see the long note below)
    :param tag: release tag ('release', 'beta', 'dev', ...)
    :param version: only allowed when tag is not "release" — raises otherwise
    :param cell_id: id of the originating UI cell, or None for code cells
    :param run_id: id of this run, echoed back in cell messages
    :raises ValueError: on invalid app, wrong version/tag combo, or
        obsolete script-based apps
    """
    # Intro tests:
    self.spec_manager.check_app(app_id, tag, raise_exception=True)
    if version is not None and tag != "release":
        raise ValueError(
            "App versions only apply to released app modules!")
    # Get the spec & params
    spec = self.spec_manager.get_spec(app_id, tag)
    if 'behavior' not in spec:
        raise ValueError(
            "This app appears invalid - it has no defined behavior")
    behavior = spec['behavior']
    if 'script_module' in behavior or 'script_name' in behavior:
        # It's an old NJS script. These don't work anymore.
        raise ValueError(
            'This app relies on a service that is now obsolete. Please contact the administrator.'
        )
    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': tag,
        'username': system_variable('user_id'),
        'ws': system_variable('workspace')
    }
    kblogging.log_event(self._log, "run_dynamic_service", log_info)
    # Silly to keep this here, but we do not validate the incoming parameters.
    # If they are provided by the UI (we have cell_id), they are constructed
    # according to the spec, so are trusted;
    # Otherwise, if they are the product of direct code cell entry, this is a mode we do not
    # "support", so we can let it fail hard.
    # In the future when code cell interaction is supported for users, we will need to provide
    # robust validation and error reporting, but this may end up being (should be) provided by the
    # sdk execution infrastructure anyway
    input_vals = params
    function_name = spec['behavior']['kb_service_name'] + '.' + spec[
        'behavior']['kb_service_method']
    try:
        result = clients.get("service").sync_call(function_name,
                                                  input_vals,
                                                  service_version=tag)[0]
        # if a ui call (a cell_id is defined) we send a result message, otherwise
        # just the raw result for display in a code cell. This is how we "support"
        # code cells for internal usage.
        if cell_id:
            self.send_cell_message('result', cell_id, run_id, {'result': result})
        else:
            return result
    except:
        raise
def _parse_obj_ref(self, obj_ref):
    """
    Split a workspace object reference into its components.

    Returns a dict with keys 'wsid', 'objid' and 'ver' (each taken from the
    corresponding named group of obj_ref_regex), or None when the reference
    does not match the expected pattern.
    """
    log_event(g_log, '_parse_obj_ref', {'ref': obj_ref})
    match = obj_ref_regex.match(obj_ref)
    if match is None:
        return None
    return {
        'wsid': match.group('wsid'),
        'objid': match.group('objid'),
        'ver': match.group('ver'),
    }
def _run_widget_app_internal(self, app_id, tag, version, cell_id, run_id):
    """
    Validate and "run" a widget (viewer) app.

    Sends run_status comm messages around the validation steps, rejects
    specs that are really long-running services or obsolete scripts, logs
    the run, then shows the app's custom input widget.

    :param app_id: id of the widget app
    :param tag: release tag ('release', 'beta', 'dev', ...)
    :param version: only allowed when tag is not "release" — raises otherwise
    :param cell_id: id of the originating UI cell
    :param run_id: id of this run, echoed back in comm messages
    :raises ValueError: on invalid app, wrong version/tag combo, service
        apps, or obsolete script-based apps
    """
    self._send_comm_message('run_status', {
        'event': 'validating_app',
        'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
        'cell_id': cell_id,
        'run_id': run_id
    })
    # Intro tests:
    self.spec_manager.check_app(app_id, tag, raise_exception=True)
    if version is not None and tag != "release":
        raise ValueError("App versions only apply to released app modules!")
    # Get the spec & params
    spec = self.spec_manager.get_spec(app_id, tag)
    if 'behavior' not in spec:
        raise ValueError("This app appears invalid - it has no defined behavior")
    behavior = spec['behavior']
    if 'kb_service_input_mapping' in behavior:
        # it's a service! Should run this with run_app!
        raise ValueError('This app appears to be a long-running job! Please start it using the run_app function instead.')
    if 'script_module' in behavior or 'script_name' in behavior:
        # It's an old NJS script. These don't work anymore.
        raise ValueError('This app relies on a service that is now obsolete. Please contact the administrator.')
    # Here, we just deal with two behaviors:
    # 1. None of the above - it's a viewer.
    # 2. ***TODO*** python_class / python_function. Import and exec the python code.
    # for now, just map the inputs to outputs.
    # First, validate.
    # Preflight check the params - all required ones are present, all values are the right type, all numerical values are in given ranges
    #spec_params = self.spec_manager.app_params(spec)
    #(params, ws_refs) = self._validate_parameters(app_id, tag, spec_params, kwargs)
    log_info = {
        'app_id': app_id,
        'tag': tag,
        'username': system_variable('user_id'),
        'ws': system_variable('workspace')
    }
    kblogging.log_event(self._log, "run_widget_app", log_info)
    self._send_comm_message('run_status', {
        'event': 'success',
        'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
        'cell_id': cell_id,
        'run_id': run_id
    })
    # now just map onto outputs.
    custom_widget = spec.get('widgets', {}).get('input', None)
    return WidgetManager().show_custom_widget(custom_widget, app_id, version, tag, spec, cell_id)
def get(self, path):
    """
    Inject the user's KBase cookie before trying to look up a file.
    One of our big use cases bypasses the typical Jupyter login mechanism.

    :param path: URL path of the notebook (or other resource) requested
    :raises web.HTTPError: 401 when no auth cookie is present
    """
    # KBase cookie payload is "key=value|key=value|..." with '=' and '|'
    # escaped inside values; this regex splits the pairs.
    cookie_regex = re.compile('([^ =|]+)=([^\|]*)')
    client_ip = self.request.remote_ip
    http_headers = self.request.headers
    ua = http_headers.get('User-Agent', 'unknown')
    auth_cookie = self.cookies.get(auth_cookie_name, None)
    if auth_cookie:
        cookie_val = urllib.unquote(auth_cookie.value)
        # Undo the KBase escaping of '=' and '|' inside cookie values.
        cookie_obj = {
            k: v.replace('EQUALSSIGN', '=').replace('PIPESIGN', '|')
            for k, v in cookie_regex.findall(cookie_val)
        }
    else:
        raise web.HTTPError(status_code=401,
                            log_message='No auth cookie, denying access',
                            reason='Authorization required for Narrative access')
    biokbase.auth.set_environ_token(cookie_obj.get('token', None))
    kbase_env.session = cookie_obj.get('kbase_sessionid', '')
    kbase_env.client_ip = client_ip
    kbase_env.user = cookie_obj.get('user_id', '')
    log_event(g_log, 'session_start',
              {'user': kbase_env.user, 'user_agent': ua})
    """get renders the notebook template if a name is given, or
    redirects to the '/files/' handler if the name is not given."""
    path = path.strip('/')
    cm = self.contents_manager
    # will raise 404 on not found
    try:
        model = cm.get(path, content=False)
    except web.HTTPError as e:
        raise
        # if e.status_code == 404 and 'files' in path.split('/'):
        #     # 404, but '/files/' in URL, let FilesRedirect take care of it
        #     return FilesRedirectHandler.redirect_to_files(self, path)
        # else:
        #     raise
    if model['type'] != 'notebook':
        # not a notebook, redirect to files
        return FilesRedirectHandler.redirect_to_files(self, path)
    # NOTE(review): 'name' is computed but the template below is passed
    # 'path' for notebook_name — presumably intentional; confirm.
    name = url_escape(path.rsplit('/', 1)[-1])
    path = url_escape(path)
    self.write(self.render_template('notebook.html',
                                    notebook_path=path,
                                    notebook_name=path,
                                    kill_kernel=False,
                                    mathjax_url=self.mathjax_url,
                                    )
               )
def _get_all_job_states(self, job_ids=None): """ Returns the state for all running jobs. Returns a list where each element has this structure: { cell_id: (optional) id of the cell that spawned the job run_id: (optional) id of the job run awe_job_state: string creation_time: timestamp (ms since epoch) finished: 0/1 job_id: string job_state: string status: [ timestamp, _, _, _, _, _, _ ], (7-tuple) sub_jobs: [], ujs_url: string, child_jobs: [] } """ # 1. Get list of ids if job_ids is None: job_ids = self._running_jobs.keys() # 1.5 Go through job ids and remove ones that aren't found. job_ids = [j for j in job_ids if j in self._running_jobs] # 2. Foreach, check if in completed cache. If so, grab the status. If not, enqueue id # for batch lookup. job_states = dict() jobs_to_lookup = list() for job_id in job_ids: if job_id in self._completed_job_states: job_states[job_id] = dict(self._completed_job_states[job_id]) else: jobs_to_lookup.append(job_id) # 3. Lookup those jobs what need it. Cache 'em as we go, if finished. try: fetched_states = clients.get('job_service').check_jobs( {'job_ids': jobs_to_lookup}) fetched_states = sanitize_all_states(fetched_states) except Exception as e: kblogging.log_event(self._log, 'get_all_job_states_error', {'err': str(e)}) return {} error_states = fetched_states.get('check_error', {}) fetched_states = fetched_states.get('job_states', {}) for job_id in jobs_to_lookup: if job_id in fetched_states: state = fetched_states[job_id] state['cell_id'] = self._running_jobs[job_id]['job'].cell_id state['run_id'] = self._running_jobs[job_id]['job'].run_id if state.get('finished', 0) == 1: self._completed_job_states[state['job_id']] = dict(state) job_states[state['job_id']] = state elif job_id in error_states: error = error_states[job_id] job_states[state['job_id']] = {'lookup_error': error} return job_states
def _get_all_job_states(self, job_ids=None): """ Returns the state for all running jobs. Returns a list where each element has this structure: { cell_id: (optional) id of the cell that spawned the job run_id: (optional) id of the job run awe_job_state: string creation_time: timestamp (ms since epoch) finished: 0/1 job_id: string job_state: string status: [ timestamp, _, _, _, _, _, _ ], (7-tuple) sub_jobs: [], ujs_url: string, child_jobs: [] } """ # 1. Get list of ids if job_ids is None: job_ids = self._running_jobs.keys() # 1.5 Go through job ids and remove ones that aren't found. job_ids = [j for j in job_ids if j in self._running_jobs] # 2. Foreach, check if in completed cache. If so, grab the status. If not, enqueue id # for batch lookup. job_states = dict() jobs_to_lookup = list() for job_id in job_ids: if job_id in self._completed_job_states: job_states[job_id] = dict(self._completed_job_states[job_id]) else: jobs_to_lookup.append(job_id) # 3. Lookup those jobs what need it. Cache 'em as we go, if finished. try: fetched_states = clients.get('job_service').check_jobs({'job_ids': jobs_to_lookup}) fetched_states = sanitize_all_states(fetched_states) except Exception as e: kblogging.log_event(self._log, 'get_all_job_states_error', {'err': str(e)}) return {} error_states = fetched_states.get('check_error', {}) fetched_states = fetched_states.get('job_states', {}) for job_id in jobs_to_lookup: if job_id in fetched_states: state = fetched_states[job_id] state['cell_id'] = self._running_jobs[job_id]['job'].cell_id state['run_id'] = self._running_jobs[job_id]['job'].run_id if state.get('finished', 0) == 1: self._completed_job_states[state['job_id']] = dict(state) job_states[state['job_id']] = state elif job_id in error_states: error = error_states[job_id] job_states[state['job_id']] = {'lookup_error': error} return job_states
def _run_local_app_internal(self, app_id, params, widget_state, tag, version, cell_id, run_id):
    """
    Run a "local" app: validate its parameters, then route the inputs
    through the spec's output mapping into the appropriate output widget.
    No job is started; everything happens in the kernel.

    :param app_id: id of the app to run
    :param params: raw parameter dict; validated against the spec here
    :param widget_state: previously-saved widget state, or None; selects the
        advanced-viewer widget path when present
    :param tag: release tag ('release', 'beta', 'dev', ...)
    :param version: app module version (validated by _get_validated_app_spec)
    :param cell_id: id of the originating UI cell
    :param run_id: id of this run, echoed back in comm messages
    """
    self._send_comm_message('run_status', {
        'event': 'validating_app',
        'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
        'cell_id': cell_id,
        'run_id': run_id
    })
    spec = self._get_validated_app_spec(app_id, tag, False, version=version)
    # Here, we just deal with two behaviors:
    # 1. None of the above - it's a viewer.
    # 2. ***TODO*** python_class / python_function.
    # Import and exec the python code.
    # for now, just map the inputs to outputs.
    # First, validate.
    # Preflight check the params - all required ones are present, all
    # values are the right type, all numerical values are in given ranges
    spec_params = self.spec_manager.app_params(spec)
    (params, ws_refs) = validate_parameters(app_id, tag, spec_params, params)
    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': tag,
        'username': system_variable('user_id'),
        'ws': system_variable('workspace')
    }
    kblogging.log_event(self._log, "run_local_app", log_info)
    self._send_comm_message('run_status', {
        'event': 'success',
        'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
        'cell_id': cell_id,
        'run_id': run_id
    })
    (output_widget, widget_params) = map_outputs_from_state([], params, spec)
    # All a local app does is route the inputs to outputs through the
    # spec's mapping, and then feed that into the specified output widget.
    wm = WidgetManager()
    if widget_state is not None:
        return wm.show_advanced_viewer_widget(
            output_widget, widget_params, widget_state,
            cell_id=cell_id, tag=tag
        )
    else:
        return wm.show_output_widget(
            output_widget, widget_params, cell_id=cell_id, tag=tag
        )
def get(self, path):
    """
    Inject the user's KBase cookie before trying to look up a file.
    One of our big use cases bypasses the typical Jupyter login mechanism.

    :param path: URL path of the notebook (or other resource) requested
    :raises web.HTTPError: 401 when no auth cookie is present
    """
    client_ip = self.request.remote_ip
    http_headers = self.request.headers
    ua = http_headers.get('User-Agent', 'unknown')
    auth_cookie = self.cookies.get(auth_cookie_name, None)
    if auth_cookie:
        token = urllib.unquote(auth_cookie.value)
    else:
        raise web.HTTPError(
            status_code=401,
            log_message='No auth cookie, denying access',
            reason='Authorization required for Narrative access')
    # Only rebuild the session environment when the token changed.
    if token != kbase_env.auth_token:
        init_session_env(get_user_info(token), client_ip)
        log_event(g_log, 'session_start', {
            'user': kbase_env.user,
            'user_agent': ua
        })
    """
    get renders the notebook template if a name is given, or
    redirects to the '/files/' handler if the name is not given.
    """
    path = path.strip('/')
    cm = self.contents_manager
    # will raise 404 on not found
    try:
        model = cm.get(path, content=False)
    except web.HTTPError as e:
        raise
        # if e.status_code == 404 and 'files' in path.split('/'):
        #     # 404, but '/files/' in URL, let FilesRedirect take care of it
        #     return FilesRedirectHandler.redirect_to_files(self, path)
        # else:
        #     raise
    if model['type'] != 'notebook':
        # not a notebook, redirect to files
        return FilesRedirectHandler.redirect_to_files(self, path)
    # NOTE(review): 'name' is computed but the template below is passed
    # 'path' for notebook_name — presumably intentional; confirm.
    name = url_escape(path.rsplit('/', 1)[-1])
    path = url_escape(path)
    self.write(
        self.render_template('notebook.html',
                             notebook_path=path,
                             notebook_name=path,
                             kill_kernel=False,
                             mathjax_url=self.mathjax_url))
def list_narratives(self, ws_id=None):
    # self.log.debug("Listing Narratives")
    # self.log.debug("kbase_session = %s" % str(self.kbase_session))
    """
    By default, this searches for Narrative types in any workspace that
    the current token has read access to. Works anonymously as well.

    If the ws_id field is not None, it will only look up Narratives in
    that particular workspace (by its numerical id).

    Returns a list of dictionaries of object descriptions, one for each
    Narrative. The keys in each dictionary are those from the
    list_objects_fields list above.

    This is just a wrapper around the Workspace list_objects command.

    Raises: WorkspaceError, if access is denied; ValueError is ws_id is
    not numeric.
    """
    log_event(g_log, "list_narratives start", {"ws_id": ws_id})
    ws = self.ws_client()
    if ws_id:
        ws_ids = [int(ws_id)
                  ]  # will throw an exception if ws_id isn't an int
    else:
        # No workspace given: gather every workspace id the token can read,
        # plus the public ones.
        ret = ws.list_workspace_ids({
            "perm": "r",
            "onlyGlobal": 0,
            "excludeGlobal": 0
        })
        ws_ids = ret.get("workspaces", []) + ret.get("pub", [])
    try:
        res = []
        # Batch the list_objects calls: the Workspace limits how many
        # workspace ids can be queried at once (MAX_WORKSPACES per call).
        for i in range(0, len(ws_ids), MAX_WORKSPACES):
            res += ws.list_objects({
                "ids": ws_ids[i:i + MAX_WORKSPACES],
                "type": NARRATIVE_TYPE,
                "includeMetadata": 1,
            })
    except ServerError as err:
        raise WorkspaceError(err, ws_ids)
    my_narratives = [dict(zip(LIST_OBJECTS_FIELDS, obj)) for obj in res]
    for nar in my_narratives:
        # Look first for the name in the object metadata. if it's not there, use
        # the object's name. If THAT'S not there, use Untitled.
        # This gives support for some rather old narratives that don't
        # have their name stashed in the metadata.
        nar["name"] = nar["meta"].get("name", nar.get("name", "Untitled"))
    return my_narratives
def _init_session(request, cookies):
    """
    Establish the KBase session environment from the request's auth cookie.

    Raises a 401 HTTPError when no auth cookie is present. When the cookie's
    token differs from the one already in kbase_env, the session environment
    is rebuilt and a 'session_start' event is logged.
    """
    auth_cookie = cookies.get(auth_cookie_name)
    if auth_cookie is None:
        raise web.HTTPError(status_code=401,
                            log_message='No auth cookie, denying access',
                            reason='Authorization required for Narrative access')
    token = urllib.unquote(auth_cookie.value)
    if token != kbase_env.auth_token:
        # Token changed since the last request: rebuild the session env.
        init_session_env(get_user_info(token), request.remote_ip)
        log_event(g_log, 'session_start',
                  {'user': kbase_env.user,
                   'user_agent': request.headers.get('User-Agent', 'unknown')})
def register_new_job(self, job: Job) -> None:
    """
    Registers a new Job with the manager - should only be invoked when a
    new Job gets started.

    This stores the Job locally and pushes it over the comm channel to the
    Narrative where it gets serialized.

    Parameters:
    -----------
    job : biokbase.narrative.jobs.job.Job object
        The new Job that was started.
    """
    new_id = job.job_id
    kblogging.log_event(self._log, "register_new_job", {"job_id": new_id})
    # Track the job; refresh=0 means it is not polled for updates yet.
    self._running_jobs[new_id] = {'job': job, 'refresh': 0}
def initialize_jobs(self):
    """
    Initializes this JobManager.
    This is expected to be run by a running Narrative, and naturally linked
    to a workspace. So it does the following steps.
    1. app_util.system_variable('workspace_id')
    2. get list of jobs with that ws id from UJS (also gets tag, cell_id, run_id)
    3. initialize the Job objects by running NJS.get_job_params (also gets app_id)
    4. start the status lookup loop.
    """
    ws_id = system_variable("workspace_id")
    job_states = dict()
    kblogging.log_event(self._log, "JobManager.initialize_jobs", {"ws_id": ws_id})
    try:
        job_states = clients.get("execution_engine2").check_workspace_jobs(
            {
                "workspace_id": ws_id,
                "return_list": 0
            })
        # Start from a clean slate; jobs are re-registered below.
        self._running_jobs = dict()
    except Exception as e:
        kblogging.log_event(self._log, "init_error", {"err": str(e)})
        # Re-raise as a narrative-friendly exception.
        new_e = transform_job_exception(e)
        raise new_e
    for job_id, job_state in job_states.items():
        job_input = job_state.get("job_input", {})
        # narrative_cell_info carries the tag/cell/run metadata stored by the UI.
        job_meta = job_input.get("narrative_cell_info", {})
        status = job_state.get("status")
        job = Job.from_state(
            job_id,
            job_input,
            job_state.get("user"),
            app_id=job_input.get("app_id"),
            tag=job_meta.get("tag", "release"),
            cell_id=job_meta.get("cell_id", None),
            run_id=job_meta.get("run_id", None),
            token_id=job_meta.get("token_id", None),
            meta=job_meta,
        )
        # Only poll (refresh=1) jobs that are not already terminal.
        self._running_jobs[job_id] = {
            "refresh": 1
            if status not in ["completed", "errored", "terminated"]
            else 0,
            "job": job,
        }
def get(self, notebook_id):
    """
    Open a narrative notebook: push any KBase auth cookie into the notebook
    manager, log the 'open' event, then delegate to the wrapped get handler.
    """
    app_log.debug("notebook_id = " + notebook_id)
    matched = [self.cookies[name] for name in all_cookies if name in self.cookies]
    if matched:
        # Push the cookie
        raw_value = urllib.unquote(matched[0].value)
        app_log.debug("kbase cookie = {}".format(raw_value))
        pushed = cookie_pusher(raw_value, getattr(self, 'notebook_manager'))
        # Log the event with the identity carried in the cookie.
        log_event(g_log, 'open', {
            'narr': notebook_id,
            'user': pushed.get('user_id', ''),
            'session_id': pushed.get('kbase_sessionid', ''),
        })
    return old_get(self, notebook_id)
def _construct_job_status_set(self, job_ids: list) -> dict: """ Builds a set of job states for the list of job ids. """ # if cached, use 'em. # otherwise, lookup. # do transform # cache terminal ones. # return all. if not isinstance(job_ids, list): raise ValueError("job_ids must be a list") if job_ids is None: job_ids = self._running_jobs.keys() job_states = dict() jobs_to_lookup = list() # Fetch from cache of terminated jobs, where available. # These are already post-processed and ready to return. for job_id in job_ids: if job_id in self._completed_job_states: job_states[job_id] = self._completed_job_states[job_id] else: jobs_to_lookup.append(job_id) fetched_states = dict() # Get the rest of states direct from EE2. if len(jobs_to_lookup): try: fetched_states = clients.get("execution_engine2").check_jobs({ "job_ids": jobs_to_lookup, "exclude_fields": EXCLUDED_JOB_STATE_FIELDS, "return_list": 0 }) except Exception as e: kblogging.log_event(self._log, "construct_job_status_set", {"err": str(e)}) for job_id, state in fetched_states.items(): revised_state = self._construct_job_status(self.get_job(job_id), state) if revised_state["state"]["status"] in TERMINAL_STATES: self._completed_job_states[job_id] = revised_state job_states[job_id] = revised_state return job_states
def initialize_jobs(self):
    """
    Initializes this JobManager.
    This is expected to be run by a running Narrative, and naturally linked
    to a workspace. So it does the following steps.
    1. app_util.system_variable('workspace_id')
    2. get list of jobs with that ws id from UJS (also gets tag, cell_id, run_id)
    3. initialize the Job objects by running NJS.get_job_params (also gets app_id)
    4. start the status lookup loop.
    """
    ws_id = system_variable("workspace_id")
    job_states = dict()
    kblogging.log_event(self._log, "JobManager.initialize_jobs", {'ws_id': ws_id})
    try:
        job_states = clients.get('execution_engine2').check_workspace_jobs(
            {
                'workspace_id': ws_id,
                'return_list': 0
            })
        # Start from a clean slate; jobs are re-registered below.
        self._running_jobs = dict()
    except Exception as e:
        kblogging.log_event(self._log, 'init_error', {'err': str(e)})
        # Re-raise as a narrative-friendly exception.
        new_e = transform_job_exception(e)
        raise new_e
    for job_id, job_state in job_states.items():
        job_input = job_state.get('job_input', {})
        # narrative_cell_info carries the tag/cell/run metadata stored by the UI.
        job_meta = job_input.get('narrative_cell_info', {})
        status = job_state.get('status')
        job = Job.from_state(job_id,
                             job_input,
                             job_state.get('user'),
                             app_id=job_input.get('app_id'),
                             tag=job_meta.get('tag', 'release'),
                             cell_id=job_meta.get('cell_id', None),
                             run_id=job_meta.get('run_id', None),
                             token_id=job_meta.get('token_id', None),
                             meta=job_meta)
        # Only poll (refresh=1) jobs that are not already terminal.
        self._running_jobs[job_id] = {
            'refresh': 1
            if status not in ['completed', 'errored', 'terminated']
            else 0,
            'job': job
        }
def initialize_jobs(self, cell_ids: List[str] = None) -> None:
    """
    Initializes this JobManager.
    This is expected to be run by a running Narrative, and naturally linked
    to a workspace. So it does the following steps.
    1. gets the current workspace ID from app_util.system_variable('workspace_id')
    2. get list of jobs with that ws id from ee2 (also gets tag, cell_id, run_id)
    3. initialize the Job objects and add them to the running jobs list
    4. start the status lookup loop.

    :param cell_ids: optional list of cell ids; when given, only jobs that
        live in those cells are marked for refresh
    """
    ws_id = system_variable("workspace_id")
    job_states = dict()
    kblogging.log_event(self._log, "JobManager.initialize_jobs", {"ws_id": ws_id})
    try:
        job_states = clients.get("execution_engine2").check_workspace_jobs(
            {
                "workspace_id": ws_id,
                "return_list": 0,  # do not remove
                "exclude_fields": JOB_INIT_EXCLUDED_JOB_STATE_FIELDS,
            }
        )
    except Exception as e:
        kblogging.log_event(self._log, "init_error", {"err": str(e)})
        new_e = transform_job_exception(e, "Unable to initialize jobs")
        raise new_e

    self._running_jobs = dict()

    # Parents must come before their children so get_job() below can
    # resolve child ids against already-registered jobs.
    job_states = self._reorder_parents_children(job_states)
    for job_state in job_states.values():
        child_jobs = None
        if job_state.get("batch_job"):
            child_jobs = [
                self.get_job(child_id)
                for child_id in job_state.get("child_jobs", [])
            ]
        job = Job(job_state, children=child_jobs)

        # Set to refresh when job is not in terminal state
        # and when job is present in cells (if given)
        # and when it is not part of a batch
        refresh = not job.was_terminal() and not job.batch_id
        if cell_ids is not None:
            refresh = refresh and job.in_cells(cell_ids)

        self.register_new_job(job, refresh)
def get(self):
    """
    Initializes the KBase session from the cookie passed into it.

    Parses the KBase cookie (if one of the recognized names is present),
    stores the token/session/user in the shared environment, logs a
    'session_start' event, then redirects to the 'next' argument (or the
    base URL) when a user was found.
    """
    cookie_regex = re.compile('([^ =|]+)=([^\|]*)')
    client_ip = self.request.remote_ip
    http_headers = self.request.headers
    ua = http_headers.get('User-Agent', 'unknown')
    # save client ip in environ for later logging
    kbase_env.client_ip = client_ip
    found_cookies = [
        self.cookies[c] for c in all_cookies if c in self.cookies
    ]
    if found_cookies:
        # Push the cookie
        cookie_val = urllib.unquote(found_cookies[0].value)
        # Undo the KBase escaping of '=' and '|' inside cookie values.
        cookie_obj = {
            k: v.replace('EQUALSSIGN', '=').replace('PIPESIGN', '|')
            for k, v in cookie_regex.findall(cookie_val)
        }
        if app_log.isEnabledFor(logging.DEBUG):
            app_log.debug("kbase cookie = {}".format(cookie_val))
            # Fixed: this referenced an undefined name 'sess' (NameError
            # whenever DEBUG logging was on) and formatted uid from the
            # 'token' key; use the parsed cookie_obj and the right keys.
            app_log.debug(
                "KBaseLoginHandler.get: user_id={uid} token={tok}".format(
                    uid=cookie_obj.get('user_id', 'none'),
                    tok=cookie_obj.get('token', 'none')))
        biokbase.auth.set_environ_token(cookie_obj.get('token', None))
        kbase_env.session = cookie_obj.get('kbase_sessionid', '')
        kbase_env.client_ip = client_ip
        kbase_env.user = cookie_obj.get('user_id', '')
        log_event(g_log, 'session_start', {
            'user': kbase_env.user,
            'user_agent': ua
        })
    self.current_user = kbase_env.user
    app_log.info("KBaseLoginHandler.get(): user={}".format(kbase_env.user))
    if self.current_user:
        self.redirect(self.get_argument('next', default=self.base_url))
    else:
        self.write('This is a test?')
def get(self):
    """
    Log the user out: scrub all session state from the shared environment,
    record a 'session_close' event, and render the logout page.
    """
    departing_user = kbase_env.user
    client_ip = self.request.remote_ip
    user_agent = self.request.headers.get('User-Agent', 'unknown')
    # Scrub every piece of session state from the shared environment.
    kbase_env.auth_token = 'none'
    kbase_env.narrative = 'none'
    kbase_env.session = 'none'
    kbase_env.user = '******'
    kbase_env.workspace = 'none'
    biokbase.auth.set_environ_token(None)
    app_log.info('Successfully logged out')
    log_event(g_log, 'session_close',
              {'user': departing_user, 'user_agent': user_agent})
    self.write(self.render_template(
        'logout.html',
        message={'info': 'Successfully logged out'}))
def _init_session(request, cookies):
    """
    Establish the KBase session environment from the request's auth cookie.

    Raises a 401 HTTPError when no auth cookie is present. When the cookie's
    token differs from the one already in kbase_env, the session environment
    is rebuilt and a 'session_start' event is logged.
    """
    auth_cookie = cookies.get(auth_cookie_name)
    if auth_cookie is None:
        raise web.HTTPError(
            status_code=401,
            log_message="No auth cookie, denying access",
            reason="Authorization required for Narrative access",
        )
    token = urllib.parse.unquote(auth_cookie.value)
    if token != kbase_env.auth_token:
        # Token changed since the last request: rebuild the session env.
        init_session_env(get_user_info(token), request.remote_ip)
        log_event(
            g_log,
            "session_start",
            {
                "user": kbase_env.user,
                "user_agent": request.headers.get("User-Agent", "unknown"),
            },
        )
def get(self): """ Initializes the KBase session from the cookie passed into it. """ # cookie_regex = re.compile('([^ =|]+)=([^\|]*)') client_ip = self.request.remote_ip http_headers = self.request.headers ua = http_headers.get('User-Agent', 'unknown') # save client ip in environ for later logging kbase_env.client_ip = client_ip auth_cookie = self.cookies.get(auth_cookie_name, None) if auth_cookie: token = urllib.unquote(auth_cookie.value) auth_info = dict() try: auth_info = get_user_info(token) except Exception as e: app_log.error( "Unable to get user information from authentication token!" ) raise # re-enable if token logging info is needed. # if app_log.isEnabledFor(logging.DEBUG): # app_log.debug("kbase cookie = {}".format(cookie_val)) # app_log.debug("KBaseLoginHandler.get: user_id={uid} token={tok}" # .format(uid=auth_info.get('user', 'none'), # tok=token)) init_session_env(auth_info, client_ip) self.current_user = kbase_env.user log_event(g_log, 'session_start', { 'user': kbase_env.user, 'user_agent': ua }) app_log.info("KBaseLoginHandler.get(): user={}".format(kbase_env.user)) if self.current_user: self.redirect(self.get_argument('next', default=self.base_url)) else: self.write('This is a test?')
def _get_all_job_states(self, job_ids=None):
    """
    Returns the state for all running jobs as a dict of job_id -> state.

    States come from the completed-job cache where possible; the rest are
    fetched in a single batched ``check_jobs`` service call, and finished
    ones are cached on the way out. Returns an empty dict if the service
    lookup itself fails.
    """
    # 1. Get list of ids
    if job_ids is None:
        job_ids = self._running_jobs.keys()
    # 1.5 Go through job ids and remove ones that aren't found.
    job_ids = [j for j in job_ids if j in self._running_jobs]

    # 2. Foreach, check if in completed cache. If so, grab the status.
    # If not, enqueue id for batch lookup.
    job_states = dict()
    jobs_to_lookup = list()
    for job_id in job_ids:
        if job_id in self._completed_job_states:
            job_states[job_id] = dict(self._completed_job_states[job_id])
        else:
            jobs_to_lookup.append(job_id)
    if not jobs_to_lookup:
        # Everything was cached; no reason to hit the job service.
        return job_states

    # 3. Lookup those jobs what need it. Cache 'em as we go, if finished.
    try:
        fetched_states = clients.get('job_service').check_jobs(
            {'job_ids': jobs_to_lookup})
    except Exception as e:
        kblogging.log_event(self._log, 'get_all_job_states_error',
                            {'err': str(e)})
        return {}
    error_states = fetched_states.get('check_errors', {})
    fetched_states = fetched_states.get('job_states', {})
    for job_id in jobs_to_lookup:
        if job_id in fetched_states:
            state = fetched_states[job_id]
            state['cell_id'] = self._running_jobs[job_id]['job'].cell_id
            state['run_id'] = self._running_jobs[job_id]['job'].run_id
            if state.get('finished', 0) == 1:
                self._completed_job_states[state['job_id']] = dict(state)
            job_states[state['job_id']] = state
        elif job_id in error_states:
            # BUG FIX: this branch previously keyed the result on
            # state['job_id'], where ``state`` is unbound (NameError) if
            # the first looked-up job errored, or stale otherwise.
            job_states[job_id] = {'lookup_error': error_states[job_id]}
    return job_states
def get(self, path):
    """
    Render the notebook template for ``path``, or redirect to the
    '/files/' handler when the target is not a notebook.
    """
    _init_session(self.request, self.cookies)
    clean_path = path.strip("/")
    manager = self.contents_manager
    # will raise 404 on not found
    try:
        model = manager.get(clean_path, content=False)
    except web.HTTPError as err:
        log_event(g_log, "loading_error", {"error": str(err)})
        if err.status_code == 403:
            self.write(self.render_template("403.html", status_code=403))
        else:
            self.write(self.render_template(
                "generic_error.html",
                message=err.log_message,
                status_code=err.status_code,
            ))
        return
    if model.get("type") != "notebook":
        # not a notebook, redirect to files
        return FilesRedirectHandler.redirect_to_files(self, clean_path)
    escaped = url_escape(clean_path)
    self.write(self.render_template(
        "notebook.html",
        notebook_path=escaped,
        notebook_name=escaped,
        kill_kernel=False,
        mathjax_url=self.mathjax_url,
        google_analytics_id=URLS.google_analytics_id,
        userName=kbase_env.user,
        google_ad_id=URLS.google_ad_id,
        google_ad_conversion=URLS.google_ad_conversion,
    ))
def _run_dynamic_service_internal(self, app_id, params, tag, version,
                                  cell_id, run_id):
    """
    Runs a dynamic service method synchronously. For UI calls (cell_id
    given) the result is sent over the cell comm channel; for code-cell
    calls the raw result is returned for display.
    """
    spec = self._get_validated_app_spec(app_id, tag, False, version=version)

    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': tag,
        'username': system_variable('user_id'),
        'ws': system_variable('workspace')
    }
    kblogging.log_event(self._log, "run_dynamic_service", log_info)

    # Silly to keep this here, but we do not validate the incoming parameters.
    # If they are provided by the UI (we have cell_id), they are constructed
    # according to the spec, so are trusted;
    # Otherwise, if they are the product of direct code cell entry, this is a
    # mode we do not "support", so we can let it fail hard.
    # In the future when code cell interaction is supported for users, we will
    # need to provide robust validation and error reporting, but this may end
    # up being (should be) provided by the sdk execution infrastructure anyway
    input_vals = params
    function_name = (spec['behavior']['kb_service_name'] + '.'
                     + spec['behavior']['kb_service_method'])
    # NOTE: the original wrapped this call in a bare ``try/except: raise``,
    # which is a no-op; errors propagate to the caller either way.
    result = clients.get("service").sync_call(
        function_name, input_vals, service_version=tag)[0]
    # if a ui call (a cell_id is defined) we send a result message, otherwise
    # just the raw result for display in a code cell. This is how we "support"
    # code cells for internal usage.
    if cell_id:
        self.send_cell_message('result', cell_id, run_id, {'result': result})
    else:
        return result
def list_narratives(self, ws_id=None):
    """
    By default, this searches for Narrative types in any workspace that the
    current token has read access to. Works anonymously as well.

    If the ws_id field is not None, it will only look up Narratives in that
    particular workspace (by its numerical id).

    Returns a list of dictionaries of object descriptions, one for each
    Narrative. The keys in each dictionary are those from the
    list_objects_fields list above.

    This is just a wrapper around the Workspace list_objects command.

    Raises: WorkspaceError, if access is denied; ValueError if ws_id is
    not numeric.
    """
    log_event(g_log, 'list_narratives start', {'ws_id': ws_id})
    list_obj_params = {'type': self.nar_type, 'includeMetadata': 1}
    if ws_id:
        # Raises ValueError if ws_id isn't numeric. (The original wrapped
        # this in ``try/except ValueError: raise`` - a no-op, removed.)
        int(ws_id)
        list_obj_params['ids'] = [ws_id]
    try:
        ws = self.ws_client()
        res = ws.list_objects(list_obj_params)
    except ServerError as err:
        raise WorkspaceError(err, ws_id)
    my_narratives = [dict(zip(list_objects_fields, obj)) for obj in res]
    for nar in my_narratives:
        # Look first for the name in the object metadata. if it's not there,
        # use the object's name. If THAT'S not there, use Untitled.
        # This gives support for some rather old narratives that don't
        # have their name stashed in the metadata.
        nar['name'] = nar['meta'].get('name', nar.get('name', 'Untitled'))
    return my_narratives
def get(self):
    """Log the user out: reset the KBase session environment and render the logout page."""
    http_headers = self.request.headers
    # Grab the user before the environment is reset so the session_close
    # event can still be attributed.
    user = kbase_env.user
    ua = http_headers.get('User-Agent', 'unknown')
    # The unused ``client_ip`` local from the original has been removed.
    kbase_env.auth_token = 'none'
    kbase_env.narrative = 'none'
    kbase_env.session = 'none'
    kbase_env.user = '******'
    kbase_env.workspace = 'none'
    biokbase.auth.set_environ_token(None)
    app_log.info('Successfully logged out')
    log_event(g_log, 'session_close', {'user': user, 'user_agent': ua})
    self.write(
        self.render_template('logout.html',
                             message={'info': 'Successfully logged out'}))
def get(self):
    """Log the user out: clear the KBase session environment and render the logout page."""
    # NOTE: a bare ``self.request.remote_ip`` expression statement (dead
    # code; its value was never used) has been removed here.
    user = kbase_env.user
    ua = self.request.headers.get("User-Agent", "unknown")
    # Reset all session-scoped state; '******' masks the user field.
    kbase_env.auth_token = "none"
    kbase_env.narrative = "none"
    kbase_env.session = "none"
    kbase_env.user = "******"
    kbase_env.workspace = "none"
    set_environ_token(None)
    app_log.info("Successfully logged out")
    log_event(g_log, "session_close", {"user": user, "user_agent": ua})
    self.write(
        self.render_template("logout.html",
                             message={"info": "Successfully logged out"}))
def get(self, path):
    """
    Render the notebook template for the given path, or redirect to the
    '/files/' handler when the target is not a notebook.
    """
    _init_session(self.request, self.cookies)
    nb_path = path.strip('/')
    manager = self.contents_manager
    # will raise 404 on not found
    try:
        model = manager.get(nb_path, content=False)
    except web.HTTPError as err:
        log_event(g_log, 'loading_error', {'error': str(err)})
        if err.status_code == 403:
            self.write(self.render_template('403.html', status_code=403))
        else:
            self.write(self.render_template(
                'generic_error.html',
                message=err.log_message,
                status_code=err.status_code
            ))
        return
    if model.get('type') != 'notebook':
        # not a notebook, redirect to files
        return FilesRedirectHandler.redirect_to_files(self, nb_path)
    nb_path = url_escape(nb_path)
    self.write(self.render_template(
        'notebook.html',
        notebook_path=nb_path,
        notebook_name=nb_path,
        kill_kernel=False,
        mathjax_url=self.mathjax_url,
        google_analytics_id=URLS.google_analytics_id,
        userName=kbase_env.user,
        google_ad_id=URLS.google_ad_id,
        google_ad_conversion=URLS.google_ad_conversion,
    ))
def get(self):
    """
    Initializes the KBase session from the cookie passed into it.
    """
    cookie_regex = re.compile('([^ =|]+)=([^\|]*)')
    client_ip = self.request.remote_ip
    http_headers = self.request.headers
    ua = http_headers.get('User-Agent', 'unknown')
    # save client ip in environ for later logging
    kbase_env.client_ip = client_ip
    auth_cookie = self.cookies.get(auth_cookie_name, None)
    if auth_cookie:
        # Push the cookie
        cookie_val = urllib.unquote(auth_cookie.value)
        # The cookie encodes '=' and '|' as EQUALSSIGN / PIPESIGN; undo that.
        cookie_obj = {
            k: v.replace('EQUALSSIGN', '=').replace('PIPESIGN', '|')
            for k, v in cookie_regex.findall(cookie_val)
        }
        if app_log.isEnabledFor(logging.DEBUG):
            app_log.debug("kbase cookie = {}".format(cookie_val))
            # BUG FIX: this previously referenced an undefined name
            # ``sess``, raising NameError whenever DEBUG logging was on.
            app_log.debug("KBaseLoginHandler.get: user_id={uid} token={tok}"
                          .format(uid=cookie_obj.get('user_id', 'none'),
                                  tok=cookie_obj.get('token', 'none')))
        biokbase.auth.set_environ_token(cookie_obj.get('token', None))
        kbase_env.session = cookie_obj.get('kbase_sessionid', '')
        kbase_env.client_ip = client_ip
        kbase_env.user = cookie_obj.get('user_id', '')
        log_event(g_log, 'session_start',
                  {'user': kbase_env.user, 'user_agent': ua})
        self.current_user = kbase_env.user
    app_log.info("KBaseLoginHandler.get(): user={}".format(kbase_env.user))
    if self.current_user:
        self.redirect(self.get_argument('next', default=self.base_url))
    else:
        self.write('This is a test?')
def _get_all_job_states(self, job_ids=None):
    """
    Returns the state for all running jobs (job_id -> state dict).

    Cached, completed job states are reused; the remainder are fetched in
    one batched ``check_jobs`` call and finished ones are cached. Returns
    an empty dict if the service call fails.
    """
    # 1. Get list of ids
    if job_ids is None:
        job_ids = self._running_jobs.keys()
    # 1.5 Go through job ids and remove ones that aren't found.
    job_ids = [j for j in job_ids if j in self._running_jobs]

    # 2. Foreach, check if in completed cache. If so, grab the status.
    # If not, enqueue id for batch lookup.
    job_states = dict()
    jobs_to_lookup = list()
    for job_id in job_ids:
        if job_id in self._completed_job_states:
            job_states[job_id] = dict(self._completed_job_states[job_id])
        else:
            jobs_to_lookup.append(job_id)
    if not jobs_to_lookup:
        # Nothing uncached - skip the service round trip entirely.
        return job_states

    # 3. Lookup those jobs what need it. Cache 'em as we go, if finished.
    try:
        fetched_states = clients.get('job_service').check_jobs(
            {'job_ids': jobs_to_lookup})
    except Exception as e:
        kblogging.log_event(self._log, 'get_all_job_states_error',
                            {'err': str(e)})
        return {}
    error_states = fetched_states.get('check_errors', {})
    fetched_states = fetched_states.get('job_states', {})
    for job_id in jobs_to_lookup:
        if job_id in fetched_states:
            state = fetched_states[job_id]
            state['cell_id'] = self._running_jobs[job_id]['job'].cell_id
            state['run_id'] = self._running_jobs[job_id]['job'].run_id
            if state.get('finished', 0) == 1:
                self._completed_job_states[state['job_id']] = dict(state)
            job_states[state['job_id']] = state
        elif job_id in error_states:
            # BUG FIX: previously indexed with state['job_id'], which is
            # unbound (NameError) when the first result is an error, and
            # points at the wrong job otherwise. Key on job_id instead.
            job_states[job_id] = {'lookup_error': error_states[job_id]}
    return job_states
def _run_dynamic_service_internal(self, app_id, params, tag, version,
                                  cell_id, run_id):
    """
    Synchronously runs a dynamic service method. If cell_id is given
    (UI call) the result goes out over the cell comm channel; otherwise
    (code-cell call) the raw result is returned.
    """
    spec = self._get_validated_app_spec(app_id, tag, False, version=version)

    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': tag,
        'username': system_variable('user_id'),
        'ws': system_variable('workspace')
    }
    kblogging.log_event(self._log, "run_dynamic_service", log_info)

    # Silly to keep this here, but we do not validate the incoming parameters.
    # If they are provided by the UI (we have cell_id), they are constructed
    # according to the spec, so are trusted;
    # Otherwise, if they are the product of direct code cell entry, this is a
    # mode we do not "support", so we can let it fail hard.
    # In the future when code cell interaction is supported for users, we will
    # need to provide robust validation and error reporting, but this may end
    # up being (should be) provided by the sdk execution infrastructure anyway
    input_vals = params
    function_name = (spec['behavior']['kb_service_name'] + '.'
                     + spec['behavior']['kb_service_method'])
    # The original's ``try: ... except: raise`` wrapper was a no-op and has
    # been removed; exceptions still propagate unchanged.
    result = clients.get("service").sync_call(
        function_name, input_vals, service_version=tag)[0]
    # if a ui call (a cell_id is defined) we send a result message, otherwise
    # just the raw result for display in a code cell. This is how we "support"
    # code cells for internal usage.
    if cell_id:
        self.send_cell_message('result', cell_id, run_id, {'result': result})
    else:
        return result
def get(self):
    """
    Initializes the KBase session from the cookie passed into it.
    """
    client_ip = self.request.remote_ip
    agent = self.request.headers.get('User-Agent', 'unknown')
    # save client ip in environ for later logging
    kbase_env.client_ip = client_ip

    auth_cookie = self.cookies.get(auth_cookie_name, None)
    if auth_cookie:
        token = urllib.unquote(auth_cookie.value)
        auth_info = dict()
        try:
            auth_info = get_user_info(token)
        except Exception:
            app_log.error(
                "Unable to get user information from authentication token!")
            raise
        init_session_env(auth_info, client_ip)
        self.current_user = kbase_env.user
        log_event(g_log, 'session_start',
                  {'user': kbase_env.user, 'user_agent': agent})
    app_log.info("KBaseLoginHandler.get(): user={}".format(kbase_env.user))
    if not self.current_user:
        self.write('This is a test?')
    else:
        self.redirect(self.get_argument('next', default=self.base_url))
def _handle_comm_message(self, msg: dict) -> dict:
    """
    Handles comm messages arriving over the KBaseJobs channel.

    The raw message is wrapped in a JobRequest and dispatched through the
    handler map built at JobComm creation. An unknown request type raises
    a JobRequestException, which exc_to_msg also reports back over the
    channel as a 'job_error' message.
    """
    with exc_to_msg(msg):
        req = JobRequest(msg)
        kblogging.log_event(
            self._log, "handle_comm_message", {"msg": req.request_type}
        )
        handler = self._msg_map.get(req.request_type)
        if handler is None:
            raise JobRequestException(
                f"Unknown KBaseJobs message '{req.request_type}'"
            )
        return handler(req)
def get(self, path):
    """
    Render the notebook template if the path names a notebook; otherwise
    redirect to the '/files/' handler.
    """
    _init_session(self.request, self.cookies)
    target = path.strip('/')
    manager = self.contents_manager
    # will raise 404 on not found
    try:
        model = manager.get(target, content=False)
    except web.HTTPError as err:
        log_event(g_log, 'loading_error', {'error': str(err)})
        if err.status_code == 403:
            self.write(self.render_template('403.html', status_code=403))
            return
        self.write(
            self.render_template('generic_error.html',
                                 message=err.log_message,
                                 status_code=err.status_code))
        return
    if model.get('type') != 'notebook':
        # not a notebook, redirect to files
        return FilesRedirectHandler.redirect_to_files(self, target)
    target = url_escape(target)
    self.write(
        self.render_template(
            'notebook.html',
            notebook_path=target,
            notebook_name=target,
            kill_kernel=False,
            mathjax_url=self.mathjax_url,
            google_analytics_id=URLS.google_analytics_id,
        ))
def register_new_job(self, job: Job, refresh: bool = None) -> None:
    """
    Registers a new Job with the manager and stores the job locally.
    This should only be invoked when a new Job gets started.

    Parameters:
    -----------
    job : biokbase.narrative.jobs.job.Job object
        The new Job that was started.
    refresh : bool, optional
        Whether the status loop should refresh this job; defaults to
        True unless the job is already terminal.
    """
    kblogging.log_event(self._log, "register_new_job", {"job_id": job.job_id})
    if refresh is None:
        refresh = not job.was_terminal()
    self._running_jobs[job.job_id] = {"job": job, "refresh": refresh}

    # add the new job to the _jobs_by_cell_id mapping if there is a
    # cell_id present. setdefault replaces the original
    # ``not in ...keys()`` membership test (an anti-pattern: the dict
    # itself supports `in`, and setdefault does test+insert in one step).
    if job.cell_id:
        self._jobs_by_cell_id.setdefault(job.cell_id, set()).add(job.job_id)
        if job.batch_id:
            self._jobs_by_cell_id[job.cell_id].add(job.batch_id)
def get(self, notebook_id):
    """
    Open a narrative: record session info from any KBase cookies, log the
    'open' event, then delegate to the original handler.
    """
    remote_ip = self.request.remote_ip
    agent = self.request.headers.get('User-Agent', 'unknown')
    app_log.info("Http-Headers={}".format(
        list(self.request.headers.get_all())))
    # save client ip in environ for later logging
    kbase_env.client_ip = remote_ip
    app_log.debug("notebook_id = " + notebook_id)

    found_cookies = [self.cookies[c]
                     for c in all_cookies if c in self.cookies]
    if found_cookies:
        # Push the cookie
        cookie_val = urllib.unquote(found_cookies[0].value)
        app_log.debug("kbase cookie = {}".format(cookie_val))
        cookie_obj = cookie_pusher(cookie_val,
                                   getattr(self, 'notebook_manager'))
        # Log the event
        user = cookie_obj.get('user_id', '')
        kbase_env.narrative = notebook_id
        kbase_env.session = cookie_obj.get('kbase_sessionid', '')
        kbase_env.client_ip = remote_ip
        log_event(g_log, 'open', {'user': user, 'user_agent': agent})
    app_log.info("After get(): KB_NARRATIVE={}".format(
        os.environ.get('KB_NARRATIVE', 'none')))
    return old_get(self, notebook_id)
def _run_app_internal(self, app_id, params, tag, version, cell_id, run_id,
                      dry_run):
    """
    Attempts to run the app, returns a Job with the running app info.
    Should *hopefully* also inject that app into the Narrative's metadata.
    Probably need some kind of JavaScript-foo to get that to work.

    Parameters:
    -----------
    app_id - should be from the app spec, e.g. 'build_a_metabolic_model'
        or 'MegaHit/run_megahit'.
    params - a dictionary of parameters.
    tag - optional, one of [release|beta|dev] (default=release)
    version - optional, a semantic version string. Only released modules
        have versions, so if the tag is not 'release', and a version is
        given, a ValueError will be raised.
    cell_id / run_id - optional UI bookkeeping ids kept in job metadata.
    dry_run - if True, return the assembled job-runner inputs instead of
        actually launching the job.
    """
    ws_id = strict_system_variable('workspace_id')
    spec = self._get_validated_app_spec(app_id, tag, True, version=version)

    # Preflight check the params - all required ones are present, all
    # values are the right type, all numerical values are in given ranges
    spec_params = self.spec_manager.app_params(spec)
    spec_params_map = dict((spec_params[i]['id'], spec_params[i])
                           for i in range(len(spec_params)))
    ws_input_refs = extract_ws_refs(app_id, tag, spec_params, params)
    input_vals = self._map_inputs(
        spec['behavior']['kb_service_input_mapping'], params,
        spec_params_map)

    service_method = spec['behavior']['kb_service_method']
    service_name = spec['behavior']['kb_service_name']
    service_ver = spec['behavior'].get('kb_service_version', None)
    # Let the given version override the spec's version.
    if version is not None:
        service_ver = version

    # This is what calls the function in the back end - Module.method
    # This isn't the same as the app spec id.
    function_name = service_name + '.' + service_method
    job_meta = {'tag': tag}
    if cell_id is not None:
        job_meta['cell_id'] = cell_id
    if run_id is not None:
        job_meta['run_id'] = run_id

    # This is the input set for NJSW.run_job. Now we need the workspace id
    # and whatever fits in the metadata.
    job_runner_inputs = {
        'method': function_name,
        'service_ver': service_ver,
        'params': input_vals,
        'app_id': app_id,
        'wsid': ws_id,
        'meta': job_meta
    }
    if len(ws_input_refs) > 0:
        job_runner_inputs['source_ws_objects'] = ws_input_refs
    if dry_run:
        return job_runner_inputs

    # We're now almost ready to run the job. Last, we need an agent token.
    # (The original wrapped this in ``try/except Exception as e: raise``,
    # a no-op that has been removed - failures propagate unchanged.)
    token_name = 'KBApp_{}'.format(app_id)[:self.__MAX_TOKEN_NAME_LEN]
    agent_token = auth.get_agent_token(auth.get_auth_token(),
                                       token_name=token_name)
    job_runner_inputs['meta']['token_id'] = agent_token['id']

    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': tag,
        'version': service_ver,
        'username': system_variable('user_id'),
        'wsid': ws_id
    }
    kblogging.log_event(self._log, "run_app", log_info)

    try:
        job_id = clients.get(
            "job_service",
            token=agent_token['token']).run_job(job_runner_inputs)
    except Exception as e:
        log_info.update({'err': str(e)})
        kblogging.log_event(self._log, "run_app_error", log_info)
        raise transform_job_exception(e)

    new_job = Job(job_id,
                  app_id,
                  input_vals,
                  system_variable('user_id'),
                  tag=tag,
                  app_version=service_ver,
                  cell_id=cell_id,
                  run_id=run_id,
                  token_id=agent_token['id'])

    self._send_comm_message('run_status', {
        'event': 'launched_job',
        'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
        'cell_id': cell_id,
        'run_id': run_id,
        'job_id': job_id
    })
    JobManager().register_new_job(new_job)
    # UI launches (cell_id given) learn about the job via the comm
    # channel; code-cell launches get the Job object back.
    if cell_id is not None:
        return
    return new_job
def _run_app_batch_internal(self, app_id, params, tag, version, cell_id,
                            run_id, dry_run):
    """
    Runs an app over a list of parameter sets by wrapping them into a
    single kb_BatchApp.run_batch job. Returns the new Job (or, for UI
    calls with a cell_id, sends status over the comm channel and returns
    None). If dry_run is True, returns the assembled job-runner inputs
    instead of launching anything.
    """
    batch_method = "kb_BatchApp.run_batch"
    batch_app_id = "kb_BatchApp/run_batch"
    batch_method_ver = "dev"
    batch_method_tag = "dev"
    ws_id = strict_system_variable('workspace_id')
    spec = self._get_validated_app_spec(app_id, tag, True, version=version)

    # Preflight check the params - all required ones are present, all
    # values are the right type, all numerical values are in given ranges
    spec_params = self.spec_manager.app_params(spec)

    # Hoisted out of the loop below: it only depends on spec_params, so
    # rebuilding it per parameter set (as the original did) was wasted work.
    spec_params_map = dict((spec_params[i]['id'], spec_params[i])
                           for i in range(len(spec_params)))

    # A list of lists of UPAs, used for each subjob.
    batch_ws_upas = list()
    # The list of actual input values, post-mapping.
    batch_run_inputs = list()
    for param_set in params:
        batch_ws_upas.append(
            extract_ws_refs(app_id, tag, spec_params, param_set))
        batch_run_inputs.append(self._map_inputs(
            spec['behavior']['kb_service_input_mapping'],
            param_set, spec_params_map))

    service_method = spec['behavior']['kb_service_method']
    service_name = spec['behavior']['kb_service_name']
    service_ver = spec['behavior'].get('kb_service_version', None)
    # Let the given version override the spec's version.
    if version is not None:
        service_ver = version

    # This is what calls the function in the back end - Module.method
    # This isn't the same as the app spec id.
    job_meta = {
        'tag': batch_method_tag,
        'batch_app': app_id,
        'batch_tag': tag,
        'batch_size': len(params),
    }
    if cell_id is not None:
        job_meta['cell_id'] = cell_id
    if run_id is not None:
        job_meta['run_id'] = run_id

    # Now put these all together in a way that can be sent to the batch
    # processing app.
    batch_params = [{
        "module_name": service_name,
        "method_name": service_method,
        "service_ver": service_ver,
        "wsid": ws_id,
        "meta": job_meta,
        "batch_params": [{
            "params": batch_run_inputs[i],
            "source_ws_objects": batch_ws_upas[i]
        } for i in range(len(batch_run_inputs))],
    }]

    # We're now almost ready to run the job. Last, we need an agent token.
    # (The original wrapped this in ``try/except Exception as e: raise``,
    # a no-op that has been removed - failures propagate unchanged.)
    token_name = 'KBApp_{}'.format(app_id)[:self.__MAX_TOKEN_NAME_LEN]
    agent_token = auth.get_agent_token(auth.get_auth_token(),
                                       token_name=token_name)
    job_meta['token_id'] = agent_token['id']

    # This is the input set for NJSW.run_job. Now we need the workspace id
    # and whatever fits in the metadata.
    job_runner_inputs = {
        'method': batch_method,
        'service_ver': batch_method_ver,
        'params': batch_params,
        'app_id': batch_app_id,
        'wsid': ws_id,
        'meta': job_meta
    }
    # if we're doing a dry run, just return the inputs that we made.
    if dry_run:
        return job_runner_inputs

    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': batch_method_tag,
        'version': service_ver,
        'username': system_variable('user_id'),
        'wsid': ws_id
    }
    kblogging.log_event(self._log, "run_batch_app", log_info)

    try:
        job_id = clients.get(
            "job_service",
            token=agent_token['token']).run_job(job_runner_inputs)
    except Exception as e:
        log_info.update({'err': str(e)})
        kblogging.log_event(self._log, "run_batch_app_error", log_info)
        raise transform_job_exception(e)

    new_job = Job(job_id,
                  batch_app_id,
                  batch_params,
                  system_variable('user_id'),
                  tag=batch_method_tag,
                  app_version=batch_method_ver,
                  cell_id=cell_id,
                  run_id=run_id,
                  token_id=agent_token['id'],
                  meta=job_meta)

    self._send_comm_message('run_status', {
        'event': 'launched_job',
        'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
        'cell_id': cell_id,
        'run_id': run_id,
        'job_id': job_id
    })
    JobManager().register_new_job(new_job)
    if cell_id is not None:
        return
    return new_job
def _run_app_internal(self, app_id, params, tag, version, cell_id, run_id,
                      **kwargs):
    """
    Attempts to run the app, returns a Job with the running app info.
    Should *hopefully* also inject that app into the Narrative's metadata.
    Probably need some kind of JavaScript-foo to get that to work.

    Parameters:
    -----------
    app_id - should be from the app spec, e.g. 'build_a_metabolic_model'
        or 'MegaHit/run_megahit'.
    params - the dictionary of parameters.
    tag - optional, one of [release|beta|dev] (default=release)
    version - optional, a semantic version string. Only released modules
        have versions, so if the tag is not 'release', and a version is
        given, a ValueError will be raised.
    **kwargs - these are the set of parameters to be used with the app.
        They can be found by using the app_usage function. If any
        non-optional apps are missing, a ValueError will be raised.

    Example:
    --------
    my_job = mm.run_app('MegaHit/run_megahit',
                        version=">=1.0.0",
                        read_library_name="My_PE_Library",
                        output_contigset_name="My_Contig_Assembly")
    """
    ### TODO: this needs restructuring so that we can send back validation
    ### failure messages. Perhaps a separate function and catch the errors,
    ### or return an error structure.

    # Intro tests:
    self.spec_manager.check_app(app_id, tag, raise_exception=True)

    if version is not None and tag != "release":
        # BUG FIX: the original called re.match(version, '\d+\.\d+\.\d+'),
        # i.e. with pattern and string swapped, so the semantic-version
        # guard never fired as intended. re.match(pattern, string) is the
        # correct order.
        if re.match(r'\d+\.\d+\.\d+', version) is not None:
            raise ValueError(
                "Semantic versions only apply to released app modules. "
                "You can use a Git commit hash instead to specify a "
                "version.")

    # Get the spec & params
    spec = self.spec_manager.get_spec(app_id, tag)

    # There's some branching to do here.
    # Cases:
    # app has behavior.kb_service_input_mapping -
    #     is a valid long-running app.
    # app only has behavior.output_mapping - not kb_service_input_mapping
    #     or script_module - it's a viewer and should return immediately
    # app has other things besides kb_service_input_mapping -
    #     not a valid app.
    if 'behavior' not in spec:
        raise Exception("This app appears invalid - "
                        "it has no defined behavior")
    if 'kb_service_input_mapping' not in spec['behavior']:
        raise Exception("This app does not appear to be a long-running "
                        "job! Please use 'run_local_app' to start this "
                        "instead.")

    # Preflight check the params - all required ones are present, all
    # values are the right type, all numerical values are in given ranges
    spec_params = self.spec_manager.app_params(spec)
    (params, ws_input_refs) = self._validate_parameters(app_id, tag,
                                                        spec_params, params)

    ws_id = system_variable('workspace_id')
    if ws_id is None:
        raise ValueError('Unable to retrieve current Narrative workspace '
                         'information!')

    input_vals = self._map_inputs(
        spec['behavior']['kb_service_input_mapping'], params)

    service_method = spec['behavior']['kb_service_method']
    service_name = spec['behavior']['kb_service_name']
    service_ver = spec['behavior'].get('kb_service_version', None)
    service_url = spec['behavior']['kb_service_url']

    # Let the given version override the spec's version.
    if version is not None:
        service_ver = version

    # This is what calls the function in the back end - Module.method
    # This isn't the same as the app spec id.
    function_name = service_name + '.' + service_method
    job_meta = {'tag': tag}
    if cell_id is not None:
        job_meta['cell_id'] = cell_id
    if run_id is not None:
        job_meta['run_id'] = run_id

    # This is the input set for NJSW.run_job. Now we need the workspace id
    # and whatever fits in the metadata.
    job_runner_inputs = {
        'method': function_name,
        'service_ver': service_ver,
        'params': input_vals,
        'app_id': app_id,
        'wsid': ws_id,
        'meta': job_meta
    }
    if len(ws_input_refs) > 0:
        job_runner_inputs['source_ws_objects'] = ws_input_refs

    # Log that we're trying to run a job...
    log_info = {
        'app_id': app_id,
        'tag': tag,
        'version': service_ver,
        'username': system_variable('user_id'),
        'wsid': ws_id
    }
    kblogging.log_event(self._log, "run_app", log_info)

    try:
        job_id = self.njs.run_job(job_runner_inputs)
    except Exception as e:
        log_info.update({'err': str(e)})
        kblogging.log_event(self._log, "run_app_error", log_info)
        raise transform_job_exception(e)

    new_job = Job(job_id,
                  app_id,
                  [params],
                  system_variable('user_id'),
                  tag=tag,
                  app_version=service_ver,
                  cell_id=cell_id,
                  run_id=run_id)

    self._send_comm_message('run_status', {
        'event': 'launched_job',
        'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
        'cell_id': cell_id,
        'run_id': run_id,
        'job_id': job_id
    })
    JobManager().register_new_job(new_job)
    if cell_id is not None:
        return
    return new_job
def _start_job_status_loop(self):
    """Kick off the job status loop unless a lookup timer is already set."""
    kblogging.log_event(self._log, 'starting job status loop', {})
    if self._lookup_timer is not None:
        return
    self._lookup_job_status_loop()
def list_jobs(self):
    """
    List all job ids, their info, and status in a quick HTML format.
    """
    # NOTE(review): timestamps from the job service appear to be epoch
    # milliseconds (all divisions below are by 1000) - confirm against
    # the job service API.
    try:
        status_set = list()
        for job_id in self._running_jobs:
            job = self._running_jobs[job_id]['job']
            job_state = self._get_job_state(job_id)
            job_state['app_id'] = job.app_id
            job_state['owner'] = job.owner
            status_set.append(job_state)
        if not len(status_set):
            return "No running jobs!"
        # Oldest submission first.
        status_set = sorted(status_set, key=lambda s: s['creation_time'])
        for i in range(len(status_set)):
            # Render the creation time as a human-readable string.
            status_set[i]['creation_time'] = datetime.datetime.strftime(
                datetime.datetime.fromtimestamp(
                    status_set[i]['creation_time']/1000),
                "%Y-%m-%d %H:%M:%S")
            exec_start = status_set[i].get('exec_start_time', None)
            if 'finish_time' in status_set[i]:
                finished = status_set[i].get('finish_time', None)
                if finished is not None and exec_start:
                    # Run time = finish - start, truncated to whole seconds.
                    delta = (datetime.datetime.fromtimestamp(finished/1000.0)
                             - datetime.datetime.fromtimestamp(
                                 exec_start/1000.0))
                    delta = delta - datetime.timedelta(
                        microseconds=delta.microseconds)
                    status_set[i]['run_time'] = str(delta)
                    status_set[i]['finish_time'] = datetime.datetime.strftime(
                        datetime.datetime.fromtimestamp(
                            status_set[i]['finish_time']/1000),
                        "%Y-%m-%d %H:%M:%S")
            elif exec_start:
                # Still running: elapsed time so far, truncated to seconds.
                delta = (datetime.datetime.utcnow()
                         - datetime.datetime.utcfromtimestamp(
                             exec_start/1000.0))
                delta = delta - datetime.timedelta(
                    microseconds=delta.microseconds)
                status_set[i]['run_time'] = str(delta)
            else:
                status_set[i]['run_time'] = 'Not started'

        # Jinja template rendered into an HTML table, one row per job.
        tmpl = """
        <table class="table table-bordered table-striped table-condensed">
            <tr>
                <th>Id</th>
                <th>Name</th>
                <th>Submitted</th>
                <th>Submitted By</th>
                <th>Status</th>
                <th>Run Time</th>
                <th>Complete Time</th>
            </tr>
            {% for j in jobs %}
            <tr>
                <td>{{ j.job_id|e }}</td>
                <td>{{ j.app_id|e }}</td>
                <td>{{ j.creation_time|e }}</td>
                <td>{{ j.owner|e }}</td>
                <td>{{ j.job_state|e }}</td>
                <td>{{ j.run_time|e }}</td>
                <td>{% if j.finish_time %}{{ j.finish_time|e }}{% else %}Incomplete{% endif %}</td>
            </tr>
            {% endfor %}
        </table>
        """
        return HTML(Template(tmpl).render(jobs=status_set))

    except Exception as e:
        kblogging.log_event(self._log, "list_jobs.error", {'err': str(e)})
        raise
def _construct_job_status(self, job, state): """ Creates a Job status dictionary with structure: { owner: string (username), spec: app_spec (from NMS, via biokbase.narrative.jobs.specmanager) widget_info: (if not finished, None, else...) job.get_viewer_params result state: { job_state: string, error (if present): dict of error info, cell_id: string/None, run_id: string/None, awe_job_id: string/None, canceled: 0/1 creation_time: epoch second exec_start_time: epoch/none, finish_time: epoch/none, finished: 0/1, job_id: string, status: (from UJS) [ timestamp(last_update, string), stage (string), status (string), progress (string/None), est_complete (string/None), complete (0/1), error (0/1) ], ujs_url: string } } """ widget_info = None app_spec = {} if job is None: state = { 'job_state': 'error', 'error': { 'error': 'Job does not seem to exist, or it is otherwise unavailable.', 'message': 'Job does not exist', 'name': 'Job Error', 'code': -1, 'exception': { 'error_message': 'job not found in JobManager', 'error_type': 'ValueError', 'error_stacktrace': '' } }, 'cell_id': None, 'run_id': None, } return { 'state': state, 'app_spec': app_spec, 'widget_info': widget_info, 'owner': None } # try: # app_spec = job.app_spec() # except Exception as e: # kblogging.log_event(self._log, "lookup_job_status.error", {'err': str(e)}) if state is None: kblogging.log_event(self._log, "lookup_job_status.error", {'err': 'Unable to get job state for job {}'.format(job.job_id)}) state = { 'job_state': 'error', 'error': { 'error': 'Unable to find current job state. 
Please try again later, or contact KBase.', 'message': 'Unable to return job state', 'name': 'Job Error', 'code': -1, 'source': 'JobManager._construct_job_status', 'exception': { 'error_message': 'No state provided during lookup', 'error_type': 'null-state', 'error_stacktrace': '', } }, 'creation_time': 0, 'cell_id': job.cell_id, 'run_id': job.run_id, 'job_id': job.job_id } elif 'lookup_error' in state: kblogging.log_event(self._log, "lookup_job_status.error", { 'err': 'Problem while getting state for job {}'.format(job.job_id), 'info': str(state['lookup_error']) }) state = { 'job_state': 'error', 'error': { 'error': 'Unable to fetch current state. Please try again later, or contact KBase.', 'message': 'Error while looking up job state', 'name': 'Job Error', 'code': -1, 'source': 'JobManager._construct_job_status', 'exception': { 'error_message': 'Error while fetching job state', 'error_type': 'failed-lookup', }, 'error_response': state['lookup_error'], 'creation_time': 0, 'cell_id': job.cell_id, 'run_id': job.run_id, 'job_id': job.job_id } } if state.get('finished', 0) == 1: try: widget_info = job.get_viewer_params(state) except Exception as e: # Can't get viewer params new_e = transform_job_exception(e) kblogging.log_event(self._log, "lookup_job_status.error", {'err': str(e)}) state['job_state'] = 'error' state['error'] = { 'error': 'Unable to generate App output viewer!\nThe App appears to have completed successfully,\nbut we cannot construct its output viewer.\nPlease contact the developer of this App for assistance.', 'message': 'Unable to build output viewer parameters!', 'name': 'App Error', 'code': getattr(new_e, "code", -1), 'source': getattr(new_e, "source", "JobManager") } if 'canceling' in self._running_jobs[job.job_id]: state['job_state'] = 'canceling' state.update({ 'child_jobs': self._child_job_states( state.get('sub_jobs', []), job.meta.get('batch_app'), job.meta.get('batch_tag') ) }) if 'batch_size' in job.meta: state.update({'batch_size': 
job.meta['batch_size']}) return {'state': state, 'spec': app_spec, 'widget_info': widget_info, 'owner': job.owner, 'listener_count': self._running_jobs[job.job_id]['refresh']}
def initialize_jobs(self):
    """
    Initializes this JobManager.
    This is expected to be run by a running Narrative, and naturally linked
    to a workspace. So it does the following steps.
    1. app_util.system_variable('workspace_id')
    2. get list of jobs with that ws id from UJS (also gets tag, cell_id, run_id)
    3. initialize the Job objects by running NJS.get_job_params on each of
       those (also gets app_id)
    4. start the status lookup loop.

    Raises: the transformed exception when either the UJS job-list fetch or
    a per-job NJS get_job_params lookup fails; an error comm message is sent
    to the front end first.
    """
    ws_id = system_variable('workspace_id')
    try:
        # All jobs for this narrative share the workspace-based auth strategy.
        nar_jobs = clients.get('user_and_job_state').list_jobs2({
            'authstrat': 'kbaseworkspace',
            'authparams': [str(ws_id)]
        })
    except Exception as e:
        kblogging.log_event(self._log, 'init_error', {'err': str(e)})
        new_e = transform_job_exception(e)
        error = {
            'error': 'Unable to get initial jobs list',
            'message': getattr(new_e, 'message', 'Unknown reason'),
            'code': getattr(new_e, 'code', -1),
            'source': getattr(new_e, 'source', 'jobmanager'),
            'name': getattr(new_e, 'name', type(e).__name__),
            'service': 'user_and_job_state'
        }
        # Tell the front end before propagating the failure.
        self._send_comm_message('job_init_err', error)
        raise new_e

    for info in nar_jobs:
        # UJS job tuple: [0]=job id, [1]=user info, [10]=job metadata dict.
        job_id = info[0]
        user_info = info[1]
        job_meta = info[10]
        try:
            # One NJS round trip per job to recover its parameters/app id.
            job_info = clients.get('job_service').get_job_params(job_id)[0]
            self._running_jobs[job_id] = {
                'refresh': True,
                'job': Job.from_state(job_id,
                                      job_info,
                                      user_info[0],
                                      app_id=job_info.get('app_id'),
                                      tag=job_meta.get('tag', 'release'),
                                      cell_id=job_meta.get('cell_id', None),
                                      run_id=job_meta.get('run_id', None))
            }
        except Exception as e:
            kblogging.log_event(self._log, 'init_error', {'err': str(e)})
            new_e = transform_job_exception(e)
            error = {
                'error': 'Unable to get job info on initial lookup',
                'job_id': job_id,
                'message': getattr(new_e, 'message', 'Unknown reason'),
                'code': getattr(new_e, 'code', -1),
                'source': getattr(new_e, 'source', 'jobmanager'),
                'name': getattr(new_e, 'name', type(e).__name__),
                'service': 'job_service'
            }
            self._send_comm_message('job_init_lookup_err', error)
            raise new_e  # should crash and burn on any of these.

    if not self._running_lookup_loop:
        # only keep one loop at a time in case this gets called again!
        if self._lookup_timer is not None:
            self._lookup_timer.cancel()
        self._running_lookup_loop = True
        self._lookup_job_status_loop()
    else:
        # A loop is already running; just do a one-shot refresh.
        self._lookup_all_job_status()
def _write(self, level, event, kvp): kvp['severity'] = logging.getLevelName(level) kblogging.log_event(self._log, event, kvp)
def _construct_job_status(self, job_id): """ Always creates a Job Status. It'll embed error messages into the status if there are problems. """ state = {} widget_info = None app_spec = {} job = self.get_job(job_id) if job is None: state = { 'job_state': 'error', 'error': { 'error': 'Job does not seem to exist, or it is otherwise unavailable.', 'message': 'Job does not exist', 'name': 'Job Error', 'code': -1, 'exception': { 'error_message': 'job not found in JobManager', 'error_type': 'ValueError', 'error_stacktrace': '' } }, 'cell_id': None, 'run_id': None } return { 'state': state, 'app_spec': app_spec, 'widget_info': widget_info, 'owner': None } try: app_spec = job.app_spec() except Exception as e: kblogging.log_event(self._log, "lookup_job_status.error", {'err': str(e)}) try: state = job.state() except Exception as e: kblogging.log_event(self._log, "lookup_job_status.error", {'err': str(e)}) new_e = transform_job_exception(e) e_type = type(e).__name__ e_message = str(new_e).replace('<', '<').replace('>', '>') e_trace = traceback.format_exc().replace('<', '<').replace('>', '>') e_code = getattr(new_e, "code", -2) e_source = getattr(new_e, "source", "JobManager") state = { 'job_state': 'error', 'error': { 'error': 'Unable to find current job state. 
Please try again later, or contact KBase.', 'message': 'Unable to return job state', 'name': 'Job Error', 'code': e_code, 'source': e_source, 'exception': { 'error_message': e_message, 'error_type': e_type, 'error_stacktrace': e_trace, } }, 'creation_time': 0, 'cell_id': job.cell_id, 'run_id': job.run_id, 'job_id': job_id } if state.get('finished', 0) == 1: try: widget_info = job.get_viewer_params(state) except Exception as e: # Can't get viewer params new_e = transform_job_exception(e) kblogging.log_event(self._log, "lookup_job_status.error", {'err': str(e)}) state['job_state'] = 'error' state['error'] = { 'error': 'Unable to generate App output viewer!\nThe App appears to have completed successfully,\nbut we cannot construct its output viewer.\nPlease contact the developer of this App for assistance.', 'message': 'Unable to build output viewer parameters!', 'name': 'App Error', 'code': getattr(new_e, "code", -1), 'source': getattr(new_e, "source", "JobManager") } if 'canceling' in self._running_jobs[job_id]: state['job_state'] = 'canceling' return {'state': state, 'spec': app_spec, 'widget_info': widget_info, 'owner': job.owner}
def initialize_jobs(self, start_lookup_thread=True):
    """
    Initializes this JobManager.
    This is expected to be run by a running Narrative, and naturally linked
    to a workspace. So it does the following steps.
    1. app_util.system_variable('workspace_id')
    2. get list of jobs with that ws id from UJS (also gets tag, cell_id, run_id)
    3. initialize the Job objects in a single batched NJS.check_jobs call
       with with_job_params (also gets app_id)
    4. start the status lookup loop (unless start_lookup_thread is False,
       in which case a one-shot status lookup runs instead).

    Raises: the transformed exception when the UJS job-list fetch or the
    per-job construction fails; an error comm message is sent first.
    """
    the_time = int(round(time.time() * 1000))
    # Announce initialization to the front end with an epoch-ms timestamp.
    self._send_comm_message('start', {'time': the_time})
    ws_id = system_variable('workspace_id')
    try:
        # All jobs for this narrative share the workspace-based auth strategy.
        nar_jobs = clients.get('user_and_job_state').list_jobs2({
            'authstrat': 'kbaseworkspace',
            'authparams': [str(ws_id)]
        })
    except Exception as e:
        kblogging.log_event(self._log, 'init_error', {'err': str(e)})
        new_e = transform_job_exception(e)
        error = {
            'error': 'Unable to get initial jobs list',
            'message': getattr(new_e, 'message', 'Unknown reason'),
            'code': getattr(new_e, 'code', -1),
            'source': getattr(new_e, 'source', 'jobmanager'),
            'name': getattr(new_e, 'name', type(e).__name__),
            'service': 'user_and_job_state'
        }
        self._send_comm_message('job_init_err', error)
        raise new_e

    # Batch-fetch parameters for every job in one check_jobs call; results
    # split into per-job params and per-job lookup errors.
    job_ids = [j[0] for j in nar_jobs]
    job_states = clients.get('job_service').check_jobs({
        'job_ids': job_ids, 'with_job_params': 1
    })
    job_param_info = job_states.get('job_params', {})
    job_check_error = job_states.get('check_error', {})
    error_jobs = dict()
    for info in nar_jobs:
        # UJS job tuple: [0]=job id, [1]=user info, [10]=job metadata dict.
        job_id = info[0]
        user_info = info[1]
        job_meta = info[10]
        try:
            if job_id in job_param_info:
                job_info = job_param_info[job_id]
                job = Job.from_state(job_id,
                                     job_info,
                                     user_info[0],
                                     app_id=job_info.get('app_id'),
                                     tag=job_meta.get('tag', 'release'),
                                     cell_id=job_meta.get('cell_id', None),
                                     run_id=job_meta.get('run_id', None),
                                     token_id=job_meta.get('token_id', None),
                                     meta=job_meta)
                # Note that when jobs for this narrative are initially
                # loaded, they are set to not be refreshed. Rather, if a
                # client requests updates via the start_job_update message,
                # the refresh flag will be set to True.
                self._running_jobs[job_id] = {
                    'refresh': 0,
                    'job': job
                }
            elif job_id in job_check_error:
                # Execution engine couldn't look this job up; record an
                # error state to report in bulk below.
                job_err_state = {
                    'job_state': 'error',
                    'error': {
                        'error': 'KBase execution engine returned an error while looking up this job.',
                        'message': job_check_error[job_id].get('message', 'No error message available'),
                        'name': 'Job Error',
                        'code': job_check_error[job_id].get('code', -999),
                        'exception': {
                            'error_message': 'Job lookup in execution engine failed',
                            'error_type': job_check_error[job_id].get('name', 'unknown'),
                            'error_stacktrace': job_check_error[job_id].get('error', '')
                        }
                    },
                    'cell_id': job_meta.get('cell_id', None),
                    'run_id': job_meta.get('run_id', None),
                }
                error_jobs[job_id] = job_err_state
        except Exception as e:
            kblogging.log_event(self._log, 'init_error', {'err': str(e)})
            new_e = transform_job_exception(e)
            error = {
                'error': 'Unable to get job info on initial lookup',
                'job_id': job_id,
                'message': getattr(new_e, 'message', 'Unknown reason'),
                'code': getattr(new_e, 'code', -1),
                'source': getattr(new_e, 'source', 'jobmanager'),
                'name': getattr(new_e, 'name', type(e).__name__),
                'service': 'job_service'
            }
            self._send_comm_message('job_init_lookup_err', error)
            raise new_e  # should crash and burn on any of these.

    if len(job_check_error):
        # Report partial vs total lookup failure with different comm types.
        err_str = 'Unable to find info for some jobs on initial lookup'
        err_type = 'job_init_partial_err'
        if len(job_check_error) == len(nar_jobs):
            err_str = 'Unable to get info for any job on initial lookup'
            err_type = 'job_init_lookup_err'
        error = {
            'error': err_str,
            'job_errors': error_jobs,
            'message': 'Job information was unavailable from the server',
            'code': -2,
            'source': 'jobmanager',
            'name': 'jobmanager',
            'service': 'job_service',
        }
        self._send_comm_message(err_type, error)

    if not self._running_lookup_loop and start_lookup_thread:
        # only keep one loop at a time in case this gets called again!
        if self._lookup_timer is not None:
            self._lookup_timer.cancel()
        self._running_lookup_loop = True
        self._lookup_job_status_loop()
    else:
        # A loop is already running (or was declined); do a one-shot refresh.
        self._lookup_all_job_status()