Esempio n. 1
0
 def test_write_narrative_shared_write_access(self):
     """
     A user who is granted write access to a narrative's workspace can re-save it.

     Steps:
       1. as private_user, give test_user 'w' permission on unauth_nar's workspace
       2. as test_user, read unauth_nar and write it back (should succeed)
       3. as private_user, set test_user's permission back to 'n'

     Every login is paired with a logout in a ``finally`` block, and step 3 always
     runs once step 1 has been attempted, so a failure in step 2 does not leave
     test_user logged in or still holding write access.
     """
     if self.test_token is None or self.private_token is None:
         self.skipTest("Missing auth token(s)")

     def set_test_user_permission(level):
         # Permission changes are made while logged in as private_user.
         self.login(token=self.private_token)
         try:
             ws_client = clients.get("workspace")
             ws_client.set_permissions({
                 "id": self.unauth_nar["ws"],
                 "new_permission": level,
                 "users": [self.test_user],
             })
         finally:
             self.logout()

     set_test_user_permission("w")
     try:
         # login as test_user and re-save unauth_nar; this should succeed
         self.login(token=self.test_token)
         try:
             nar = self.mixin.read_narrative(self.unauth_nar["ref"])["data"]
             self.mixin.write_narrative(self.unauth_nar["ref"], nar, self.test_user)
         finally:
             self.logout()
     finally:
         # remove perms from test_user, whether or not the save worked
         set_test_user_permission("n")
Esempio n. 2
0
    def delete_job(self, job_id, parent_job_id=None):
        """
        Cancels a job, deletes it from the job state service, and stops tracking it.

        If job_id is None, raises a ValueError.
        If no parent_job_id is given and the job is not in _running_jobs, a
        'job_does_not_exist' comm message is sent and the method returns without
        doing anything else (no exception is raised).
        Otherwise the job is canceled first, then deleted. Exceptions from either
        step propagate to the caller unchanged, so if the job can be canceled but
        not deleted it stays canceled and is still tracked.
        On success the job is removed from _running_jobs and _completed_job_states
        and a 'job_deleted' comm message is sent.
        """
        if job_id is None:
            raise ValueError('Job id required for deletion!')
        if not parent_job_id and job_id not in self._running_jobs:
            self._send_comm_message('job_does_not_exist', {'job_id': job_id, 'source': 'delete_job'})
            return

        # Both calls may raise; nothing below runs unless cancel and delete succeed.
        self.cancel_job(job_id, parent_job_id=parent_job_id)
        clients.get('user_and_job_state').delete_job(job_id)

        # A child job (parent_job_id given) may not be tracked here, hence the defaults.
        self._running_jobs.pop(job_id, None)
        self._completed_job_states.pop(job_id, None)
        self._send_comm_message('job_deleted', {'job_id': job_id})
Esempio n. 3
0
def resolve_single_ref(workspace, value):
    """
    Resolves a single object reference or object name into a fully numeric reference.

    If value contains a '/', it is treated as a reference, possibly a ';'-separated
    reference path. Each path item may have at most 3 '/'-separated parts, otherwise
    a ValueError is raised. The whole value is looked up in the Workspace, and the
    last path item is replaced by the looked-up object's "info[6]/info[0]/info[4]".
    Otherwise value is treated as an object name in the given workspace, and the
    result is that object's "info[6]/info[0]/info[4]".
    """
    def numeric_ref(obj_info):
        # presumably workspace id / object id / version -- confirm against the
        # Workspace object info tuple layout
        return "{}/{}/{}".format(obj_info[6], obj_info[0], obj_info[4])

    if '/' not in value:
        # No slash, so assume it's a name, not a reference.
        by_name = {'workspace': workspace, 'name': value}
        return numeric_ref(
            clients.get('workspace').get_object_info_new({'objects': [by_name]})[0])

    steps = [step.strip() for step in value.split(';')]
    if any(len(step.split('/')) > 3 for step in steps):
        raise ValueError(
            'Object reference {} has too many slashes  - should be workspace/object/version(optional)'
            .format(value))
    resolved = clients.get('workspace').get_object_info_new(
        {'objects': [{'ref': value}]})[0]
    steps[-1] = numeric_ref(resolved)
    return ';'.join(steps)
Esempio n. 4
0
    def _create_jobs(self, job_ids):
        """
        TODO: error handling
        Makes a bunch of Job objects from job_ids.
        Initially used to make Child jobs from some parent, but will eventually be adapted to all jobs on startup.
        Just slaps them all into _running_jobs

        Job parameters come from a single check_jobs call covering all job_ids. Owner
        and metadata come from a per-job get_job_info2 call, which is skipped for
        jobs that are already in _running_jobs. Existing entries are never replaced.
        """
        job_states = clients.get('job_service').check_jobs({'job_ids': job_ids, 'with_job_params': 1})
        for job_id in job_ids:
            if job_id in self._running_jobs:
                # Already tracked: skip before paying for the per-job service call.
                continue

            ujs_info = clients.get('user_and_job_state').get_job_info2(job_id)
            job_info = job_states.get('job_params', {}).get(job_id, {})
            job_meta = ujs_info[10]
            job = Job.from_state(job_id,                                     # the id
                                 job_info,                                   # params, etc.
                                 ujs_info[2],                                # owner id
                                 app_id=job_info.get('app_id', job_info.get('method')),
                                 tag=job_meta.get('tag', 'release'),
                                 cell_id=job_meta.get('cell_id', None),
                                 run_id=job_meta.get('run_id', None),
                                 token_id=job_meta.get('token_id', None),
                                 meta=job_meta)

            # Note that when jobs for this narrative are initially loaded,
            # they are set to not be refreshed. Rather, if a client requests
            # updates via the start_job_update message, the refresh flag will
            # be set to True.
            self._running_jobs[job_id] = {
                'refresh': 0,
                'job': job
            }
Esempio n. 5
0
 def test_write_narrative_shared_write_access(self):
     """
     A user who is granted write access to a narrative's workspace can re-save it.

     private_user grants test_user 'w' on unauth_nar's workspace, test_user reads
     and re-writes the narrative, then private_user sets the permission back to 'n'.
     Logouts and the permission reset live in ``finally`` blocks so that a failing
     save does not leave test_user logged in or with write access.
     """
     if self.test_token is None or self.private_token is None:
         self.skipTest("Missing auth token(s)")
     # login as private_user
     # set unauth_nar perms to allow test_user w access
     # logout
     self.login(token=self.private_token)
     try:
         ws_client = clients.get('workspace')
         ws_client.set_permissions({'id': self.unauth_nar['ws'], 'new_permission': 'w', 'users': [self.test_user]})
     finally:
         self.logout()
     try:
         # login as test_user
         # re-save unauth_nar
         # should succeed
         # logout
         self.login(token=self.test_token)
         try:
             nar = self.mixin.read_narrative(self.unauth_nar['ref'])['data']
             self.mixin.write_narrative(self.unauth_nar['ref'], nar, self.test_user)
         finally:
             self.logout()
     finally:
         # log back in as private_user
         # remove perms from test_user (even if the save above failed)
         # log back out
         self.login(token=self.private_token)
         try:
             ws_client = clients.get('workspace')
             ws_client.set_permissions({'id': self.unauth_nar['ws'], 'new_permission': 'n', 'users': [self.test_user]})
         finally:
             self.logout()
Esempio n. 6
0
def resolve_single_ref(workspace, value):
    """
    Turns one object reference (or object name) into a numeric reference string.

    A value containing "/" is handled as a reference or ";"-separated reference
    path: every path item is stripped and must contain no more than two slashes
    (ValueError otherwise), the full value is looked up in the Workspace, and the
    final path item is swapped for "info[6]/info[0]/info[4]" of the object found.
    Any other value is looked up as an object name inside ``workspace`` and
    "info[6]/info[0]/info[4]" of that object is returned.
    """
    if "/" in value:
        segments = []
        for raw_segment in value.split(";"):
            segment = raw_segment.strip()
            # more than two slashes means more than workspace/object/version
            if segment.count("/") > 2:
                raise ValueError(
                    "Object reference {} has too many slashes  - should be workspace/object/version(optional)"
                    .format(value))
            segments.append(segment)
        obj_info = clients.get("workspace").get_object_info_new(
            {"objects": [{"ref": value}]})[0]
        segments[-1] = "{}/{}/{}".format(obj_info[6], obj_info[0], obj_info[4])
        return ";".join(segments)

    # Otherwise, assume it's a name, not a reference.
    obj_info = clients.get("workspace").get_object_info_new(
        {"objects": [{"workspace": workspace, "name": value}]})[0]
    return "{}/{}/{}".format(obj_info[6], obj_info[0], obj_info[4])
Esempio n. 7
0
 def test_write_narrative_shared_write_access(self):
     """
     A user who is granted write access to a narrative's workspace can re-save it.

     The permission is granted and later revoked while logged in as private_user;
     the save itself runs as test_user. The revoke and every logout are done in
     ``finally`` blocks so a failed save cannot leak the session or the permission.
     """
     if self.test_token is None or self.private_token is None:
         self.skipTest("Missing auth token(s)")

     def as_private_user_set(permission):
         # login as private_user, change test_user's permission, logout
         self.login(token=self.private_token)
         try:
             ws_client = clients.get('workspace')
             ws_client.set_permissions({
                 'id': self.unauth_nar['ws'],
                 'new_permission': permission,
                 'users': [self.test_user]
             })
         finally:
             self.logout()

     # allow test_user w access
     as_private_user_set('w')
     try:
         # login as test_user
         # re-save unauth_nar
         # should succeed
         # logout
         self.login(token=self.test_token)
         try:
             nar = self.mixin.read_narrative(self.unauth_nar['ref'])['data']
             self.mixin.write_narrative(self.unauth_nar['ref'], nar, self.test_user)
         finally:
             self.logout()
     finally:
         # remove perms from test_user, even when the save raised
         as_private_user_set('n')
Esempio n. 8
0
    def cancel_job(self, job_id, parent_job_id=None):
        """
        Asks the job service to cancel a job. Does NOT delete the job.

        - job_id None: raises ValueError.
        - no parent_job_id and job_id not in _running_jobs: sends a
          'job_does_not_exist' comm message and returns.
        - the job state cannot be fetched or read: raises ValueError.
        - the job state is already canceled or finished: returns without doing anything.
        - the job service's cancel_job call fails: sends a 'job_comm_error' comm
          message describing the failure, then re-raises the original exception.
        After a successful cancel, a job status lookup is triggered.
        """
        if job_id is None:
            raise ValueError('Job id required for cancellation!')

        # Only jobs without a parent are tracked in _running_jobs here.
        standalone = not parent_job_id
        if standalone and job_id not in self._running_jobs:
            self._send_comm_message('job_does_not_exist', {
                'job_id': job_id,
                'source': 'cancel_job'
            })
            return

        try:
            state = self._get_job_state(job_id, parent_job_id=parent_job_id)
            already_over = state.get('canceled', 0) == 1 or state.get('finished', 0) == 1
        except Exception:
            raise ValueError('Unable to get Job state')
        if already_over:
            # It's already finished, don't try to cancel it again.
            return

        # Stop updating the job status while we try to cancel.
        # Also, set it to have a special state of 'canceling' while we're doing the cancel
        if standalone:
            tracked = self._running_jobs[job_id]
            is_refreshing = tracked.get('refresh', 0)
            tracked['refresh'] = 0
            tracked['canceling'] = True
        try:
            clients.get('job_service').cancel_job({'job_id': job_id})
        except Exception as e:
            new_e = transform_job_exception(e)
            self._send_comm_message('job_comm_error', {
                'error': 'Unable to get cancel job',
                'message': getattr(new_e, 'message', 'Unknown reason'),
                'code': getattr(new_e, 'code', -1),
                'source': getattr(new_e, 'source', 'jobmanager'),
                'name': getattr(new_e, 'name', type(e).__name__),
                'request_type': 'cancel_job',
                'job_id': job_id
            })
            raise e
        finally:
            # Put the bookkeeping back whether or not the cancel worked.
            if standalone:
                self._running_jobs[job_id]['refresh'] = is_refreshing
                del self._running_jobs[job_id]['canceling']

        # Rather than a separate message, how about triggering a job-status message:
        self._lookup_job_status(job_id, parent_job_id=parent_job_id)
Esempio n. 9
0
 def _cancel_job(self, job_id: str) -> "Exception | None":
     """
     Asks execution_engine2 to cancel one job that is tracked in _running_jobs.

     While the request is in flight the job's 'refresh' flag is switched off and a
     'canceling' marker is set; both are undone in a ``finally`` block so they are
     restored however the request ends.

     :param job_id: id of a job that must already be a key of _running_jobs
     :return: None when the cancel request succeeded, otherwise the value returned
         by transform_job_exception for the failure
     """
     # NOTE(review): the return annotation assumes transform_job_exception returns
     # an Exception instance -- confirm against its definition.
     # Stop updating the job status while we try to cancel.
     # Set the job to a special state of 'canceling' while we're doing the cancel
     is_refreshing = self._running_jobs[job_id].get("refresh", False)
     self._running_jobs[job_id]["refresh"] = False
     self._running_jobs[job_id]["canceling"] = True
     error = None
     try:
         clients.get("execution_engine2").cancel_job({"job_id": job_id})
     except Exception as e:
         error = transform_job_exception(e, "Unable to cancel job")
     finally:
         self._running_jobs[job_id]["refresh"] = is_refreshing
         del self._running_jobs[job_id]["canceling"]
     return error
Esempio n. 10
0
    def cancel_job(self, job_id):
        """
        Requests cancellation of a tracked job. The job is NOT deleted.

        Outcomes:
          * job_id is None -> ValueError
          * job_id is not in _running_jobs -> a 'job_does_not_exist' comm message is
            sent and the method returns
          * the job or its state cannot be obtained -> ValueError
          * the state already says canceled or finished -> returns, nothing is sent
          * the job service's cancel_job call raises -> a 'job_comm_error' comm
            message is sent and the original exception is re-raised
          * otherwise -> a job status lookup is triggered
        """
        if job_id is None:
            raise ValueError('Job id required for cancellation!')
        if job_id not in self._running_jobs:
            self._send_comm_message('job_does_not_exist', {'job_id': job_id, 'source': 'cancel_job'})
            return

        try:
            state = self.get_job(job_id).state()
            is_over = any(state.get(flag, 0) == 1 for flag in ('canceled', 'finished'))
        except Exception:
            raise ValueError('Unable to get Job state')
        if is_over:
            # It's already finished, don't try to cancel it again.
            return

        # Stop updating the job status while we try to cancel.
        # Also, set it to have a special state of 'canceling' while we're doing the cancel
        entry = self._running_jobs[job_id]
        is_refreshing = entry.get('refresh', False)
        entry['refresh'] = False
        entry['canceling'] = True
        try:
            clients.get('job_service').cancel_job({'job_id': job_id})
        except Exception as e:
            new_e = transform_job_exception(e)
            self._send_comm_message('job_comm_error', {
                'error': 'Unable to get cancel job',
                'message': getattr(new_e, 'message', 'Unknown reason'),
                'code': getattr(new_e, 'code', -1),
                'source': getattr(new_e, 'source', 'jobmanager'),
                'name': getattr(new_e, 'name', type(e).__name__),
                'request_type': 'cancel_job',
                'job_id': job_id
            })
            raise e
        finally:
            # Restore the bookkeeping on both the success and the failure path.
            self._running_jobs[job_id]['refresh'] = is_refreshing
            del self._running_jobs[job_id]['canceling']

        # Rather than a separate 'job_canceled' message, trigger a job-status message:
        self._lookup_job_status(job_id)
Esempio n. 11
0
 def _get_narrative_objid(self):
     """
     Looks up the Narrative object id stored in the metadata of workspace self.wsid.

     The id is read from the "narrative" key of element 8 of the workspace info and
     returned as an int.
     Can raise:
         - RuntimeError
             - if the "narrative" entry is missing, or cannot be converted to an int
         - WorkspaceError
             - built from any ServerError raised during the lookup, together
               with self.wsid
     NOTE(review): earlier documentation also listed PermissionsError (no access to
     the workspace); nothing in this method raises it directly -- presumably it
     comes out of the WorkspaceError handling, confirm.
     """
     objid = None
     try:
         ws_info = clients.get("workspace").get_workspace_info({"id": self.wsid})
         objid = ws_info[8].get("narrative")
         return int(objid)
     except (ValueError, TypeError):
         # objid is still None when the metadata has no "narrative" entry
         # (or when the lookup failed before objid could be read).
         if objid is None:
             raise RuntimeError("Couldn't find Narrative object id in Workspace metadata.")
         raise RuntimeError(
             "Expected an integer while looking up the Narrative object id, "
             "got '{}'".format(objid))
     except ServerError as server_err:
         raise WorkspaceError(server_err, self.wsid)
Esempio n. 12
0
def list_objects(obj_type=None, name=None, fuzzy_name=True):
    """
    Lists objects in the current workspace, optionally filtered by type and name.

    obj_type: a type string of the form "Module.Type" or "Module.Type-Version"
        (e.g. "foo.bar" or "foo.bar-1.0"); anything else raises a ValueError.
        If None, objects of every type reported by the service are considered.
    name: if not None, it is converted to a lowercase string and used as a filter
        on object names.
    fuzzy_name: if True, the name filter is a case-insensitive substring match
        ("foo" matches "Foobar" and "Bazfoo", but "Foobar" does not match "foo").
        If False, only exact case-insensitive matches are kept. The checks are
        identity checks against True/False, so any other value applies no name filter.

    Narrative objects are always left out.
    Returns a list of dicts with "upa", "name" and "type" keys. When an object
    carries a 'dp_info' entry, its 'ref' plus ';' is prefixed to the upa.
    """
    ws_name = system_variable('workspace')
    service = clients.get('service')
    service_params = {'ws_name': ws_name}
    if obj_type is not None:
        # matches:        foo.bar, foo.bar-1.0
        # doesn't match:  foo, foo.bar-, foobar-, foo.bar-1.2.0
        if not re.match(r"[A-Za-z]+\.[A-Za-z]+(-\d+\.\d+)?$", obj_type):
            raise ValueError(
                '{} is not a valid type. Valid types are of the format "Module.Type" or "Module.Type-Version"'
                .format(obj_type))
        service_params['types'] = [obj_type]
    all_obj = service.sync_call('NarrativeService.list_objects_with_sets',
                                [service_params])[0]

    wanted = None if name is None else str(name).lower()
    found = []
    for entry in all_obj['data']:
        info = entry['object_info']
        # 1. ignore narratives
        if 'KBaseNarrative.Narrative' in info[2]:
            continue
        # 2. name filter
        if wanted is not None:
            if fuzzy_name is True:
                # not strict: just search for the string
                if wanted not in info[1].lower():
                    continue
            elif fuzzy_name is False:
                if wanted != info[1].lower():
                    continue
        # objects carrying 'dp_info' get that reference prefixed to their upa
        upa_prefix = entry['dp_info']['ref'] + ';' if 'dp_info' in entry else ''
        found.append({
            "upa": "{}{}/{}/{}".format(upa_prefix, info[6], info[0], info[4]),
            "name": info[1],
            "type": info[2]
        })
    return found
Esempio n. 13
0
 def state(self):
     """
     Returns the current state of this job as a dict.

     If the last known state has a 'status' of 'completed', 'terminated' or
     'error', that cached state is reused and the job service is not queried.
     Otherwise the state is fetched from execution_engine2 (leaving out
     EXCLUDED_JOB_STATE_FIELDS), given a default empty 'job_output' plus this job's
     cell_id, run_id and token_id, and cached as the last known state.

     Both paths return a shallow copy, so callers changing top-level keys of the
     result do not alter the cached state.

     Raises an Exception, chained to the original error, if the state cannot be
     fetched.
     """
     terminal_statuses = ('completed', 'terminated', 'error')
     if self._last_state is not None and self._last_state.get('status') in terminal_statuses:
         return dict(self._last_state)
     try:
         state = clients.get('execution_engine2').check_job({
             'job_id': self.job_id,
             'exclude_fields': EXCLUDED_JOB_STATE_FIELDS
         })
         state['job_output'] = state.get('job_output', {})
         state['cell_id'] = self.cell_id
         state['run_id'] = self.run_id
         state['token_id'] = self.token_id
         self._last_state = state
         return dict(state)
     except Exception as e:
         raise Exception(
             f"Unable to fetch info for job {self.job_id} - {e}") from e
Esempio n. 14
0
 def _get_narrative_objid(self):
     """
     Finds the Narrative object id recorded in the metadata of workspace self.wsid.

     Reads the "narrative" entry from element 8 of the workspace info and returns
     it converted to an int.
     Can raise:
         - RuntimeError
             - when the "narrative" entry is absent, or is not convertible to int
         - WorkspaceError
             - wrapping any ServerError raised during the lookup, along with
               self.wsid
     NOTE(review): PermissionsError was listed in earlier documentation but is not
     raised directly here; presumably it originates in WorkspaceError -- confirm.
     """
     objid = None
     try:
         workspace = clients.get("workspace")
         ws_meta = workspace.get_workspace_info({"id": self.wsid})[8]
         objid = ws_meta.get("narrative")
         return int(objid)
     except (ValueError, TypeError):
         message = (
             "Couldn't find Narrative object id in Workspace metadata."
             if objid is None
             else "Expected an integer while looking up the Narrative object id, "
                  "got '{}'".format(objid)
         )
         raise RuntimeError(message)
     except ServerError as ws_failure:
         raise WorkspaceError(ws_failure, self.wsid)
Esempio n. 15
0
def get_df(ws_ref, col_attributes=(), row_attributes=(), clustergrammer=False):
    """
    Builds a Pandas DataFrame from a Workspace object holding a 2D matrix.

    :param ws_ref: Workspace reference of the object. A value without a "/" is
        taken as an object name and prefixed with the current workspace.
    :param col_attributes: column attributes to include in the column index; passed
        on to _get_categories. The default () selects all attributes, None gives a
        simple index of only the ids.
    :param row_attributes: same as col_attributes, for the row index.
    :param clustergrammer: passed on to _get_categories to request
        Clustergrammer-compatible indices and columns. Defaults to False.
    :return: a DataFrame of the matrix values, indexed by the row and column labels
    :raises ValueError: if _is_compatible_matrix rejects the fetched object
    """
    ws = clients.get('workspace')
    if "/" not in ws_ref:
        ws_ref = "{}/{}".format(system_variable('workspace'), ws_ref)
    fetched = ws.get_objects2({'objects': [{'ref': ws_ref}]})
    generic_data = fetched['data'][0]['data']
    if not _is_compatible_matrix(generic_data):
        raise ValueError("{} is not a compatible data type for this viewer. Data type must "
                         "contain a 'data' key with a FloatMatrix2D type value".format(ws_ref))

    matrix = generic_data['data']
    # Columns are resolved before rows.
    column_labels = _get_categories(
        matrix['col_ids'],
        ws_ref,
        generic_data.get('col_attributemapping_ref'),
        generic_data.get('col_mapping'),
        col_attributes,
        clustergrammer,
    )
    row_labels = _get_categories(
        matrix['row_ids'],
        ws_ref,
        generic_data.get('row_attributemapping_ref'),
        generic_data.get('row_mapping'),
        row_attributes,
        clustergrammer,
    )
    return pd.DataFrame(data=matrix['values'], columns=column_labels, index=row_labels)
Esempio n. 16
0
 def state(self):
     """
     Returns the current state of this job as a dict.

     A cached state whose "status" is "completed", "terminated" or "error" is
     reused without contacting the job service. Otherwise the state is fetched
     from execution_engine2 (excluding EXCLUDED_JOB_STATE_FIELDS), completed with
     a default empty "job_output" and this job's cell_id, run_id and token_id,
     and stored as the last known state.

     A shallow copy is returned in both cases, so changes to top-level keys of
     the result never reach the cached state.

     Raises an Exception, chained to the underlying error, if fetching fails.
     """
     if self._last_state is not None and self._last_state.get("status") in (
             "completed",
             "terminated",
             "error",
     ):
         return dict(self._last_state)
     try:
         state = clients.get("execution_engine2").check_job({
             "job_id": self.job_id,
             "exclude_fields": EXCLUDED_JOB_STATE_FIELDS,
         })
         state["job_output"] = state.get("job_output", {})
         state["cell_id"] = self.cell_id
         state["run_id"] = self.run_id
         state["token_id"] = self.token_id
         self._last_state = state
         return dict(state)
     except Exception as e:
         raise Exception(
             f"Unable to fetch info for job {self.job_id} - {e}") from e
Esempio n. 17
0
    def _run_dynamic_service_internal(self, app_id, params, tag, version,
                                      cell_id, run_id, **kwargs):
        """
        Runs an app that is backed by a dynamic service call and hands back its result.

        :param app_id: id of the app; checked with spec_manager.check_app
        :param params: passed unchanged to the service call (not validated here)
        :param tag: app tag; also used as the service_version of the call
        :param version: must be None unless tag is "release"
        :param cell_id: if truthy, the result is sent as a 'result' cell message
            and the method returns None
        :param run_id: included in the cell message
        :param kwargs: accepted but not used
        :return: the first element of the service call result when no cell_id is
            given, otherwise None
        :raises ValueError: if a version is given for a non-release tag, if the
            spec has no 'behavior', or if the behavior refers to a script
            module/name
        Exceptions raised by the service call propagate unchanged.
        """
        # Intro tests:
        self.spec_manager.check_app(app_id, tag, raise_exception=True)

        if version is not None and tag != "release":
            raise ValueError(
                "App versions only apply to released app modules!")

        # Get the spec & params
        spec = self.spec_manager.get_spec(app_id, tag)

        if 'behavior' not in spec:
            raise ValueError(
                "This app appears invalid - it has no defined behavior")

        behavior = spec['behavior']

        if 'script_module' in behavior or 'script_name' in behavior:
            # It's an old NJS script. These don't work anymore.
            raise ValueError(
                'This app relies on a service that is now obsolete. Please contact the administrator.'
            )

        # Log that we're trying to run a job...
        log_info = {
            'app_id': app_id,
            'tag': tag,
            'username': system_variable('user_id'),
            'ws': system_variable('workspace')
        }
        kblogging.log_event(self._log, "run_dynamic_service", log_info)

        # Silly to keep this here, but we do not validate the incoming parameters.
        # If they are provided by the UI (we have cell_id), they are constructed
        # according to the spec, so are trusted;
        # Otherwise, if they are the product of direct code cell entry, this is a mode we do not
        # "support", so we can let it fail hard.
        # In the future when code cell interaction is supported for users, we will need to provide
        # robust validation and error reporting, but this may end up being (should be) provided by the
        # sdk execution infrastructure anyway

        function_name = behavior['kb_service_name'] + '.' + behavior['kb_service_method']
        result = clients.get("service").sync_call(function_name,
                                                  params,
                                                  service_version=tag)[0]
        # if a ui call (a cell_id is defined) we send a result message, otherwise
        # just the raw result for display in a code cell. This is how we "support"
        # code cells for internal usage.
        if cell_id:
            self.send_cell_message('result', cell_id, run_id,
                                   {'result': result})
            return None
        return result
Esempio n. 18
0
 def _update_log(self):
     """
     Fetches job log lines that are not stored yet and adds them to _job_logs.

     Asks execution_engine2 for this job's logs, skipping as many lines as are
     already held in _job_logs. When lines come back, _job_logs is rebound to a
     new list made of the old lines followed by the new ones.
     """
     ee2 = clients.get("execution_engine2")
     fetched = ee2.get_job_logs({"job_id": self.job_id, "skip_lines": len(self._job_logs)})
     new_lines = fetched["lines"]
     if not new_lines:
         return
     self._job_logs = self._job_logs + new_lines
Esempio n. 19
0
    def _get_all_job_states(self, job_ids=None):
        """
        Returns the states of the requested running jobs as a dict keyed by job id.

        job_ids: ids to look up. If None, all ids in _running_jobs are used. Ids
            that are not in _running_jobs are silently dropped.

        Each value is either a job state with (at least) this structure:
        {
            cell_id: (optional) id of the cell that spawned the job
            run_id: (optional) id of the job run
            awe_job_state: string
            creation_time: timestamp (ms since epoch)
            finished: 0/1
            job_id: string
            job_state: string
            status: [ timestamp, _, _, _, _, _, _ ], (7-tuple)
            sub_jobs: [],
            ujs_url: string,
            child_jobs: []
        }
        or, when the service reported a lookup problem for that job,
        {'lookup_error': <error>}.

        States found in the completed-job cache are returned as copies without
        contacting the job service. If the service call itself fails, the error is
        logged and an empty dict is returned.
        """
        # 1. Get list of ids
        if job_ids is None:
            job_ids = self._running_jobs.keys()
        # 1.5 Go through job ids and remove ones that aren't found.
        job_ids = [j for j in job_ids if j in self._running_jobs]
        # 2. Foreach, check if in completed cache. If so, grab the status. If not, enqueue id
        # for batch lookup.
        job_states = dict()
        jobs_to_lookup = list()
        for job_id in job_ids:
            if job_id in self._completed_job_states:
                job_states[job_id] = dict(self._completed_job_states[job_id])
            else:
                jobs_to_lookup.append(job_id)
        if not jobs_to_lookup:
            # Everything came from the cache (or nothing was asked for):
            # no need to bother the job service.
            return job_states
        # 3. Lookup those jobs what need it. Cache 'em as we go, if finished.
        try:
            fetched_states = clients.get('job_service').check_jobs(
                {'job_ids': jobs_to_lookup})
            fetched_states = sanitize_all_states(fetched_states)
        except Exception as e:
            kblogging.log_event(self._log, 'get_all_job_states_error',
                                {'err': str(e)})
            return {}

        error_states = fetched_states.get('check_error', {})
        fetched_states = fetched_states.get('job_states', {})
        for job_id in jobs_to_lookup:
            if job_id in fetched_states:
                state = fetched_states[job_id]
                state['cell_id'] = self._running_jobs[job_id]['job'].cell_id
                state['run_id'] = self._running_jobs[job_id]['job'].run_id
                if state.get('finished', 0) == 1:
                    self._completed_job_states[state['job_id']] = dict(state)
                job_states[state['job_id']] = state
            elif job_id in error_states:
                # Key by the id being looked up: no state exists for a failed lookup.
                job_states[job_id] = {'lookup_error': error_states[job_id]}

        return job_states
Esempio n. 20
0
 def _update_log(self):
     """
     Pulls any job log lines that are not held locally yet into _job_logs.

     The job_service client is asked for this job's logs, skipping the number of
     lines already in _job_logs. If the reply contains lines, _job_logs is
     replaced by a new list holding the old lines followed by the new ones.
     """
     request = {'job_id': self.job_id, 'skip_lines': len(self._job_logs)}
     log_update = clients.get("job_service").get_job_logs(request)
     fresh_lines = log_update['lines']
     if fresh_lines:
         self._job_logs = self._job_logs + fresh_lines
Esempio n. 21
0
 def _update_log(self):
     """
     Extends _job_logs with the log lines this job has produced since the last fetch.

     The number of lines already stored is sent to execution_engine2 as
     'skip_lines'. A reply with an empty 'lines' value leaves _job_logs untouched;
     otherwise _job_logs becomes a new list of the old and the returned lines.
     """
     lines_seen = len(self._job_logs)
     reply = clients.get("execution_engine2").get_job_logs(
         {'job_id': self.job_id, 'skip_lines': lines_seen})
     if not reply['lines']:
         return
     self._job_logs = self._job_logs + reply['lines']
Esempio n. 22
0
def generate_app_cell(validated_spec=None, spec_tuple=None):
    """Produces an invisible blob of JavaScript that inserts a new cell in the notebook,
    and crams the validated_spec in it. It then removes itself, so it won't happen again
    on page reload.

    For the inputs, validated_spec > spec_tuple. That is, if validated_spec is present,
    that's always used. if spec_tuple is there, and validated_spec is not, then the
    tuple's used: it is sent to the narrative method store for validation and the
    resulting method spec is used.

    Also, the tuple should be (spec_json, display_yaml), all as strings.

    Note that the spec is modified in place: the name/subtitle/tooltip entries of its
    "info" and the ui_name/short_hint/description entries of each parameter are
    replaced by their quote-escaped versions (an empty string when absent).

    Raises:
        Exception - holding the validation errors, if the spec_tuple is not valid
        ValueError - if there is no spec to work with: neither input was given, or
            the spec_tuple was not valid and no errors were reported
    """

    if spec_tuple is not None and validated_spec is None:
        nms = clients.get("narrative_method_store")
        validated = nms.validate_method({
            "id": "some_test_app",
            "spec_json": spec_tuple[0],
            "display_yaml": spec_tuple[1],
        })
        if validated.get("is_valid", 0) == 1:
            validated_spec = validated["method_spec"]
        elif "errors" in validated and validated["errors"]:
            raise Exception(validated["errors"])

    if validated_spec is None:
        # Fail with a clear message instead of a TypeError from `"info" in None` below.
        raise ValueError(
            "An app spec is required: provide a validated_spec, or a spec_tuple "
            "that passes validation")

    # Each of the values of the validated spec needs to be escaped for JS.
    # Specifically we turn " -> &quot; and ' -> &apos;
    # This isn't done so much on the frontend because of how it's already interpreted and
    # injected into the cell metadata,
    # but it's necessary for this little function.

    if "info" in validated_spec:
        for key in ["name", "subtitle", "tooltip"]:
            validated_spec["info"][key] = _fix_quotes(
                validated_spec["info"].get(key, ""))

    if "parameters" in validated_spec:
        for param in validated_spec["parameters"]:
            for key in ["ui_name", "short_hint", "description"]:
                param[key] = _fix_quotes(param.get(key, ""))

    # NOTE(review): the JSON is dropped inside a single-quoted JS string, and only the
    # fields above are quote-escaped -- confirm no other spec field can carry a quote
    # or backslash.
    js_template = """
        var outputArea = this,
            cellElement = outputArea.element.parents('.cell'),
            cellIdx = Jupyter.notebook.get_cell_elements().index(cellElement),
            thisCell = Jupyter.notebook.get_cell(cellIdx),
            spec_json = '{{spec}}',
            cellData = {
                type: 'devapp',
                appTag: 'dev',
                appSpec: JSON.parse(spec_json)
            };
        Jupyter.narrative.insertAndSelectCell('code', 'below', cellIdx, cellData);
    """
    js_code = Template(js_template).render(spec=json.dumps(validated_spec))

    return Javascript(data=js_code, lib=None, css=None)
Esempio n. 23
0
 def query_ee2_state(
     job_id: str,
     init: bool = True,
 ) -> dict:
     """
     Asks execution_engine2 for the state of one job and returns the reply.

     :param job_id: id of the job to check
     :param init: selects which fields are left out of the reply:
         JOB_INIT_EXCLUDED_JOB_STATE_FIELDS when true (the default),
         EXCLUDED_JOB_STATE_FIELDS otherwise
     :return: whatever check_job returns for the job
     """
     ee2 = clients.get("execution_engine2")
     if init:
         excluded = JOB_INIT_EXCLUDED_JOB_STATE_FIELDS
     else:
         excluded = EXCLUDED_JOB_STATE_FIELDS
     return ee2.check_job({"job_id": job_id, "exclude_fields": excluded})
Esempio n. 24
0
    def _get_all_job_states(self, job_ids=None):
        """
        Returns a dict mapping job id to job state for the requested running jobs.

        job_ids: ids to report on; None means every id in _running_jobs. Ids that
            are not in _running_jobs are ignored.

        A job state has (at least) this structure:
        {
            cell_id: (optional) id of the cell that spawned the job
            run_id: (optional) id of the job run
            awe_job_state: string
            creation_time: timestamp (ms since epoch)
            finished: 0/1
            job_id: string
            job_state: string
            status: [ timestamp, _, _, _, _, _, _ ], (7-tuple)
            sub_jobs: [],
            ujs_url: string,
            child_jobs: []
        }
        A job whose lookup was reported as failed by the service is represented by
        {'lookup_error': <error>} instead.

        Cached states of completed jobs are returned as copies and need no service
        call. If the service call fails, the error is logged and {} is returned.
        """
        # 1. Get list of ids
        if job_ids is None:
            job_ids = self._running_jobs.keys()
        # 1.5 Go through job ids and remove ones that aren't found.
        job_ids = [j for j in job_ids if j in self._running_jobs]
        # 2. Foreach, check if in completed cache. If so, grab the status. If not, enqueue id
        # for batch lookup.
        job_states = dict()
        jobs_to_lookup = list()
        for job_id in job_ids:
            if job_id in self._completed_job_states:
                job_states[job_id] = dict(self._completed_job_states[job_id])
            else:
                jobs_to_lookup.append(job_id)
        # Nothing left to ask the job service about: the cache answered everything.
        if not jobs_to_lookup:
            return job_states
        # 3. Lookup those jobs what need it. Cache 'em as we go, if finished.
        try:
            fetched_states = clients.get('job_service').check_jobs({'job_ids': jobs_to_lookup})
            fetched_states = sanitize_all_states(fetched_states)
        except Exception as e:
            kblogging.log_event(self._log, 'get_all_job_states_error', {'err': str(e)})
            return {}

        error_states = fetched_states.get('check_error', {})
        fetched_states = fetched_states.get('job_states', {})
        for job_id in jobs_to_lookup:
            if job_id in fetched_states:
                state = fetched_states[job_id]
                state['cell_id'] = self._running_jobs[job_id]['job'].cell_id
                state['run_id'] = self._running_jobs[job_id]['job'].run_id
                if state.get('finished', 0) == 1:
                    self._completed_job_states[state['job_id']] = dict(state)
                job_states[state['job_id']] = state
            elif job_id in error_states:
                # There is no state for a failed lookup, so key by the requested id.
                job_states[job_id] = {'lookup_error': error_states[job_id]}

        return job_states
Esempio n. 25
0
def _get_categories(
    ids,
    matrix_ref,
    attributemapping_ref=None,
    mapping=None,
    whitelist=(),
    clustergrammer=False,
):
    """Creates the correct kind of multi-factor index for clustergrammer display"""
    if not attributemapping_ref or whitelist is None:
        return ids
    cat_list = []
    ws = clients.get("workspace")
    attribute_data = ws.get_objects2(
        {"objects": [{"ref": matrix_ref + ";" + attributemapping_ref}]}
    )["data"][0]["data"]

    if not mapping:
        mapping = {x: x for x in ids}
    whitelist = set(whitelist)

    for _id in ids:
        try:
            attribute_values = attribute_data["instances"][mapping[_id]]
        except KeyError:
            if _id not in mapping:
                raise ValueError(
                    "Row or column id {} is not in the provided mapping".format(_id)
                )
            raise ValueError(
                "AttributeMapping {} has no attribute {} which corresponds to row or "
                "column id {} in the provided object.".format(
                    attributemapping_ref, mapping[_id], _id
                )
            )
        cats = [_id]
        for i, val in enumerate(attribute_values):
            cat_name = attribute_data["attributes"][i]["attribute"]
            if whitelist and cat_name not in whitelist:
                continue
            if clustergrammer:
                cats.append("{}: {}".format(cat_name, val))
            else:
                cats.append(val)
        cat_list.append(tuple(cats))

    if clustergrammer:
        return cat_list
    attribute_names = [
        x["attribute"]
        for x in attribute_data["attributes"]
        if not whitelist or x["attribute"] in whitelist
    ]
    return pd.MultiIndex.from_tuples(cat_list, names=["ID"] + attribute_names)
Esempio n. 26
0
    def reload(self):
        """
        Rebuilds the cached app specs for every tag in app_version_tags.

        For each tag, the specs returned by list_methods_spec on the
        'narrative_method_store' client are indexed by spec['info']['id'] and stored
        as self.app_specs[tag], replacing whatever was cached for that tag.
        """
        for tag in app_version_tags:
            method_store = clients.get('narrative_method_store')
            tagged_specs = method_store.list_methods_spec({'tag': tag})
            self.app_specs[tag] = {spec['info']['id']: spec for spec in tagged_specs}
Esempio n. 27
0
    def test_valid_clients(self):
        """Each known client name must yield an instance of its expected client class."""
        expected_types = (
            ("workspace", WS_Client),
            ("execution_engine2", EE2_Client),
            ("narrative_method_store", NMS_Client),
            ("service", Service_Client),
            ("catalog", Catalog_Client),
        )

        for name, expected_class in expected_types:
            self.assertIsInstance(clients.get(name), expected_class)
Esempio n. 28
0
    def retry_jobs(self, job_id_list: List[str]) -> dict:
        """
        Asks execution_engine2 to retry the given jobs and reports the outcome per job.

        The ids are first split by self._check_job_list into usable ids and error ids.
        The usable ids are sent to EE2's retry_jobs; the output states of the original
        jobs and of the newly created retry jobs (registered through self._create_jobs)
        are then gathered, and one entry per EE2 result is assembled:

            {
                "job_id": job_id,
                "job": <output state of the original job>,
                "retry_id": retry_id,          # only if EE2 returned a retry_id
                "retry": <output state of the retry job>,
                "error": "...",                # only if EE2 returned an error
            }

        The entries, keyed by original job id, are handed together with the error ids
        to self.add_errors_to_results, whose return value is returned unchanged.

        Any exception raised by the EE2 call is converted with transform_job_exception
        (message "Unable to retry job(s)") and re-raised.
        """
        job_ids, error_ids = self._check_job_list(job_id_list)
        try:
            ee2_results = clients.get("execution_engine2").retry_jobs(
                {"job_ids": job_ids}
            )
        except Exception as e:
            raise transform_job_exception(e, "Unable to retry job(s)")

        # Collect the ids whose state has to be refreshed.
        orig_ids = []
        retry_ids = []
        for item in ee2_results:
            orig_ids.append(item["job_id"])
            if "retry_id" in item:
                retry_ids.append(item["retry_id"])

        job_states = dict(self._construct_job_output_state_set(orig_ids))
        new_jobs = self._create_jobs(retry_ids)  # add to self._running_jobs index
        job_states.update(self._construct_job_output_state_set(retry_ids, new_jobs))

        # Assemble one result entry per original job.
        results_by_job_id = {}
        for item in ee2_results:
            job_id = item["job_id"]
            entry = {"job_id": job_id, "job": job_states[job_id]}
            if "retry_id" in item:
                entry["retry_id"] = item["retry_id"]
                entry["retry"] = job_states[item["retry_id"]]
            if "error" in item:
                entry["error"] = item["error"]
            results_by_job_id[job_id] = entry
        return self.add_errors_to_results(results_by_job_id, error_ids)
Esempio n. 29
0
    def cancel_job(self, job_id: str, parent_job_id: str = None) -> None:
        """
        Cancels a job through execution_engine2 without deleting it.

        A ValueError is raised when job_id is None, or when no parent_job_id is given
        and job_id is not present in self._running_jobs.

        EE2 is first asked (check_job_canceled) whether the job is already finished or
        canceled; if so, nothing else happens. Otherwise EE2's cancel_job is called.
        While that call is in flight, a job tracked in self._running_jobs has its
        'refresh' flag set to 0 and a 'canceling' marker set; both are undone afterwards
        whether or not the cancel call succeeded.

        Exceptions from either EE2 call are converted with transform_job_exception
        and re-raised.
        """
        if job_id is None:
            raise ValueError('Job id required for cancellation!')
        is_tracked = not parent_job_id
        if is_tracked and job_id not in self._running_jobs:
            raise ValueError(f"No job present with id {job_id}")

        try:
            status = clients.get("execution_engine2").check_job_canceled(
                {"job_id": job_id}
            )
            already_done = (
                status.get("finished", 0) == 1 or status.get("canceled", 0) == 1
            )
        except Exception as e:
            raise transform_job_exception(e)
        if already_done:
            # It's already finished, don't try to cancel it again.
            return

        # Pause status refreshes and flag the job as 'canceling' for the duration
        # of the cancel request.
        if is_tracked:
            record = self._running_jobs[job_id]
            is_refreshing = record.get('refresh', 0)
            record['refresh'] = 0
            record['canceling'] = True
        try:
            clients.get('execution_engine2').cancel_job({'job_id': job_id})
        except Exception as e:
            raise transform_job_exception(e)
        finally:
            if is_tracked:
                self._running_jobs[job_id]['refresh'] = is_refreshing
                del self._running_jobs[job_id]['canceling']
Esempio n. 30
0
    def reload(self):
        """
        Rebuilds the cached app specs and type specs from the method store.

        For every tag in app_version_tags, the specs returned by list_methods_spec
        are indexed by spec["info"]["id"] and stored as self.app_specs[tag].
        Afterwards, element 3 of the list_categories({"load_types": 1}) result is
        cached as self.type_specs.
        """
        method_store = clients.get("narrative_method_store")
        for tag in app_version_tags:
            self.app_specs[tag] = {
                spec["info"]["id"]: spec
                for spec in method_store.list_methods_spec({"tag": tag})
            }

        # Load all types up front and cache them as well.
        categories = method_store.list_categories({"load_types": 1})
        self.type_specs = categories[3]
Esempio n. 31
0
    def reload(self):
        """
        Refreshes self.app_specs (per tag) and self.type_specs from the
        'narrative_method_store' client.

        self.app_specs[tag] becomes a dict of spec['info']['id'] -> spec for each tag
        in app_version_tags; self.type_specs becomes item 3 of the value returned by
        list_categories({'load_types': 1}).
        """
        def index_by_id(spec_list):
            # Key every spec by the id found in its 'info' section.
            indexed = dict()
            for entry in spec_list:
                indexed[entry['info']['id']] = entry
            return indexed

        nms = clients.get('narrative_method_store')
        for tag in app_version_tags:
            self.app_specs[tag] = index_by_id(nms.list_methods_spec({'tag': tag}))

        # Types are loaded once here so later lookups can use the cache.
        self.type_specs = nms.list_categories({'load_types': 1})[3]
Esempio n. 32
0
    def query_ee2_states(
        job_ids: List[str],
        init: bool = True,
    ) -> dict:
        """
        Fetches job states for job_ids from execution_engine2's check_jobs.

        Returns an empty dict without contacting EE2 when job_ids is empty or None.
        Otherwise check_jobs is called with return_list=0 and with exclude_fields set
        to JOB_INIT_EXCLUDED_JOB_STATE_FIELDS when init is true, or to
        EXCLUDED_JOB_STATE_FIELDS when it is false; the service response is returned
        as-is.
        """
        if not job_ids:
            return {}

        if init:
            excluded_fields = JOB_INIT_EXCLUDED_JOB_STATE_FIELDS
        else:
            excluded_fields = EXCLUDED_JOB_STATE_FIELDS

        request = {
            "job_ids": job_ids,
            "exclude_fields": excluded_fields,
            "return_list": 0,
        }
        return clients.get("execution_engine2").check_jobs(request)
Esempio n. 33
0
 def __init__(self, job_id, app_id, inputs, owner, tag='release', app_version=None, cell_id=None, run_id=None):
     """
     Sets up a new Job from its id, app id and app version.

     Every argument is stored on the instance under the attribute of the same name,
     and the 'job_service' client obtained from clients.get is kept as self._njs.
     The app_id and app_version are expected to match what the Narrative Method
     Store service offers.
     """
     # Job and app identification.
     self.job_id, self.app_id, self.app_version, self.tag = job_id, app_id, app_version, tag
     # Cell / run identifiers supplied by the caller (may be None).
     self.cell_id, self.run_id = cell_id, run_id
     # Inputs and owner, exactly as supplied by the caller.
     self.inputs, self.owner = inputs, owner
     self._njs = clients.get('job_service')
Esempio n. 34
0
    def initialize_jobs(self):
        """
        Rebuilds self._running_jobs from the jobs EE2 knows for the current workspace.

        Steps:
        1. Read the workspace id via system_variable("workspace_id") and log the call.
        2. Ask execution_engine2 (check_workspace_jobs, return_list=0) for the states
           of all jobs in that workspace. On failure the error is logged as
           "init_error" and re-raised after passing through transform_job_exception.
        3. Reset self._running_jobs and, for each returned job state, build a Job with
           Job.from_state using the state's "job_input" and its "narrative_cell_info"
           metadata (tag, cell_id, run_id, token_id).
        4. Store each job with a "refresh" flag of 0 when its status is "completed",
           "errored" or "terminated", and 1 otherwise.
        """
        ws_id = system_variable("workspace_id")
        kblogging.log_event(self._log, "JobManager.initialize_jobs", {"ws_id": ws_id})
        try:
            ee2 = clients.get("execution_engine2")
            job_states = ee2.check_workspace_jobs(
                {"workspace_id": ws_id, "return_list": 0}
            )
            self._running_jobs = dict()
        except Exception as e:
            kblogging.log_event(self._log, "init_error", {"err": str(e)})
            raise transform_job_exception(e)

        finished_statuses = ["completed", "errored", "terminated"]
        for job_id, job_state in job_states.items():
            job_input = job_state.get("job_input", {})
            cell_info = job_input.get("narrative_cell_info", {})
            needs_refresh = job_state.get("status") not in finished_statuses
            job = Job.from_state(
                job_id,
                job_input,
                job_state.get("user"),
                app_id=job_input.get("app_id"),
                tag=cell_info.get("tag", "release"),
                cell_id=cell_info.get("cell_id", None),
                run_id=cell_info.get("run_id", None),
                token_id=cell_info.get("token_id", None),
                meta=cell_info,
            )
            self._running_jobs[job_id] = {
                "refresh": 1 if needs_refresh else 0,
                "job": job,
            }
Esempio n. 35
0
    def parameters(self):
        """
        Returns the parameters the job was started with.

        self.inputs is returned when it is not None. Otherwise the 'job_service'
        client's get_job_params is called for self.job_id, the 'params' entry of the
        first element of its result is stored in self.inputs, and that is returned.

        Any failure during the fetch is re-raised as a plain Exception whose message
        names the job id and the underlying error.
        """
        if self.inputs is None:
            try:
                job_params = clients.get("job_service").get_job_params(self.job_id)
                self.inputs = job_params[0]['params']
            except Exception as e:
                raise Exception("Unable to fetch parameters for job {} - {}".format(self.job_id, e))
        return self.inputs
Esempio n. 36
0
 def state(self):
     """
     Returns the current state of this job as a dict.

     If the last fetched state has 'finished' == 1, that cached dict is returned
     directly. Otherwise the 'job_service' client's check_job is called for
     self.job_id, the result is passed through sanitize_state, annotated with this
     job's cell_id, run_id and token_id, cached as self._last_state, and a shallow
     copy of it is returned.

     Any failure while fetching is re-raised as a plain Exception naming the job id
     and the underlying error.
     """
     cached = self._last_state
     if cached is not None and cached.get('finished', 0) == 1:
         return cached
     try:
         fresh = sanitize_state(clients.get("job_service").check_job(self.job_id))
         # Tag the state with the identifiers held by this Job.
         for field in (u'cell_id', u'run_id', u'token_id'):
             fresh[field] = getattr(self, field)
         self._last_state = fresh
         return dict(fresh)
     except Exception as e:
         raise Exception("Unable to fetch info for job {} - {}".format(self.job_id, e))
Esempio n. 37
0
 def state(self):
     """
     Looks up the status of this job, using the cached state once it is final.

     A cached self._last_state with 'finished' == 1 is returned as-is. In every
     other case the state is fetched with check_job on the 'job_service' client,
     run through sanitize_state, extended with 'cell_id', 'run_id' and 'token_id'
     from this object, stored in self._last_state, and returned as a shallow copy.

     Raises an Exception ("Unable to fetch info for job ...") if anything in the
     fetch path fails.
     """
     last = self._last_state
     is_final = last is not None and last.get('finished', 0) == 1
     if not is_final:
         try:
             job_service = clients.get("job_service")
             snapshot = sanitize_state(job_service.check_job(self.job_id))
             snapshot[u'cell_id'] = self.cell_id
             snapshot[u'run_id'] = self.run_id
             snapshot[u'token_id'] = self.token_id
             self._last_state = snapshot
             return dict(snapshot)
         except Exception as e:
             raise Exception("Unable to fetch info for job {} - {}".format(self.job_id, e))
     return last
Esempio n. 38
0
    def parameters(self):
        """
        Gives back the parameters used to start the job.

        The inputs already held by the Job are used when present (not None). If they
        are missing, get_job_params is called on the 'job_service' client; only the
        'params' entry of the first returned element is kept (in self.inputs) and
        returned, not the whole fetched object.

        Raises an Exception ("Unable to fetch parameters for job ...") when the
        lookup fails.
        """
        known_inputs = self.inputs
        if known_inputs is not None:
            return known_inputs

        try:
            job_service = clients.get("job_service")
            self.inputs = job_service.get_job_params(self.job_id)[0]['params']
            return self.inputs
        except Exception as e:
            raise Exception("Unable to fetch parameters for job {} - {}".format(self.job_id, e))
Esempio n. 39
0
    def _construct_job_status_set(self, job_ids: list) -> dict:
        """
        Builds a set of job states for the list of job ids.
        """
        # if cached, use 'em.
        # otherwise, lookup.
        # do transform
        # cache terminal ones.
        # return all.
        if not isinstance(job_ids, list):
            raise ValueError("job_ids must be a list")
        if job_ids is None:
            job_ids = self._running_jobs.keys()

        job_states = dict()
        jobs_to_lookup = list()

        # Fetch from cache of terminated jobs, where available.
        # These are already post-processed and ready to return.
        for job_id in job_ids:
            if job_id in self._completed_job_states:
                job_states[job_id] = self._completed_job_states[job_id]
            else:
                jobs_to_lookup.append(job_id)

        fetched_states = dict()
        # Get the rest of states direct from EE2.
        if len(jobs_to_lookup):
            try:
                fetched_states = clients.get("execution_engine2").check_jobs({
                    "job_ids":
                    jobs_to_lookup,
                    "exclude_fields":
                    EXCLUDED_JOB_STATE_FIELDS,
                    "return_list":
                    0
                })
            except Exception as e:
                kblogging.log_event(self._log, "construct_job_status_set",
                                    {"err": str(e)})
        for job_id, state in fetched_states.items():
            revised_state = self._construct_job_status(self.get_job(job_id),
                                                       state)
            if revised_state["state"]["status"] in TERMINAL_STATES:
                self._completed_job_states[job_id] = revised_state
            job_states[job_id] = revised_state
        return job_states
Esempio n. 40
0
    def app_description(self, app_id, tag='release'):
        """
        Renders the description of an app as an HTML display object.

        The app is first validated with self.check_app (raise_exception=True). Its
        full info is then fetched with get_method_full_info on the
        'narrative_method_store' client, and the first returned entry is rendered
        through a small template showing its name, module_name, id, ver, subtitle
        and description. The rendered markup is returned wrapped in HTML.
        """
        self.check_app(app_id, tag, raise_exception=True)

        method_store = clients.get('narrative_method_store')
        full_info = method_store.get_method_full_info({'ids': [app_id], 'tag': tag})
        info = full_info[0]

        tmpl = """
        <div class="bg-info" style="padding:15px">
            <h1>{{info.name}} <small>{{info.module_name}}</small></h1>
            <p class='lead'>{{info.id}} - v{{info.ver}}</p>
        </div>
        <p class='lead'>{{info.subtitle}}</p>
        <hr>
        {{info.description}}
        """
        rendered = Template(tmpl).render(info=info)
        return HTML(rendered)
Esempio n. 41
0
    def _child_job_states(self, sub_job_list, app_id, app_tag):
        """
        Fetches state for all jobs in the list. These are expected to be child jobs, with no actual Job object associated.
        So if they're done, we need to do the output mapping out of band.
        But the check_jobs call with params will return the app id. So that helps.

        app_id = the id of the app that all the child jobs are running (format: module/method, like "MEGAHIT/run_megahit")
        app_tag = one of "release", "beta", "dev"
        (the above two aren't stored with the subjob metadata, and won't until we back some more on KBParallel - I want to
        lobby for pushing toward just starting everything up at once from here and letting HTCondor deal with allocation)
        sub_job_list = list of ids of jobs to look up
        """
        if not sub_job_list:
            return []

        sub_job_list = sorted(sub_job_list)
        job_info = clients.get('job_service').check_jobs({'job_ids': sub_job_list, 'with_job_params': 1})
        job_info = sanitize_all_states(job_info)
        child_job_states = list()

        for job_id in sub_job_list:
            params = job_info['job_params'][job_id]
            # if it's error, get the error.
            if job_id in job_info['check_error']:
                error = job_info['check_error'][job_id]
                error.update({'job_id': job_id})
                child_job_states.append(error)
                continue
            # if it's done, get the output mapping.
            state = job_info['job_states'][job_id]
            if state.get('finished', 0) == 1:
                try:
                    widget_info = Job.map_viewer_params(
                        state,
                        params['params'],
                        app_id,
                        app_tag
                    )
                except ValueError:
                    widget_info = {}
                state.update({'widget_info': widget_info})
            child_job_states.append(state)
        return child_job_states
Esempio n. 42
0
    def _run_dynamic_service_internal(self, app_id, params, tag, version, cell_id, run_id):
        """
        Runs an app synchronously through the "service" client and delivers its result.

        The app spec is fetched with self._get_validated_app_spec, the attempt is logged
        ("run_dynamic_service"), and the function
        "<kb_service_name>.<kb_service_method>" from the spec's behavior section is
        invoked via sync_call with params passed through unchanged.

        Parameters:
        app_id - id of the app to run
        params - parameters handed to the service call as-is (not validated here)
        tag - app tag; also passed to sync_call as service_version
        version - passed to self._get_validated_app_spec only
        cell_id - if truthy, the result is sent to that cell as a 'result' message
        run_id - run id included in the cell message

        Returns the first element of the sync_call result when no cell_id is given;
        returns None when the result was sent as a cell message instead.
        Exceptions from the service call propagate unchanged.
        """
        spec = self._get_validated_app_spec(app_id, tag, False, version=version)

        # Log that we're trying to run a job...
        log_info = {
            'app_id': app_id,
            'tag': tag,
            'username': system_variable('user_id'),
            'ws': system_variable('workspace')
        }
        kblogging.log_event(self._log, "run_dynamic_service", log_info)

        # Silly to keep this here, but we do not validate the incoming parameters.
        # If they are provided by the UI (we have cell_id), they are constructed
        # according to the spec, so are trusted;
        # Otherwise, if they are the product of direct code cell entry, this is a mode we do not
        # "support", so we can let it fail hard.
        # In the future when code cell interaction is supported for users, we will need to provide
        # robust validation and error reporting, but this may end up being (should be) provided by the
        # sdk execution infrastructure anyway

        input_vals = params
        function_name = spec['behavior']['kb_service_name'] + '.' + spec['behavior']['kb_service_method']
        # NOTE(review): the service version passed here is `tag`, not `version` - confirm
        # that this is intended.
        result = clients.get("service").sync_call(
            function_name,
            input_vals,
            service_version=tag
        )[0]
        # if a ui call (a cell_id is defined) we send a result message, otherwise
        # just the raw result for display in a code cell. This is how we "support"
        # code cells for internal usage.
        if cell_id:
            self.send_cell_message('result', cell_id, run_id, {
                'result': result
            })
            return None
        return result
Esempio n. 43
0
    def _get_all_job_states(self, job_ids=None):
        """
        Returns the state for all running jobs
        """
        # 1. Get list of ids
        if job_ids is None:
            job_ids = self._running_jobs.keys()
        # 1.5 Go through job ids and remove ones that aren't found.
        job_ids = [j for j in job_ids if j in self._running_jobs]
        # 2. Foreach, check if in completed cache. If so, grab the status. If not, enqueue id
        # for batch lookup.
        job_states = dict()
        jobs_to_lookup = list()
        for job_id in job_ids:
            if job_id in self._completed_job_states:
                job_states[job_id] = dict(self._completed_job_states[job_id])
            else:
                jobs_to_lookup.append(job_id)
        # 3. Lookup those jobs what need it. Cache 'em as we go, if finished.
        try:
            fetched_states = clients.get('job_service').check_jobs({'job_ids': jobs_to_lookup})
        except Exception as e:
            kblogging.log_event(self._log, 'get_all_job_states_error', {'err': str(e)})
            return {}

        error_states = fetched_states.get('check_errors', {})
        fetched_states = fetched_states.get('job_states', {})
        for job_id in jobs_to_lookup:
            if job_id in fetched_states:
                state = fetched_states[job_id]
                state['cell_id'] = self._running_jobs[job_id]['job'].cell_id
                state['run_id'] = self._running_jobs[job_id]['job'].run_id
                if state.get('finished', 0) == 1:
                    self._completed_job_states[state['job_id']] = dict(state)
                job_states[state['job_id']] = state
            elif job_id in error_states:
                error = error_states[job_id]
                job_states[state['job_id']] = {'lookup_error': error}

        return job_states
Esempio n. 44
0
def _get_categories(ids, matrix_ref, attributemapping_ref=None, mapping=None, whitelist=(),
                    clustergrammer=False):
    """Creates the correct kind of multi-factor index for clustergrammer display"""
    if not attributemapping_ref or whitelist is None:
        return ids
    cat_list = []
    ws = clients.get('workspace')
    attribute_data = ws.get_objects2(
        {'objects': [{'ref': matrix_ref + ";" + attributemapping_ref}]})['data'][0]['data']

    if not mapping:
        mapping = {x: x for x in ids}
    whitelist = set(whitelist)

    for _id in ids:
        try:
            attribute_values = attribute_data['instances'][mapping[_id]]
        except KeyError:
            if _id not in mapping:
                raise ValueError("Row or column id {} is not in the provided mapping".format(_id))
            raise ValueError("AttributeMapping {} has no attribute {} which corresponds to row or "
                             "column id {} in the provided object.".format(attributemapping_ref,
                                                                           mapping[_id], _id))
        cats = [_id]
        for i, val in enumerate(attribute_values):
            cat_name = attribute_data['attributes'][i]['attribute']
            if whitelist and cat_name not in whitelist:
                continue
            if clustergrammer:
                cats.append("{}: {}".format(cat_name, val))
            else:
                cats.append(val)
        cat_list.append(tuple(cats))

    if clustergrammer:
        return cat_list
    attribute_names = [x['attribute'] for x in attribute_data['attributes']
                       if not whitelist or x['attribute'] in whitelist]
    return pd.MultiIndex.from_tuples(cat_list, names=['ID']+attribute_names)
Esempio n. 45
0
    def show_data_widget(self, upa, title=None, cell_id=None, tag="release"):
        """
        Renders a widget using the generic kbaseNarrativeOutputCell container.
        First, it looks up the UPA to get its object type. It then uses that type to look up
        what the viewer app should be. This contains the widget and the parameter mapping to view
        that widget. It then maps all of these together to run show_output_widget against a widget
        with a set of parameters for it.

        If the type spec or the viewer app spec cannot be found, a widget is still rendered,
        but its data carries an "error" entry describing the problem.

        Parameters
        ----------
        upa : string
            UPA defining a workspace object. Used to translate that object into parameters
            for the mapping to the data object used in the output cell.
            This may also be a Workspace reference path.
        title=None : string
            A title for the cell; passed through to show_output_widget.
        cell_id=None : string
            if not None, this should be the id of the cell where the widget will live. Generated by
            the Narrative frontend.
        tag="release" : string
            All objects are related to their viewers by an app. This is the tag for that app's
            release state (should be one of release, beta, or dev)

        Returns
        -------
        The result of self.show_output_widget, or - when the type spec exists but lists no
        'view_method_ids' - the plain string "No viewer found for objects of type <type>".
        """
        widget_name = 'widgets/function_output/kbaseDefaultObjectView'   # set as default, overridden below
        widget_data = dict()
        upas = dict()
        info_tuple = clients.get('workspace').get_object_info_new({'objects': [{'ref': upa}],
                                                                   'includeMetadata': 1})[0]
        # Strip the version suffix after '-' from the type string, then take the part
        # before the first '.' as the module.
        bare_type = info_tuple[2].split('-')[0]
        type_module = bare_type.split(".")[0]

        type_spec = self._sm.get_type_spec(bare_type, raise_exception=False)

        if type_spec is None:
            widget_data = {
                "error": {
                    "msg": "Unable to find viewer specification for objects of type {}.".format(bare_type),
                    "method_name": "WidgetManager.show_data_widget",
                    "traceback": "Can't find type spec info for type {}".format(bare_type)
                }
            }
            upas['upas'] = [upa]  # doompety-doo
        else:
            if not type_spec.get('view_method_ids'):
                return "No viewer found for objects of type {}".format(bare_type)
            app_id = type_spec['view_method_ids'][0]
            app_spec = None
            try:
                app_spec = self._sm.get_spec(app_id, tag=tag)
            except Exception as e:
                widget_data = {
                    "error": {
                        "msg": "Unable to find specification for viewer app {}".format(app_id),
                        "method_name": "WidgetManager.show_data_widget",
                        # str(e) instead of e.message: exceptions have no .message
                        # attribute on Python 3, which would mask the real error.
                        "traceback": str(e)
                    }
                }
            if app_spec is not None:
                spec_params = self._sm.app_params(app_spec)
                input_params = {}
                is_ref_path = ';' in upa
                is_external = info_tuple[7] != os.environ['KB_WORKSPACE_ID']
                # it's not safe to use reference yet (until we switch to them all over the Apps)
                # But in case we deal with ref-path we have to do it anyway:
                obj_param_value = upa if (is_ref_path or is_external) else info_tuple[1]
                upa_params = list()
                for param in spec_params:
                    # A parameter takes the object if it has no type restriction, or if
                    # it allows this object's full type or its module.
                    allowed_types = param.get('allowed_types')
                    if allowed_types is None or any(
                            (t == bare_type or t == type_module) for t in allowed_types):
                        input_params[param['id']] = obj_param_value
                        upa_params.append(param['id'])

                # Only the validated parameters are needed; the workspace refs are unused here.
                (input_params, _ws_refs) = validate_parameters(app_id, tag,
                                                               spec_params, input_params)
                (widget_name, widget_data) = map_outputs_from_state([], input_params, app_spec)

                # Figure out params for upas.
                for mapping in app_spec.get('behavior', {}).get('output_mapping', []):
                    if mapping.get('input_parameter', '') in upa_params and 'target_property' in mapping:
                        upas[mapping['target_property']] = upa

        return self.show_output_widget(
            widget_name,
            widget_data,
            upas=upas,
            title=title,
            type="viewer",
            cell_id=cell_id
        )
Esempio n. 46
0
    def _run_app_internal(self, app_id, params, tag, version,
                          cell_id, run_id, dry_run):
        """
        Attempts to run the app as a job on the "job_service" and registers the new Job.

        Parameters:
        -----------
        app_id - should be from the app spec, e.g. 'build_a_metabolic_model'
                    or 'MegaHit/run_megahit'.
        params - a dictionary of parameters for the app. They are mapped onto the
                 service inputs through the spec's kb_service_input_mapping.
        tag - one of [release|beta|dev]
        version - a semantic version string, or None. When given, it overrides the
                  spec's kb_service_version. (Per the original documentation, only
                  released modules have versions, and a version given with a
                  non-'release' tag raises a ValueError; that check is not visible
                  in this function.)
        cell_id - id of the cell the app runs in, or None. Stored in the job metadata.
        run_id - id of this run, or None. Stored in the job metadata.
        dry_run - if true, nothing is started: the inputs that would be sent to
                  run_job are returned instead.

        Returns:
        --------
        - the job runner inputs dict, if dry_run is true;
        - None, if cell_id is not None (the caller is notified through the
          'run_status' comm message instead);
        - the new Job object otherwise.

        Raises whatever transform_job_exception produces if run_job fails; errors
        while fetching the agent token propagate unchanged.
        """
        ws_id = strict_system_variable('workspace_id')
        spec = self._get_validated_app_spec(app_id, tag, True, version=version)

        # Preflight check the params - all required ones are present, all
        # values are the right type, all numerical values are in given ranges
        spec_params = self.spec_manager.app_params(spec)

        spec_params_map = {param['id']: param for param in spec_params}
        ws_input_refs = extract_ws_refs(app_id, tag, spec_params, params)
        input_vals = self._map_inputs(
            spec['behavior']['kb_service_input_mapping'],
            params,
            spec_params_map)

        service_method = spec['behavior']['kb_service_method']
        service_name = spec['behavior']['kb_service_name']
        service_ver = spec['behavior'].get('kb_service_version', None)

        # Let the given version override the spec's version.
        if version is not None:
            service_ver = version

        # This is what calls the function in the back end - Module.method
        # This isn't the same as the app spec id.
        function_name = service_name + '.' + service_method
        job_meta = {'tag': tag}
        if cell_id is not None:
            job_meta['cell_id'] = cell_id
        if run_id is not None:
            job_meta['run_id'] = run_id

        # This is the input set for NJSW.run_job. Now we need the workspace id
        # and whatever fits in the metadata.
        job_runner_inputs = {
            'method': function_name,
            'service_ver': service_ver,
            'params': input_vals,
            'app_id': app_id,
            'wsid': ws_id,
            'meta': job_meta
        }
        if len(ws_input_refs) > 0:
            job_runner_inputs['source_ws_objects'] = ws_input_refs
        if dry_run:
            return job_runner_inputs

        # We're now almost ready to run the job. Last, we need an agent token.
        # The token name is truncated to the maximum length allowed for it.
        token_name = 'KBApp_{}'.format(app_id)
        token_name = token_name[:self.__MAX_TOKEN_NAME_LEN]
        agent_token = auth.get_agent_token(auth.get_auth_token(), token_name=token_name)
        job_runner_inputs['meta']['token_id'] = agent_token['id']

        # Log that we're trying to run a job...
        log_info = {
            'app_id': app_id,
            'tag': tag,
            'version': service_ver,
            'username': system_variable('user_id'),
            'wsid': ws_id
        }
        kblogging.log_event(self._log, "run_app", log_info)

        try:
            job_id = clients.get("job_service", token=agent_token['token']).run_job(job_runner_inputs)
        except Exception as e:
            log_info.update({'err': str(e)})
            kblogging.log_event(self._log, "run_app_error", log_info)
            raise transform_job_exception(e)

        new_job = Job(job_id,
                      app_id,
                      input_vals,
                      system_variable('user_id'),
                      tag=tag,
                      app_version=service_ver,
                      cell_id=cell_id,
                      run_id=run_id,
                      token_id=agent_token['id'])

        self._send_comm_message('run_status', {
            'event': 'launched_job',
            'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
            'cell_id': cell_id,
            'run_id': run_id,
            'job_id': job_id
        })
        JobManager().register_new_job(new_job)
        # Cell-driven runs are informed through the comm message above; only direct
        # (code cell) calls get the Job object back.
        return None if cell_id is not None else new_job
Esempio n. 47
0
    def _run_app_batch_internal(self, app_id, params, tag, version, cell_id, run_id, dry_run):
        """
        Runs one app over several parameter sets as a single kb_BatchApp job.

        Each parameter set in `params` is mapped through the app spec into the
        inputs its service method expects. The whole collection is then handed
        to the kb_BatchApp.run_batch method as one job.

        Parameters
        ----------
        app_id: the id of the app to run for every parameter set
        params: a list of parameter sets, one per batch sub-job
        tag: the app's tag, used for the spec lookup and recorded as 'batch_tag'
        version: if not None, overrides the service version given in the app spec
        cell_id: if not None, recorded in the job metadata; always sent in the
            'run_status' comm message
        run_id: if not None, recorded in the job metadata; always sent in the
            'run_status' comm message
        dry_run: if truthy, return the inputs that would be sent to the job
            service without requesting an agent token or starting a job

        Returns
        -------
        The job runner input dict when dry_run is truthy. Otherwise None when a
        cell_id was given, or the newly registered Job when it was not.

        Raises
        ------
        The exception built by transform_job_exception when the job service's
        run_job call fails. Errors from the spec lookup, the input mapping and
        the agent token request propagate unchanged.
        """
        batch_method = "kb_BatchApp.run_batch"
        batch_app_id = "kb_BatchApp/run_batch"
        batch_method_ver = "dev"
        batch_method_tag = "dev"
        ws_id = strict_system_variable('workspace_id')
        spec = self._get_validated_app_spec(app_id, tag, True, version=version)
        behavior = spec['behavior']

        # Preflight check the params - all required ones are present, all
        # values are the right type, all numerical values are in given ranges
        spec_params = self.spec_manager.app_params(spec)

        # The id -> spec lookup is the same for every parameter set, so build it once.
        # NOTE(review): this assumes _map_inputs only reads the map - confirm.
        spec_params_map = {spec_param['id']: spec_param for spec_param in spec_params}

        # A list of lists of UPAs, used for each subjob.
        batch_ws_upas = list()
        # The list of actual input values, post-mapping.
        batch_run_inputs = list()

        for param_set in params:
            batch_ws_upas.append(extract_ws_refs(app_id, tag, spec_params, param_set))
            batch_run_inputs.append(self._map_inputs(
                behavior['kb_service_input_mapping'],
                param_set,
                spec_params_map))

        service_method = behavior['kb_service_method']
        service_name = behavior['kb_service_name']
        service_ver = behavior.get('kb_service_version', None)

        # Let the given version override the spec's version.
        if version is not None:
            service_ver = version

        # The job itself is tagged as the batch runner; the wrapped app and its
        # tag are carried along in the metadata.
        job_meta = {
            'tag': batch_method_tag,
            'batch_app': app_id,
            'batch_tag': tag,
            'batch_size': len(params),
        }
        if cell_id is not None:
            job_meta['cell_id'] = cell_id
        if run_id is not None:
            job_meta['run_id'] = run_id

        # Now put these all together in a way that can be sent to the batch processing app.
        batch_params = [{
            "module_name": service_name,
            "method_name": service_method,
            "service_ver": service_ver,
            "wsid": ws_id,
            "meta": job_meta,
            "batch_params": [{
                "params": run_inputs,
                "source_ws_objects": ws_upas
            } for run_inputs, ws_upas in zip(batch_run_inputs, batch_ws_upas)],
        }]

        # This is the input set for NJSW.run_job. The source workspace objects
        # travel with each sub-job inside batch_params, so there is no
        # top-level 'source_ws_objects' entry here.
        job_runner_inputs = {
            'method': batch_method,
            'service_ver': batch_method_ver,
            'params': batch_params,
            'app_id': batch_app_id,
            'wsid': ws_id,
            'meta': job_meta
        }

        # If we're doing a dry run, just return the inputs that we made. This
        # happens before the agent token request so that a dry run has no side
        # effects on the auth service.
        if dry_run:
            return job_runner_inputs

        # We're now almost ready to run the job. Last, we need an agent token.
        token_name = 'KBApp_{}'.format(app_id)[:self.__MAX_TOKEN_NAME_LEN]
        agent_token = auth.get_agent_token(auth.get_auth_token(), token_name=token_name)
        # job_meta is the same dict object referenced by job_runner_inputs and
        # batch_params, so the token id shows up in both.
        job_meta['token_id'] = agent_token['id']

        # Log that we're trying to run a job...
        log_info = {
            'app_id': app_id,
            'tag': batch_method_tag,
            'version': service_ver,
            'username': system_variable('user_id'),
            'wsid': ws_id
        }
        kblogging.log_event(self._log, "run_batch_app", log_info)

        try:
            job_id = clients.get("job_service", token=agent_token['token']).run_job(job_runner_inputs)
        except Exception as e:
            log_info.update({'err': str(e)})
            kblogging.log_event(self._log, "run_batch_app_error", log_info)
            raise transform_job_exception(e)

        new_job = Job(job_id,
                      batch_app_id,
                      batch_params,
                      system_variable('user_id'),
                      tag=batch_method_tag,
                      app_version=batch_method_ver,
                      cell_id=cell_id,
                      run_id=run_id,
                      token_id=agent_token['id'],
                      meta=job_meta)

        self._send_comm_message('run_status', {
            'event': 'launched_job',
            'event_at': datetime.datetime.utcnow().isoformat() + 'Z',
            'cell_id': cell_id,
            'run_id': run_id,
            'job_id': job_id
        })
        JobManager().register_new_job(new_job)
        # When launched from a cell, the caller gets its updates over the comm
        # channel, so nothing is returned.
        if cell_id is not None:
            return
        else:
            return new_job
Esempio n. 48
0
"""
Some utility functions for running KBase Apps or Methods or whatever they are this week.
"""
__author__ = "Bill Riehl <*****@*****.**>, Roman Sutormin <*****@*****.**>"

import os
import re
import biokbase.narrative.clients as clients

# The tags accepted by check_tag below.
app_version_tags = ['release', 'beta', 'dev']
# Workspace client, fetched once when this module is imported.
_ws_client = clients.get('workspace')

def check_tag(tag, raise_exception=False):
    """
    Reports whether the given tag is one of the entries in app_version_tags
    ("release", "beta", or "dev").

    Returns True for a known tag and False for an unknown one.
    If raise_exception is truthy, an unknown tag raises a ValueError
    instead of returning False.
    """
    if tag in app_version_tags:
        return True
    if raise_exception:
        allowed = ", ".join(app_version_tags)
        raise ValueError("Can't find tag %s - allowed tags are %s" % (tag, allowed))
    return False

def system_variable(var):
    """
    Returns a KBase system variable. Just a little wrapper.

    Parameters
    ----------
    var: string, one of "workspace", "workspace_id", "token", "user_id"
Esempio n. 49
0
    def initialize_jobs(self, start_lookup_thread=True):
        """
        Sets up this JobManager with the jobs attached to the current Narrative.

        This is meant to be called from a running Narrative, which is tied to a workspace.
        The sequence is:
        1. send a 'start' comm message carrying the current time in milliseconds
        2. get the workspace id from app_util.system_variable('workspace_id')
        3. get the list of jobs with that ws id from UJS (also gets tag, cell_id, run_id,
           token_id from each job's metadata)
        4. look up all of those jobs in the job service with one check_jobs call and build
           a Job object for each one that has job params (also gets app_id)
        5. send a comm message describing any jobs the job service reported errors for
        6. start the status lookup loop. If the loop is already running, or
           start_lookup_thread is false, do a single lookup of all job statuses instead.

        Any failure while listing the jobs or building a Job is logged, sent over the comm
        channel, and re-raised after passing through transform_job_exception.
        """
        start_ms = int(round(time.time() * 1000))
        self._send_comm_message('start', {'time': start_ms})

        workspace_id = system_variable('workspace_id')
        try:
            ujs_client = clients.get('user_and_job_state')
            nar_jobs = ujs_client.list_jobs2({
                'authstrat': 'kbaseworkspace',
                'authparams': [str(workspace_id)]
            })
        except Exception as list_err:
            kblogging.log_event(self._log, 'init_error', {'err': str(list_err)})
            wrapped_err = transform_job_exception(list_err)
            self._send_comm_message('job_init_err', {
                'error': 'Unable to get initial jobs list',
                'message': getattr(wrapped_err, 'message', 'Unknown reason'),
                'code': getattr(wrapped_err, 'code', -1),
                'source': getattr(wrapped_err, 'source', 'jobmanager'),
                'name': getattr(wrapped_err, 'name', type(list_err).__name__),
                'service': 'user_and_job_state'
            })
            raise wrapped_err

        # One round trip to the job service covers every job in the list.
        job_ids = [ujs_entry[0] for ujs_entry in nar_jobs]
        check_result = clients.get('job_service').check_jobs({
            'job_ids': job_ids, 'with_job_params': 1
        })
        params_by_job = check_result.get('job_params', {})
        errors_by_job = check_result.get('check_error', {})
        failed_lookups = dict()

        for ujs_entry in nar_jobs:
            job_id = ujs_entry[0]
            owner_info = ujs_entry[1]
            job_meta = ujs_entry[10]
            try:
                if job_id in params_by_job:
                    job_info = params_by_job[job_id]
                    # Jobs loaded here start out with refreshing turned off. A client has
                    # to ask for updates with the start_job_update message before the
                    # refresh flag gets switched on.
                    self._running_jobs[job_id] = {
                        'refresh': 0,
                        'job': Job.from_state(job_id,
                                              job_info,
                                              owner_info[0],
                                              app_id=job_info.get('app_id'),
                                              tag=job_meta.get('tag', 'release'),
                                              cell_id=job_meta.get('cell_id', None),
                                              run_id=job_meta.get('run_id', None),
                                              token_id=job_meta.get('token_id', None),
                                              meta=job_meta)
                    }
                elif job_id in errors_by_job:
                    lookup_err = errors_by_job[job_id]
                    failed_lookups[job_id] = {
                        'job_state': 'error',
                        'error': {
                            'error': 'KBase execution engine returned an error while looking up this job.',
                            'message': lookup_err.get('message', 'No error message available'),
                            'name': 'Job Error',
                            'code': lookup_err.get('code', -999),
                            'exception': {
                                'error_message': 'Job lookup in execution engine failed',
                                'error_type': lookup_err.get('name', 'unknown'),
                                'error_stacktrace': lookup_err.get('error', '')
                            }
                        },
                        'cell_id': job_meta.get('cell_id', None),
                        'run_id': job_meta.get('run_id', None),
                    }

            except Exception as build_err:
                kblogging.log_event(self._log, 'init_error', {'err': str(build_err)})
                wrapped_err = transform_job_exception(build_err)
                self._send_comm_message('job_init_lookup_err', {
                    'error': 'Unable to get job info on initial lookup',
                    'job_id': job_id,
                    'message': getattr(wrapped_err, 'message', 'Unknown reason'),
                    'code': getattr(wrapped_err, 'code', -1),
                    'source': getattr(wrapped_err, 'source', 'jobmanager'),
                    'name': getattr(wrapped_err, 'name', type(build_err).__name__),
                    'service': 'job_service'
                })
                # Any failure here is fatal for initialization.
                raise wrapped_err

        if len(errors_by_job):
            # Distinguish "some jobs failed" from "every job failed".
            if len(errors_by_job) == len(nar_jobs):
                summary = 'Unable to get info for any job on initial lookup'
                msg_type = 'job_init_lookup_err'
            else:
                summary = 'Unable to find info for some jobs on initial lookup'
                msg_type = 'job_init_partial_err'
            self._send_comm_message(msg_type, {
                'error': summary,
                'job_errors': failed_lookups,
                'message': 'Job information was unavailable from the server',
                'code': -2,
                'source': 'jobmanager',
                'name': 'jobmanager',
                'service': 'job_service',
            })

        if self._running_lookup_loop or not start_lookup_thread:
            # Either a loop is already going or the caller didn't want one,
            # so just refresh everything once.
            self._lookup_all_job_status()
            return

        # Only keep one loop at a time in case this gets called again!
        if self._lookup_timer is not None:
            self._lookup_timer.cancel()
        self._running_lookup_loop = True
        self._lookup_job_status_loop()
Esempio n. 50
0
 def _update_log(self):
     """
     Asks the job service for this job's log lines beyond the ones already
     held in self._job_logs, and adds any new lines to the end.
     """
     request = {
         'job_id': self.job_id,
         'skip_lines': len(self._job_logs)
     }
     new_lines = clients.get("job_service").get_job_logs(request)['lines']
     if new_lines:
         # Build a new list rather than extending the old one in place.
         self._job_logs = self._job_logs + new_lines
Esempio n. 51
0
    def infer_upas(self, widget_name, params):
        """
        Use the given widget_name and parameters (to be passed to the widget) to infer any upas.
        This will generally mean using the workspace object name and workspace name to do a
        lookup in the Workspace and constructing the upa or upa path from there.

        widget_name - string - Name of the widget to be used, this gets looked up in the widget
                    param map. This maps all widget input parameters onto some sensible language.
        params - dict - keys = id of parameter, values = value of parameter.

        So the general flow is something like this. We go through all parameters, see what context
        those map on to, and infer, from that, what are the workspace objects. We can then look up
        those objects by the workspace and object name, and use the info to construct UPAs.

        Example: wm.infer_upas("kbasePanGenome", { "ws": "my_workspace", "name": "my_pangenome" })
        The widget parameter map has this entry:
        "kbasePanGenome": {
            "ws": "ws_name",
            "name": "obj_name"
        }
        So we know, by inference, that "my_workspace" is a workspace name, and "my_pangenome" is an
        object name.

        We can use this info to look up the object info from the Workspace, let's say it's 3/4/5.
        This then gets returned as another dict:
        {
            "name": "3/4/5"
        }

        This applies for lists, too. If, above, the value for the "name" parameter was a list of
        strings, this would treat all of those as objects, and try to return a list of UPAs instead.

        Returns a dict that maps each object-type parameter id to its UPA (or UPA path), or to a
        list of them for list-type parameters. Parameters with a workspace context, or with no
        entry in the widget param map, are left out. An unknown widget_name gives an empty dict.

        Raises a ValueError if an "obj_ref" parameter is neither a UPA nor a reference, or if any
        member of an "obj_ref_list" parameter is not a reference.
        """
        param_to_context = self.widget_param_map.get(widget_name, {})
        obj_names = list()  # list of tuples - first = param id, second = object name
        obj_refs = list()   # list of tuples - first = param id, second = UPA
        obj_name_list = list()  # list of tuples, but the second is a list of names
        obj_ref_list = list()   # list of tuples, but second is a list of upas
        ws = None
        for (param, value) in params.items():
            if param not in param_to_context:
                continue
            context = param_to_context[param]
            if context in ("ws_id", "ws_name"):
                ws = value
            elif context in ("obj_name", "obj_id"):
                obj_names.append((param, value))
            elif context == "obj_name_list":
                obj_name_list.append((param, value))
            elif context == "obj_ref":
                obj_refs.append((param, value))
            elif context == "obj_ref_list":
                obj_ref_list.append((param, value))

        # return value will look like this:
        # {
        #   param1: upa,
        #   param2: upa
        #   param3: [upa1, upa2],
        #   ... etc
        # }
        upas = dict()

        # The workspace client is only fetched once something actually needs a lookup.
        ws_client = None

        # First, test obj_refs, and obj_refs_list
        # These might be references of the form ws_name/obj_name, which are not proper UPAs and
        # need to be resolved. Gotta test 'em all.
        lookup_params = list()
        info_params = list()

        for (param, ref) in obj_refs:
            if is_upa(str(ref)):
                upas[param] = ref
            elif is_ref(str(ref)):
                info_params.append({"ref": ref})
                lookup_params.append(param)
            else:
                raise ValueError('Parameter {} has value {} which was expected to refer to an object'.format(param, ref))

        # params for get_object_info3
        for (param, name) in obj_names:
            # it's possible that these are misnamed and are actually upas already. test and add to
            # the upas dictionary if so.
            if is_upa(str(name)):
                upas[param] = name
            elif is_ref(str(name)):
                info_params.append({"ref": name})
                lookup_params.append(param)
            else:
                info_params.append({"ref": "{}/{}".format(ws, name)})
                lookup_params.append(param)

        if lookup_params:
            ws_client = clients.get('workspace')
            ws_info = ws_client.get_object_info3({'objects': info_params})
            for (idx, path) in enumerate(ws_info['paths']):
                upas[lookup_params[idx]] = ';'.join(path)

        # obj_refs and obj_names are done. Do the list versions now.
        lookup_params = list()
        info_params = list()
        for (param, ref_list) in obj_ref_list:
            # error fast if any member of a list isn't actually a ref.
            # this might be me being lazy, but I suspect there's a problem if the inputs aren't
            # actually uniform.
            for ref in ref_list:
                if not is_ref(str(ref)):
                    raise ValueError('Parameter {} has value {} which contains an item that is not a valid object reference'.format(param, ref_list))
            lookup_params.append(param)
            info_params.append([{'ref': ref} for ref in ref_list])

        for (param, name_list) in obj_name_list:
            info_param = list()
            for name in name_list:
                if is_ref(str(name)):
                    info_param.append({'ref': name})
                else:
                    info_param.append({'ref': "{}/{}".format(ws, name)})
            info_params.append(info_param)
            lookup_params.append(param)

        # The single-value lookups above may not have needed a client at all, so make sure one
        # exists before the list lookups use it.
        if lookup_params and ws_client is None:
            ws_client = clients.get('workspace')

        # This time we have a one->many mapping from params to each list. Run ws lookup in a loop
        for (param, objects) in zip(lookup_params, info_params):
            ws_info = ws_client.get_object_info3({'objects': objects})
            upas[param] = [';'.join(path) for path in ws_info['paths']]
        return upas
Esempio n. 52
0
    def initialize_jobs(self):
        """
        Sets up this JobManager with the jobs attached to the current Narrative.

        This is meant to be called from a running Narrative, which is tied to a workspace.
        The sequence is:
        1. get the workspace id from app_util.system_variable('workspace_id')
        2. get the list of jobs with that ws id from UJS (also gets tag, cell_id, run_id)
        3. build a Job object for each of those by running NJS.get_job_params on it
           (also gets app_id)
        4. start the status lookup loop, or, if it is already running, do a single lookup
           of all job statuses.

        Any failure while listing the jobs or fetching a job's params is logged, sent over
        the comm channel, and re-raised after passing through transform_job_exception.
        """
        workspace_id = system_variable('workspace_id')
        try:
            ujs_client = clients.get('user_and_job_state')
            nar_jobs = ujs_client.list_jobs2({
                'authstrat': 'kbaseworkspace',
                'authparams': [str(workspace_id)]
            })
        except Exception as list_err:
            kblogging.log_event(self._log, 'init_error', {'err': str(list_err)})
            wrapped_err = transform_job_exception(list_err)
            self._send_comm_message('job_init_err', {
                'error': 'Unable to get initial jobs list',
                'message': getattr(wrapped_err, 'message', 'Unknown reason'),
                'code': getattr(wrapped_err, 'code', -1),
                'source': getattr(wrapped_err, 'source', 'jobmanager'),
                'name': getattr(wrapped_err, 'name', type(list_err).__name__),
                'service': 'user_and_job_state'
            })
            raise wrapped_err

        for ujs_entry in nar_jobs:
            job_id = ujs_entry[0]
            owner_info = ujs_entry[1]
            job_meta = ujs_entry[10]
            try:
                job_info = clients.get('job_service').get_job_params(job_id)[0]
                job = Job.from_state(job_id,
                                     job_info,
                                     owner_info[0],
                                     app_id=job_info.get('app_id'),
                                     tag=job_meta.get('tag', 'release'),
                                     cell_id=job_meta.get('cell_id', None),
                                     run_id=job_meta.get('run_id', None))
                # Every job loaded here is marked for refreshing right away.
                self._running_jobs[job_id] = {
                    'refresh': True,
                    'job': job
                }
            except Exception as lookup_err:
                kblogging.log_event(self._log, 'init_error', {'err': str(lookup_err)})
                wrapped_err = transform_job_exception(lookup_err)
                self._send_comm_message('job_init_lookup_err', {
                    'error': 'Unable to get job info on initial lookup',
                    'job_id': job_id,
                    'message': getattr(wrapped_err, 'message', 'Unknown reason'),
                    'code': getattr(wrapped_err, 'code', -1),
                    'source': getattr(wrapped_err, 'source', 'jobmanager'),
                    'name': getattr(wrapped_err, 'name', type(lookup_err).__name__),
                    'service': 'job_service'
                })
                # Any failure here is fatal for initialization.
                raise wrapped_err

        if self._running_lookup_loop:
            # A loop is already going, so just refresh everything once.
            self._lookup_all_job_status()
            return

        # Only keep one loop at a time in case this gets called again!
        if self._lookup_timer is not None:
            self._lookup_timer.cancel()
        self._running_lookup_loop = True
        self._lookup_job_status_loop()
Esempio n. 53
0
def update_method_cell(cell):
    """
    Updates a single method cell to fill these three constraints:
    1. Become a code cell, NOT a markdown cell.
    2. Translate the cell's metadata to the right structure.
    3. Remove the MD code from the source area.

    Some assumptions made here:
    1. Jobs associated with the cell are not available, so the converted cell
       always starts out in the 'editing' state, whatever state it was saved in.
    2. The cell doesn't record what tag the method came from, so it gets looked
       up in the catalog, preferring release over beta over dev. If nothing is
       found there, the tag is left as None.

    Parameters
    ----------
    cell: dict, the cell to update. Its old-style metadata is read from
        cell['metadata']['kb-cell'].

    Returns
    -------
    The same cell, modified in place. A cell without a 'method' entry in its
    metadata is returned untouched. A cell that has no git commit hash or no
    service module name is not an SDK method, and the result of
    obsolete_method_cell is returned for it instead.
    """
    # 1. Get its metadata and update it to be new cell-ish
    meta = cell['metadata']['kb-cell']
    if 'method' not in meta:
        # Nothing here to convert.
        return cell

    method_info = meta['method'].get('info', {})
    method_behavior = meta['method'].get('behavior', {})

    # Only the first saved widget state is used.
    widget_state = meta.get('widget_state', [])
    if len(widget_state):
        widget_state = widget_state[0]
    else:
        widget_state = {}

    # A cell saved without a widget state (or without a 'state' in it) has no
    # runtime state at all, so fall back to an empty one rather than None.
    runtime_state = widget_state.get('state') or {}
    method_params = runtime_state.get('params') or {}

    # The saved time is in milliseconds; it becomes the cell's created and
    # lastLoaded timestamps.
    ts = widget_state.get('time', None)
    if ts:
        ts = datetime.datetime.utcfromtimestamp(ts/1000.0).strftime('%a, %d %b %Y %H:%M:%S GMT')

    git_hash = method_info.get('git_commit_hash', None)
    app_name = method_info.get('id', '')
    # the app_name in this case, is everything after the slash. So MegaHit/run_megahit would just be 'run_megahit'
    app_name = app_name[app_name.find('/')+1:]
    module_name = method_behavior.get('kb_service_name', None)

    if not (git_hash and module_name):
        # Without both a git hash and a module name it's not an SDK method and
        # can't be looked up in the catalog.
        return obsolete_method_cell(cell, method_info.get('id'), method_info.get('name'), meta['method'], method_params)

    # Now we work out the tag. This needs git_hash and module_name:
    # - look up the exact module version. If it carries release tags, use the
    #   most released one (release > beta > dev).
    # - if that lookup fails or gives no usable tag, look at the module's info
    #   and use the most released version that contains this app.
    # - if that fails too, the cell is still converted, with its tag left as None.
    cat = clients.get('catalog')
    tag_pref_order = ['release', 'beta', 'dev']
    tag = None
    try:
        version_info = cat.get_module_version({'module_name': module_name, 'version': git_hash})
        release_tags = [t.lower() for t in version_info.get('release_tags') or []]
        for tag_pref in tag_pref_order:
            if tag_pref in release_tags:
                tag = tag_pref
                # stop at the first (most preferred) match
                break
    except Exception:
        # A failed version lookup is treated like a version with no release
        # tags: fall through to the module info lookup below.
        tag = None

    if tag is None:
        try:
            mod_info = cat.get_module_info({'module_name': module_name})
            # look for most recent (R > B > D) release tag with the app.
            for tag_pref in tag_pref_order:
                tag_info = mod_info.get(tag_pref, None)
                if tag_info is not None and app_name in tag_info.get('narrative_methods', []):
                    tag = tag_pref
                    break
        except Exception as e2:
            print("Exception found: {}".format(e2))

    new_meta = {
        'type': 'app',
        'attributes': {
            'title': method_info.get('name', 'Unnamed App'),
            # NOTE: `unicode` is a Python 2 builtin.
            'id': unicode(uuid.uuid4()),
            'status': 'new',
            'created': ts,          # default to last saved time
            'lastLoaded': ts,
        },
        'appCell': {
            'app': {
                'id': method_info.get('id', 'unknown'),
                'gitCommitHash': git_hash,
                'version': method_info.get('ver', None),
                'tag': tag
            },
            'state': {
                'edit': 'editing',
                'params': None,
                'code': None,
                'request': None,
                'result': None
            },
            'params': method_params,
            'user-settings': {
                'showCodeInputArea': False,
                'showDeveloperOptions': False
            }
        }
    }

    # Finally, turn it into a code cell.
    cell['cell_type'] = u'code'
    cell['execution_count'] = None
    cell['outputs'] = []
    cell['metadata']['kbase'] = new_meta
    del cell['metadata']['kb-cell']
    cell['source'] = u''
    return cell