def update_uimetadata(artifact_name,
                      uimetadata_path='/mlpipeline-ui-metadata.json'):
    """Update ui-metadata dictionary with a new web-app entry.

    Args:
        artifact_name: Name of the artifact
        uimetadata_path: path to mlpipeline-ui-metadata.json
    """
    # Default empty ui-metadata dict
    outputs = {"outputs": []}
    if os.path.exists(uimetadata_path):
        try:
            # Use a context manager so the file handle is closed properly
            with open(uimetadata_path, 'r') as f:
                outputs = json.loads(f.read())
            if not outputs.get('outputs', None):
                outputs['outputs'] = []
        except json.JSONDecodeError as e:
            print("Failed to parse json file {}: {}\n"
                  "This step will not be able to visualize artifacts in the"
                  " KFP UI".format(uimetadata_path, e))

    pod_name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    workflow_name = workflowutils.get_workflow_name(pod_name, namespace)
    html_artifact_entry = [{
        'type': 'web-app',
        'storage': 'minio',
        'source': 'minio://mlpipeline/artifacts/{}/{}/{}'.format(
            workflow_name, pod_name, artifact_name + '.tgz')
    }]
    outputs['outputs'] += html_artifact_entry
    with open(uimetadata_path, "w") as f:
        json.dump(outputs, f)
def update_uimetadata(artifact_name,
                      uimetadata_path=KFP_UI_METADATA_FILE_PATH):
    """Update ui-metadata dictionary with a new web-app entry.

    Args:
        artifact_name: Name of the artifact
        uimetadata_path: path to mlpipeline-ui-metadata.json
    """
    try:
        outputs = get_current_uimetadata(uimetadata_path,
                                         default_if_not_exist=True)
    except json.JSONDecodeError:
        log.error("This step will not be able to visualize artifacts in the"
                  " KFP UI")
        return

    pod_name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    workflow_name = workflowutils.get_workflow_name(pod_name, namespace)
    html_artifact_entry = [{
        'type': 'web-app',
        'storage': 'minio',
        'source': 'minio://mlpipeline/artifacts/{}/{}/{}'.format(
            workflow_name, pod_name, artifact_name + '.tgz')
    }]
    outputs['outputs'] += html_artifact_entry
    with open(uimetadata_path, "w") as f:
        json.dump(outputs, f)
def is_kfp_step() -> bool:
    """Detect if running inside a KFP step.

    The detection involves two steps:

      1. Auto-detect if the current Pod is part of an Argo workflow
      2. Read one of the annotations that the KFP API Server sets in the
         workflow object (one-off runs and recurring ones have different
         annotations).
    """
    log.info("Checking if running inside a KFP step...")
    try:
        namespace = podutils.get_namespace()
        workflow = workflowutils.get_workflow(
            workflowutils.get_workflow_name(podutils.get_pod_name(),
                                            namespace),
            namespace)
        annotations = workflow["metadata"]["annotations"]
        try:
            _ = annotations[KFP_RUN_NAME_ANNOTATION_KEY]
        except KeyError:
            _ = annotations[KFP_SWF_NAME_ANNOTATION_KEY]
    except Exception:
        log.info("Not in a KFP step.")
        return False
    log.info("Running in a KFP step.")
    return True
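
# Usage sketch (illustrative, not part of the original source): guard
# KFP-specific bookkeeping so the same code also runs in a plain notebook
# server, outside of any pipeline.
def _example_guarded_uimetadata_update(artifact_name):
    """Only touch the KFP UI metadata file when inside a pipeline step."""
    if is_kfp_step():
        update_uimetadata(artifact_name)
    else:
        log.info("Not running inside a KFP step; skipping UI metadata")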
def snapshot_pod(bucket=DEFAULT_BUCKET, wait=False, interactive=False):
    """Take a Rok snapshot of the current Pod."""
    rok = get_client()
    pod_name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    log.info("Taking a snapshot of pod %s in namespace %s ..."
             % (pod_name, namespace))
    commit_title = "Snapshot of pod {}".format(pod_name)
    commit_message = NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE.format(pod_name,
                                                             namespace)
    params = {"pod": pod_name,
              "default_container": podutils.get_container_name(),
              "namespace": namespace,
              "commit_title": commit_title,
              "commit_message": commit_message}
    # Create the bucket in case it does not exist
    create_rok_bucket(bucket)
    task_info = rok.version_register(bucket, pod_name, "pod", params,
                                     wait=wait and not interactive)
    if wait:
        if interactive:
            task_id = task_info["task"]["id"]
            return monitor_snapshot_task(task_id)
        else:
            log.info("Successfully took Rok snapshot")
    return task_info
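
# Usage sketch (illustrative): the wait/interactive flags interact as
# follows. With interactive=True the Rok task is registered without
# blocking and then monitored explicitly via monitor_snapshot_task(); with
# interactive=False and wait=True, version_register itself blocks until
# the snapshot completes.
def _example_snapshot_pod_blocking():
    """Take a pod snapshot and block until the Rok task finishes."""
    return snapshot_pod(wait=True, interactive=False)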
def __init__(self):
    log.info("%s Initializing MLMD context... %s", "-" * 10, "-" * 10)
    log.info("Connecting to MLMD...")
    self.store = self._connect()
    log.info("Successfully connected to MLMD")

    log.info("Getting step details...")
    log.info("Getting pod name...")
    self.pod_name = podutils.get_pod_name()
    log.info("Successfully retrieved pod name: %s", self.pod_name)

    log.info("Getting pod namespace...")
    self.pod_namespace = podutils.get_namespace()
    log.info("Successfully retrieved pod namespace: %s", self.pod_namespace)

    log.info("Getting pod...")
    self.pod = podutils.get_pod(self.pod_name, self.pod_namespace)
    log.info("Successfully retrieved pod")

    log.info("Getting workflow name from pod...")
    self.workflow_name = self.pod.metadata.labels.get(
        workflowutils.ARGO_WORKFLOW_LABEL_KEY)
    log.info("Successfully retrieved workflow name: %s", self.workflow_name)

    log.info("Getting workflow...")
    self.workflow = workflowutils.get_workflow(self.workflow_name,
                                               self.pod_namespace)
    log.info("Successfully retrieved workflow")

    workflow_labels = self.workflow["metadata"].get("labels", {})
    self.run_uuid = workflow_labels.get(podutils.KFP_RUN_ID_LABEL_KEY,
                                        self.workflow_name)
    log.info("Successfully retrieved KFP run ID: %s", self.run_uuid)

    workflow_annotations = self.workflow["metadata"].get("annotations", {})
    pipeline_spec = json.loads(workflow_annotations.get(
        "pipelines.kubeflow.org/pipeline_spec", "{}"))
    self.pipeline_name = pipeline_spec.get("name", self.workflow_name)
    if self.pipeline_name:
        log.info("Successfully retrieved KFP pipeline_name: %s",
                 self.pipeline_name)
    else:
        log.info("Could not retrieve KFP pipeline name")

    self.component_id = podutils.compute_component_id(self.pod)

    self.execution_hash = self.pod.metadata.annotations.get(
        MLMD_EXECUTION_HASH_PROPERTY_KEY)
    if self.execution_hash:
        log.info("Successfully retrieved execution hash: %s",
                 self.execution_hash)
    else:
        self.execution_hash = utils.random_string(10)
        log.info("Failed to retrieve execution hash."
                 " Generating random string...: %s", self.execution_hash)

    self.run_context = self._get_or_create_run_context()
    self.execution = self._create_execution_in_run_context()
    self._label_with_context_and_execution()
    log.info("%s Successfully initialized MLMD context %s", "-" * 10,
             "-" * 10)
def find_poddefault_labels_on_server(request):
    """Find server's labels that correspond to poddefaults applied."""
    request.log.info("Retrieving PodDefaults applied to server...")
    applied_poddefaults = kfutils.find_applied_poddefaults(
        podutils.get_pod(podutils.get_pod_name(),
                         podutils.get_namespace()),
        kfutils.list_poddefaults())
    pd_names = [pd["metadata"]["name"] for pd in applied_poddefaults]
    request.log.info("Retrieved applied PodDefaults: %s", pd_names)

    labels = kfutils.get_poddefault_labels(applied_poddefaults)
    request.log.info("PodDefault labels applied on server: %s",
                     ", ".join(["%s: %s" % (k, v)
                                for k, v in labels.items()]))
    return labels
def detect_run_uuid() -> str:
    """Get the workflow's UUID from inside a pipeline step."""
    namespace = podutils.get_namespace()
    workflow = workflowutils.get_workflow(
        workflowutils.get_workflow_name(podutils.get_pod_name(), namespace),
        namespace)
    run_uuid = (workflow["metadata"]
                .get("labels", {})
                .get(KFP_RUN_ID_LABEL_KEY, None))

    # The KFP api-server adds the run UUID as a label to workflows for
    # KFP >= 0.1.26. Return the run UUID if available, else return the
    # workflow UUID to maintain backwards compatibility.
    return run_uuid or workflow["metadata"]["uid"]
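
# Usage sketch (illustrative): resolve the current KFP run from inside a
# step, e.g. to tag snapshots or log output with the run they belong to.
def _example_log_current_run():
    """Log the KFP run this step belongs to."""
    run_uuid = detect_run_uuid()
    log.info("This step belongs to KFP run '%s'", run_uuid)
    return run_uuid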
def snapshot_notebook(bucket=DEFAULT_BUCKET, obj=None):
    """Take a Rok snapshot of the current Notebook."""
    rok = get_client()
    pod_name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    log.info("Taking a snapshot of notebook %s in namespace %s ..."
             % (pod_name, namespace))
    commit_title = "Snapshot of notebook {}".format(pod_name)
    commit_message = NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE.format(pod_name,
                                                             namespace)
    params = {"namespace": namespace,
              "commit_title": commit_title,
              "commit_message": commit_message}
    obj = obj or pod_name
    # Create the bucket in case it does not exist
    create_rok_bucket(bucket)
    return rok.version_register(bucket, obj, "jupyter", params)
def snapshot_notebook(request, bucket=DEFAULT_BUCKET, obj=None):
    """Perform a snapshot over the notebook's pod."""
    rok = _get_client()
    hostname = os.getenv("HOSTNAME")
    namespace = podutils.get_namespace()
    commit_title = "Snapshot of notebook {}".format(hostname)
    commit_message = NOTEBOOK_SNAPSHOT_COMMIT_MESSAGE.format(hostname,
                                                             namespace)
    params = {"namespace": namespace,
              "commit_title": commit_title,
              "commit_message": commit_message}
    obj = obj or podutils.get_pod_name()
    # Create the bucket in case it does not exist
    podutils.create_rok_bucket(bucket, client=rok)
    return rok.version_register(bucket, obj, "jupyter", params)
def check_rok_availability(request):
    """Check if Rok is available."""
    log = request.log if hasattr(request, "log") else logger
    try:
        rok = _get_client()
    except ImportError:
        log.exception("Failed to import RokClient")
        raise RPCNotFoundError(details="Rok Gateway Client module not found",
                               trans_id=request.trans_id)
    except Exception:
        log.exception("Failed to initialize RokClient")
        raise RPCServiceUnavailableError(details=("Failed to initialize"
                                                  " RokClient"),
                                         trans_id=request.trans_id)

    try:
        rok.account_info()
    except Exception:
        log.exception("Failed to retrieve account information")
        raise RPCServiceUnavailableError(details="Failed to access Rok",
                                         trans_id=request.trans_id)

    name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    try:
        suggestions = rok.version_register_suggest(DEFAULT_BUCKET, name,
                                                   "jupyter", "params:lab",
                                                   {"namespace": namespace},
                                                   ignore_env=True)
    except Exception as e:
        log.exception("Failed to list lab suggestions")
        message = "%s: %s" % (e.__class__.__name__, e)
        raise RPCServiceUnavailableError(message=message,
                                         details=("Rok cannot list notebooks"
                                                  " in this namespace"),
                                         trans_id=request.trans_id)

    if not any(s["value"] == name for s in suggestions):
        log.error("Could not find notebook '%s' in list of suggestions",
                  name)
        raise RPCNotFoundError(details=("Could not find this notebook in"
                                        " notebooks listed by Rok"),
                               trans_id=request.trans_id)
def update_uimetadata(artifact_name,
                      uimetadata_path=KFP_UI_METADATA_FILE_PATH):
    """Update ui-metadata dictionary with a new web-app entry.

    Args:
        artifact_name: Name of the artifact
        uimetadata_path: path to mlpipeline-ui-metadata.json
    """
    log.info("Adding artifact '%s' to KFP UI metadata...", artifact_name)
    try:
        outputs = get_current_uimetadata(uimetadata_path,
                                         default_if_not_exist=True)
    except json.JSONDecodeError:
        log.error("This step will not be able to visualize artifacts in the"
                  " KFP UI")
        return

    pod_name = podutils.get_pod_name()
    namespace = podutils.get_namespace()
    workflow_name = workflowutils.get_workflow_name(pod_name, namespace)
    html_artifact_entry = [{
        'type': 'web-app',
        'storage': 'minio',
        'source': 'minio://mlpipeline/artifacts/{}/{}/{}'.format(
            workflow_name, pod_name, artifact_name + '.tgz')
    }]
    outputs['outputs'] += html_artifact_entry

    try:
        utils.ensure_or_create_dir(uimetadata_path)
    except RuntimeError:
        log.exception("Writing to '%s' failed. This step will not be able to"
                      " visualize artifacts in the KFP UI.", uimetadata_path)
        return
    with open(uimetadata_path, "w") as f:
        json.dump(outputs, f)
    log.info("Artifact successfully added")
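
# Usage sketch (illustrative): after a step uploads an HTML artifact as
# '<artifact>.tgz' under the run's MinIO prefix, registering it here makes
# it appear in the step's "Artifacts" tab. The resulting
# mlpipeline-ui-metadata.json then looks similar to:
#
#   {"outputs": [{"type": "web-app",
#                 "storage": "minio",
#                 "source": "minio://mlpipeline/artifacts/<wf>/<pod>/"
#                           "report.tgz"}]}
def _example_register_html_report():
    """Register a 'report' web-app artifact with the KFP UI."""
    update_uimetadata("report")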
def snapshot_pipeline_step(pipeline, step, nb_path, before=True):
    """Take a snapshot of a pipeline step with Rok."""
    # Mark the start of the snapshotting procedure
    log.info("%s Starting Rok snapshot procedure... (%s) %s", "-" * 10,
             "before" if before else "after", "-" * 10)

    log.info("Retrieving KFP run ID...")
    run_uuid = podutils.get_run_uuid()
    log.info("Retrieved KFP run ID: %s", run_uuid)
    bucket = kfputils.get_experiment_from_run_id(run_uuid).name
    obj = "{}-{}".format(pipeline, run_uuid)
    commit_title = "Step: {} ({})".format(step,
                                          "start" if before else "end")
    commit_message = "Autosnapshot {} step '{}' of pipeline run '{}'".format(
        "before" if before else "after", step, run_uuid)
    environment = json.dumps({"KALE_PIPELINE_STEP": step,
                              "KALE_NOTEBOOK_PATH": nb_path,
                              "KALE_SNAPSHOT_FINAL": not before})
    metadata = json.dumps({
        "environment": environment,
        "kfp_runid": kfputils.format_kfp_run_id_uri(run_uuid),
        "state": "initial" if before else "final"})
    params = {"pod": podutils.get_pod_name(),
              "metadata": metadata,
              "default_container": "main",
              "commit_title": commit_title,
              "commit_message": commit_message}

    rok = get_client()
    # Create the bucket in case it does not exist
    create_rok_bucket(bucket)
    log.info("Registering Rok version for '%s/%s'...", bucket, obj)
    task_info = rok.version_register(bucket, obj, "pod", params, wait=True)
    # FIXME: How do we retrieve the base URL of the ROK UI?
    version = task_info["task"]["result"]["event"]["version"]
    url_path = ("/rok/buckets/%s/files/%s/versions/%s?ns=%s"
                % (utils.encode_url_component(bucket),
                   utils.encode_url_component(obj),
                   utils.encode_url_component(version),
                   utils.encode_url_component(podutils.get_namespace())))
    log.info("Successfully registered Rok version '%s'", version)

    log.info("Successfully created snapshot for step '%s'", step)
    if before:
        log.info("You can explore the state of the notebook at the beginning"
                 " of this step by spawning a new notebook from the following"
                 " Rok snapshot:")
        log.info("%s", url_path)

    reproduce_steps = ("To **explore the execution state** at the **%s** of"
                       " this step follow the instructions below:\n\n"
                       "1\\. View the [snapshot in the Rok UI](%s).\n\n"
                       "2\\. Copy the Rok URL.\n\n"
                       "3\\. Create a new Notebook Server by using this Rok"
                       " URL to autofill the form.")

    if before:
        md_source = (("# Rok autosnapshot\n"
                      "Rok has successfully created a snapshot for step"
                      " `%s`.\n\n" + reproduce_steps)
                     % (step, "beginning", url_path))
    else:
        md_source = (("# Rok final autosnapshot\n"
                      "Rok has successfully created a snapshot **after** the"
                      " execution of step `%s`.\n\n" + reproduce_steps)
                     % (step, "end", url_path))

    try:
        metadataui = kfputils.get_current_uimetadata(
            default_if_not_exist=True)
    except json.JSONDecodeError:
        log.error("This step will not create a Rok markdown artifact.")
    else:
        metadataui["outputs"].append({"storage": "inline",
                                      "source": md_source,
                                      "type": "markdown"})
        with open(kfputils.KFP_UI_METADATA_FILE_PATH, "w") as f:
            json.dump(metadataui, f)

    # Mark the end of the snapshotting procedure
    log.info("%s Successfully ran Rok snapshot procedure (%s) %s", "-" * 10,
             "before" if before else "after", "-" * 10)

    return task_info
def serve(model: Any,
          name: str = None,
          wait: bool = True,
          predictor: str = None,
          preprocessing_fn: Callable = None,
          preprocessing_assets: Dict = None) -> KFServer:
    """Main API used to serve models from a notebook or a pipeline step.

    This function procedurally deploys a KFServing InferenceService,
    starting from a model object. A summary list of actions follows:

    * Autogenerate an InferenceService name, if not provided
    * Process transformer function (and related assets)
    * Dump the model, to a path under a mounted PVC
    * Snapshot the PVC
    * Hydrate a new PVC from the new snapshot
    * Submit an InferenceService CR
    * Monitor the CR until it becomes ready

    FIXME: Improve documentation. Provide some examples in the docstring and
    explain how the preprocessing function parsing works.

    Args:
        model: Model object to be used as a predictor
        name (optional): Name of the predictor. Will be autogenerated if not
            provided
        wait (optional): Wait for the InferenceService to become ready.
            Default: True
        predictor (optional): Predictor type to be used for the
            InferenceService. If not provided it will be inferred using the
            matching marshalling backends.
        preprocessing_fn (optional): A processing function that will be
            deployed as a KFServing Transformer
        preprocessing_assets (optional): A dictionary with objects required
            by the preprocessing function. This is needed in case the
            preprocessing function references global objects.

    Returns:
        A KFServer instance
    """
    log.info("Starting serve procedure for model '%s'", model)
    if not name:
        name = "%s-%s" % (podutils.get_pod_name(), utils.random_string(5))

    # Validate and process transformer
    if preprocessing_fn:
        _prepare_transformer_assets(preprocessing_fn, preprocessing_assets)

    # Detect predictor type
    predictor_type = marshal.get_backend(model).predictor_type
    if predictor and predictor != predictor_type:
        raise RuntimeError("Trying to create an InferenceService with"
                           " predictor of type '%s' but the model is of type"
                           " '%s'" % (predictor, predictor_type))
    if not predictor_type:
        log.error("Kale does not yet support serving objects with '%s'"
                  " backend.\n\nPlease help us improve Kale by opening a new"
                  " issue at:\n"
                  "https://github.com/kubeflow-kale/kale/issues",
                  marshal.get_backend(model).display_name)
        utils.graceful_exit(-1)
    predictor = predictor_type  # in case `predictor` is None

    volume = podutils.get_volume_containing_path(PVC_ROOT)
    volume_name = volume[1].persistent_volume_claim.claim_name
    log.info("Model is contained in volume '%s'", volume_name)

    # Dump the model
    marshal.set_data_dir(PREDICTOR_MODEL_DIR)
    model_filepath = marshal.save(model, "model")
    log.info("Model saved successfully at '%s'", model_filepath)

    # Take snapshot
    task_info = rokutils.snapshot_pvc(volume_name,
                                      bucket=rokutils.SERVING_BUCKET,
                                      wait=True)
    task = rokutils.get_task(task_info["task"]["id"],
                             bucket=rokutils.SERVING_BUCKET)
    new_pvc_name = "%s-pvc-%s" % (name, utils.random_string(5))
    rokutils.hydrate_pvc_from_snapshot(task["result"]["event"]["object"],
                                       task["result"]["event"]["version"],
                                       new_pvc_name,
                                       bucket=rokutils.SERVING_BUCKET)

    # Cleanup: remove dumped model and transformer assets from the current
    # PVC
    utils.rm_r(os.path.join(PREDICTOR_MODEL_DIR,
                            os.path.basename(model_filepath)))
    utils.rm_r(TRANSFORMER_ASSETS_DIR, silent=True)

    # Need an absolute path from the *root* of the PVC. Remove the PVC_ROOT
    # prefix explicitly (str.lstrip() strips a character set, not a prefix)
    # and prepend '/'.
    pvc_model_path = "/" + PREDICTOR_MODEL_DIR[len(PVC_ROOT):].lstrip("/")
    # Tensorflow saves the model's files into a directory by itself
    if predictor == "tensorflow":
        pvc_model_path += "/" + os.path.basename(model_filepath).lstrip("/")

    kfserver = create_inference_service(
        name=name,
        predictor=predictor,
        pvc_name=new_pvc_name,
        model_path=pvc_model_path,
        transformer=preprocessing_fn is not None)

    if wait:
        monitor_inference_service(kfserver.name)
    return kfserver