def _get_or_create_run_context(self):
    """Get or create the MLMD context that represents the current run.

    The context is named after ``self.workflow_name`` and carries three
    string properties: the KFP run id URI (built from ``self.run_uuid``),
    the pipeline name and the workflow name. The actual lookup/creation is
    delegated to ``_get_or_create_context_with_type``.

    Returns:
        Whatever ``_get_or_create_context_with_type`` returns for the
        ``RUN_CONTEXT_TYPE_NAME`` context type.
    """
    # (property key, raw text) pairs. The tuple order fixes the key order
    # of both dictionaries handed to the context helper.
    string_fields = (
        ("run_id", kfputils.format_kfp_run_id_uri(self.run_uuid)),
        ("pipeline_name", self.pipeline_name),
        ("workflow_name", self.workflow_name),
    )
    declared_types = {
        key: metadata_store_pb2.STRING for key, _ in string_fields
    }
    wrapped_values = {
        key: metadata_store_pb2.Value(string_value=text)
        for key, text in string_fields
    }
    return self._get_or_create_context_with_type(
        context_name=self.workflow_name,
        type_name=RUN_CONTEXT_TYPE_NAME,
        property_types=declared_types,
        properties=wrapped_values)
def _create_execution_in_run_context(self):
    """Create a RUNNING execution for this step and tie it to the run.

    The execution type is named after ``self.component_id``. Its typed
    properties hold the KFP run id URI, the pipeline name, the component
    id and the Kale "running" state; its custom properties hold the
    execution hash, the pod name (under both the pod-name and the
    cache-pod-name keys), the pod namespace and the Kale state.

    After creation, the execution is associated with ``self.run_context``
    through ``self.store.put_attributions_and_associations``.

    Returns:
        The execution returned by ``_create_execution_with_type``.
    """
    def as_string(text):
        # Wrap a raw string into an MLMD string Value.
        return metadata_store_pb2.Value(string_value=text)

    typed_properties = {
        "run_id": as_string(kfputils.format_kfp_run_id_uri(self.run_uuid)),
        "pipeline_name": as_string(self.pipeline_name),
        "component_id": as_string(self.component_id),
    }
    # The very same Value object is reused for the typed state property
    # and for the Kale-specific custom state property below.
    running_value = as_string(KALE_EXECUTION_STATE_RUNNING)
    typed_properties[MLMD_EXECUTION_STATE_KEY] = running_value
    # Every typed property is a string, so derive the type map from the
    # property keys (same keys, same order).
    typed_property_types = {
        key: metadata_store_pb2.STRING for key in typed_properties
    }

    hash_value = as_string(self.execution_hash)
    pod_name_value = as_string(self.pod_name)
    namespace_value = as_string(self.pod_namespace)
    untyped_properties = {
        MLMD_EXECUTION_HASH_PROPERTY_KEY: hash_value,
        MLMD_EXECUTION_POD_NAME_PROPERTY_KEY: pod_name_value,
        MLMD_EXECUTION_CACHE_POD_NAME_PROPERTY_KEY: pod_name_value,
        MLMD_EXECUTION_POD_NAMESPACE_PROPERTY_KEY: namespace_value,
        KALE_EXECUTION_STATE_KEY: running_value,
    }

    execution = self._create_execution_with_type(
        type_name=self.component_id,
        property_types=typed_property_types,
        properties=typed_properties,
        custom_properties=untyped_properties,
        state=metadata_store_pb2.Execution.RUNNING)

    # Link the new execution to the run context (no attributions).
    run_link = metadata_store_pb2.Association(
        execution_id=execution.id, context_id=self.run_context.id)
    self.store.put_attributions_and_associations([], [run_link])
    return execution
def snapshot_pipeline_step(pipeline, step, nb_path, before=True):
    """Take a snapshot of a pipeline step with Rok.

    Registers a new Rok version of the current pod in the bucket named
    after the run's KFP experiment, then appends an inline markdown
    output describing the snapshot to the KFP UI metadata file.

    Args:
        pipeline: Pipeline name; combined with the run id to form the
            Rok object name (``<pipeline>-<run_uuid>``).
        step: Step name; recorded in the commit title/message and in the
            snapshot environment as ``KALE_PIPELINE_STEP``.
        nb_path: Notebook path; recorded in the snapshot environment as
            ``KALE_NOTEBOOK_PATH``.
        before: True when the snapshot is taken before the step runs,
            False when it is taken after.

    Returns:
        The task info returned by ``rok.version_register``.
    """
    # Wording that depends on whether this is the "before" or "after"
    # snapshot.
    phase = "before" if before else "after"
    separator = "-" * 10

    # Mark the start of the snapshotting procedure
    log.info("%s Starting Rok snapshot procedure... (%s) %s",
             separator, phase, separator)

    log.info("Retrieving KFP run ID...")
    run_uuid = podutils.get_run_uuid()
    log.info("Retrieved KFP run ID: %s", run_uuid)

    experiment = kfputils.get_experiment_from_run_id(run_uuid)
    bucket = experiment.name
    obj = "{}-{}".format(pipeline, run_uuid)

    title = "Step: {} ({})".format(step, "start" if before else "end")
    message = "Autosnapshot {} step '{}' of pipeline run '{}'".format(
        phase, step, run_uuid)

    # The environment is serialized on its own and then embedded, as a
    # string, inside the serialized metadata.
    environment = json.dumps({
        "KALE_PIPELINE_STEP": step,
        "KALE_NOTEBOOK_PATH": nb_path,
        "KALE_SNAPSHOT_FINAL": not before,
    })
    metadata = json.dumps({
        "environment": environment,
        "kfp_runid": kfputils.format_kfp_run_id_uri(run_uuid),
        "state": "initial" if before else "final",
    })
    params = {
        "pod": podutils.get_pod_name(),
        "metadata": metadata,
        "default_container": "main",
        "commit_title": title,
        "commit_message": message,
    }

    rok = get_client()
    # Create the bucket in case it does not exist
    create_rok_bucket(bucket)
    log.info("Registering Rok version for '%s/%s'...", bucket, obj)
    task_info = rok.version_register(bucket, obj, "pod", params, wait=True)

    # FIXME: How do we retrieve the base URL of the ROK UI?
    version = task_info["task"]["result"]["event"]["version"]
    quoted_bucket = utils.encode_url_component(bucket)
    quoted_obj = utils.encode_url_component(obj)
    quoted_version = utils.encode_url_component(version)
    quoted_namespace = utils.encode_url_component(podutils.get_namespace())
    url_path = ("/rok/buckets/%s/files/%s/versions/%s?ns=%s"
                % (quoted_bucket, quoted_obj, quoted_version,
                   quoted_namespace))
    log.info("Successfully registered Rok version '%s'", version)

    log.info("Successfully created snapshot for step '%s'", step)
    if before:
        log.info("You can explore the state of the notebook at the beginning"
                 " of this step by spawning a new notebook from the following"
                 " Rok snapshot:")
    log.info("%s", url_path)

    # Markdown shown in the KFP UI: a phase-specific header followed by
    # the shared reproduction instructions.
    reproduce_steps = ("To **explore the execution state** at the **%s** of"
                       " this step follow the instructions below:\n\n"
                       "1\\. View the [snapshot in the Rok UI](%s).\n\n"
                       "2\\. Copy the Rok URL.\n\n"
                       "3\\. Create a new Notebook Server by using this Rok"
                       " URL to autofill the form.")
    if before:
        md_header = ("# Rok autosnapshot\n"
                     "Rok has successfully created a snapshot for step `%s`."
                     "\n\n")
        moment = "beginning"
    else:
        md_header = ("# Rok final autosnapshot\n"
                     "Rok has successfully created a snapshot **after** the"
                     " execution of step `%s`.\n\n")
        moment = "end"
    md_source = (md_header + reproduce_steps) % (step, moment, url_path)

    try:
        ui_metadata = kfputils.get_current_uimetadata(
            default_if_not_exist=True)
    except json.JSONDecodeError:
        # The existing UI metadata could not be parsed: skip the markdown
        # artifact but still complete the snapshot procedure.
        log.error("This step will not create a Rok markdown artifact.")
    else:
        markdown_output = {
            "storage": "inline",
            "source": md_source,
            "type": "markdown",
        }
        ui_metadata["outputs"].append(markdown_output)
        with open(kfputils.KFP_UI_METADATA_FILE_PATH, "w") as ui_file:
            json.dump(ui_metadata, ui_file)

    # Mark the end of the snapshotting procedure
    log.info("%s Successfully ran Rok snapshot procedure (%s) %s",
             separator, phase, separator)

    return task_info
def _create_rok_artifact_from_task(self, task):
    """Create an MLMD artifact describing the Rok snapshot made by a task.

    Reads the snapshot id, version, object, bucket and commit title from
    ``task``, asks Rok for the version info, and stores those details as
    typed properties of the artifact. For every group member that Rok
    reports with both an object and a version, the member's URL, mount
    point and hash are stored as custom properties. The KFP run id URI is
    always stored as the ``run_id`` custom property.

    Args:
        task: Rok task info mapping; it is read at
            ``task["task"]["result"]["event"]``, ``task["task"]["bucket"]``
            and ``task["task"]["action_params"]["params"]``.

    Returns:
        Whatever ``_create_artifact_with_type`` returns.
    """
    def text_value(text):
        # Wrap a raw string into an MLMD string Value.
        return metadata_store_pb2.Value(string_value=text)

    event = task["task"]["result"]["event"]
    snapshot_id = event["id"]
    version = event["version"]
    obj = event["object"]
    bucket = task["task"]["bucket"]
    artifact_name = task["task"]["action_params"]["params"]["commit_title"]

    log.info("Creating %s artifact for '%s/%s?version=%s...'",
             ROK_SNAPSHOT_ARTIFACT_TYPE_NAME, bucket, obj, version)

    from rok_gw_client.client import RokClient
    rok_client = RokClient()
    version_details = rok_client.version_info(bucket, obj, version)
    member_count = int(version_details["group_member_count"])
    rok_url = version_details["rok_url"]

    quoted_bucket = utils.encode_url_component(bucket)
    quoted_obj = utils.encode_url_component(obj)
    quoted_version = utils.encode_url_component(version)
    quoted_namespace = utils.encode_url_component(self.pod_namespace)
    uri = ("/rok/buckets/%s/files/%s/versions/%s?ns=%s"
           % (quoted_bucket, quoted_obj, quoted_version, quoted_namespace))
    snapshot_hash = version_details["hash"]

    values = {
        "name": text_value(artifact_name),
        "id": text_value(snapshot_id),
        "version": text_value(version),
        "object": text_value(obj),
        "bucket": text_value(bucket),
        "members": metadata_store_pb2.Value(int_value=member_count),
        "URL": text_value(rok_url),
        "hash": text_value(snapshot_hash),
    }

    custom_properties = {}
    for index in range(member_count):
        member_name = "member_%s" % index
        member_obj = version_details.get("group_%s_object" % member_name)
        member_version = version_details.get(
            "group_%s_version" % member_name)
        # Only members with both an object and a version are recorded.
        if member_obj and member_version:
            member_details = rok_client.version_info(
                bucket, member_obj, member_version)
            mount_point = text_value(member_details.get("meta_mountpoint"))
            member_url = text_value(member_details.get("rok_url"))
            member_hash = text_value(member_details.get("hash"))
            custom_properties["%s_URL" % member_name] = member_url
            custom_properties["%s_mount_point" % member_name] = mount_point
            custom_properties["%s_hash" % member_name] = member_hash

    # KFP UI groups Artifacts by run_id/pipeline_name/workspace before
    # switching to contexts:
    # https://github.com/kubeflow/pipelines/pull/2852
    # https://github.com/kubeflow/pipelines/pull/3485#issuecomment-612722767
    custom_properties["run_id"] = text_value(
        kfputils.format_kfp_run_id_uri(self.run_uuid))

    return self._create_artifact_with_type(
        uri,
        ROK_SNAPSHOT_ARTIFACT_TYPE_NAME,
        ROK_SNAPSHOT_ARTIFACT_PROPERTIES,
        values,
        custom_properties or None)