예제 #1
0
    def _get_or_create_run_context(self):
        """Get or create the context that represents this run.

        The context is named after ``self.workflow_name`` and carries three
        string properties: ``run_id`` (the formatted KFP run id URI),
        ``pipeline_name`` and ``workflow_name``.

        Returns:
            Whatever ``self._get_or_create_context_with_type`` returns.
        """
        make_value = metadata_store_pb2.Value

        run_uri = kfputils.format_kfp_run_id_uri(self.run_uuid)
        run_id_prop = make_value(string_value=run_uri)
        workflow_prop = make_value(string_value=self.workflow_name)
        pipeline_prop = make_value(string_value=self.pipeline_name)

        props = {
            "run_id": run_id_prop,
            "pipeline_name": pipeline_prop,
            "workflow_name": workflow_prop,
        }
        # Every property declared on the run context type is a string
        prop_types = {key: metadata_store_pb2.STRING for key in props}

        return self._get_or_create_context_with_type(
            context_name=self.workflow_name,
            type_name=RUN_CONTEXT_TYPE_NAME,
            property_types=prop_types,
            properties=props)
예제 #2
0
    def _create_execution_in_run_context(self):
        """Create a RUNNING execution and associate it with the run context.

        The execution type is named after ``self.component_id``. Typed
        properties hold the run id URI, pipeline name, component id and the
        Kale execution state; custom properties hold the execution hash, pod
        name (under two keys), pod namespace and, again, the Kale state.

        Returns:
            The execution returned by ``self._create_execution_with_type``.
        """
        new_value = metadata_store_pb2.Value

        run_uri = kfputils.format_kfp_run_id_uri(self.run_uuid)
        run_id_prop = new_value(string_value=run_uri)
        pipeline_prop = new_value(string_value=self.pipeline_name)
        component_prop = new_value(string_value=self.component_id)
        running_state = metadata_store_pb2.Execution.RUNNING
        kale_state_prop = new_value(string_value=KALE_EXECUTION_STATE_RUNNING)

        typed_props = {
            "run_id": run_id_prop,
            "pipeline_name": pipeline_prop,
            "component_id": component_prop,
            MLMD_EXECUTION_STATE_KEY: kale_state_prop,
        }
        # All typed properties of the execution are strings
        typed_prop_types = {
            key: metadata_store_pb2.STRING for key in typed_props
        }

        hash_prop = new_value(string_value=self.execution_hash)
        pod_name_prop = new_value(string_value=self.pod_name)
        namespace_prop = new_value(string_value=self.pod_namespace)
        untyped_props = {
            MLMD_EXECUTION_HASH_PROPERTY_KEY: hash_prop,
            # The same pod name value is stored under both pod-name keys
            MLMD_EXECUTION_POD_NAME_PROPERTY_KEY: pod_name_prop,
            MLMD_EXECUTION_CACHE_POD_NAME_PROPERTY_KEY: pod_name_prop,
            MLMD_EXECUTION_POD_NAMESPACE_PROPERTY_KEY: namespace_prop,
            KALE_EXECUTION_STATE_KEY: kale_state_prop,
        }

        new_execution = self._create_execution_with_type(
            type_name=self.component_id,
            property_types=typed_prop_types,
            properties=typed_props,
            custom_properties=untyped_props,
            state=running_state)

        # Link the new execution to the run context
        link = metadata_store_pb2.Association(
            execution_id=new_execution.id,
            context_id=self.run_context.id)
        self.store.put_attributions_and_associations([], [link])
        return new_execution
예제 #3
0
def snapshot_pipeline_step(pipeline, step, nb_path, before=True):
    """Take a snapshot of a pipeline step with Rok.

    Registers a Rok version for the object ``<pipeline>-<run uuid>`` in the
    bucket named after the run's KFP experiment, then appends an inline
    markdown output describing the snapshot to the KFP UI metadata file.

    Args:
        pipeline: pipeline name, used as prefix of the Rok object name
        step: name of the step being snapshotted
        nb_path: notebook path, stored in the snapshot environment metadata
        before: True for a snapshot at the start of the step, False for a
            snapshot at its end

    Returns:
        The task info returned by the Rok client's ``version_register``.
    """
    # Resolve once all the words that depend on the snapshot moment
    if before:
        phase, title_tag = "before", "start"
        state, moment = "initial", "beginning"
    else:
        phase, title_tag = "after", "end"
        state, moment = "final", "end"
    dashes = "-" * 10

    # Mark the start of the snapshotting procedure
    log.info("%s Starting Rok snapshot procedure... (%s) %s",
             dashes, phase, dashes)

    log.info("Retrieving KFP run ID...")
    run_uuid = podutils.get_run_uuid()
    log.info("Retrieved KFP run ID: %s", run_uuid)
    bucket = kfputils.get_experiment_from_run_id(run_uuid).name
    obj = "{}-{}".format(pipeline, run_uuid)
    commit_title = "Step: {} ({})".format(step, title_tag)
    commit_message = "Autosnapshot {} step '{}' of pipeline run '{}'".format(
        phase, step, run_uuid)

    # The environment is serialized on its own and then embedded, as a
    # string, in the JSON-serialized metadata
    env_payload = {
        "KALE_PIPELINE_STEP": step,
        "KALE_NOTEBOOK_PATH": nb_path,
        "KALE_SNAPSHOT_FINAL": not before
    }
    metadata_payload = {
        "environment": json.dumps(env_payload),
        "kfp_runid": kfputils.format_kfp_run_id_uri(run_uuid),
        "state": state
    }
    params = {
        "pod": podutils.get_pod_name(),
        "metadata": json.dumps(metadata_payload),
        "default_container": "main",
        "commit_title": commit_title,
        "commit_message": commit_message
    }

    rok = get_client()
    # Create the bucket in case it does not exist
    create_rok_bucket(bucket)
    log.info("Registering Rok version for '%s/%s'...", bucket, obj)
    task_info = rok.version_register(bucket, obj, "pod", params, wait=True)
    # FIXME: How do we retrieve the base URL of the ROK UI?
    version = task_info["task"]["result"]["event"]["version"]
    encode = utils.encode_url_component
    url_path = "/rok/buckets/%s/files/%s/versions/%s?ns=%s" % (
        encode(bucket), encode(obj), encode(version),
        encode(podutils.get_namespace()))
    log.info("Successfully registered Rok version '%s'", version)

    log.info("Successfully created snapshot for step '%s'", step)
    if before:
        log.info("You can explore the state of the notebook at the beginning"
                 " of this step by spawning a new notebook from the following"
                 " Rok snapshot:")
    log.info("%s", url_path)

    reproduce_steps = ("To **explore the execution state** at the **%s** of"
                       " this step follow the instructions below:\n\n"
                       "1\\. View the [snapshot in the Rok UI](%s).\n\n"
                       "2\\. Copy the Rok URL.\n\n"
                       "3\\. Create a new Notebook Server by using this Rok"
                       " URL to autofill the form.")

    # Only the markdown intro differs between the two kinds of snapshot
    if before:
        md_intro = ("# Rok autosnapshot\n"
                    "Rok has successfully created a snapshot for step `%s`."
                    "\n\n")
    else:
        md_intro = ("# Rok final autosnapshot\n"
                    "Rok has successfully created a snapshot **after** the"
                    " execution of step `%s`.\n\n")
    md_source = (md_intro + reproduce_steps) % (step, moment, url_path)

    try:
        metadataui = kfputils.get_current_uimetadata(default_if_not_exist=True)
    except json.JSONDecodeError:
        log.error("This step will not create a Rok markdown artifact.")
    else:
        markdown_output = {
            "storage": "inline",
            "source": md_source,
            "type": "markdown"
        }
        metadataui["outputs"].append(markdown_output)
        with open(kfputils.KFP_UI_METADATA_FILE_PATH, "w") as ui_file:
            json.dump(metadataui, ui_file)

    # Mark the end of the snapshotting procedure
    log.info("%s Successfully ran Rok snapshot procedure (%s) %s",
             dashes, phase, dashes)

    return task_info
예제 #4
0
    def _create_rok_artifact_from_task(self, task):
        """Create a Rok snapshot artifact out of a Rok task description.

        Reads the snapshot id, version, object, bucket and commit title from
        ``task``, queries Rok for the version info and stores the result as
        artifact properties. For every group member that reports both an
        object and a version, its URL, mount point and hash are added as
        custom properties.

        Args:
            task: dict-like task info; must provide ``task["task"]`` with the
                ``result``, ``bucket`` and ``action_params`` entries read
                below

        Returns:
            Whatever ``self._create_artifact_with_type`` returns.
        """
        task_body = task["task"]
        event = task_body["result"]["event"]
        snapshot_id = event["id"]
        version = event["version"]
        obj = event["object"]
        bucket = task_body["bucket"]
        artifact_name = task_body["action_params"]["params"]["commit_title"]
        log.info("Creating %s artifact for '%s/%s?version=%s...'",
                 ROK_SNAPSHOT_ARTIFACT_TYPE_NAME, bucket, obj, version)

        from rok_gw_client.client import RokClient
        rok_client = RokClient()
        version_info = rok_client.version_info(bucket, obj, version)
        members = int(version_info["group_member_count"])
        url = version_info["rok_url"]
        uri = "/rok/buckets/%s/files/%s/versions/%s?ns=%s" % tuple(
            utils.encode_url_component(part)
            for part in (bucket, obj, version, self.pod_namespace))
        hash_value = version_info["hash"]

        new_value = metadata_store_pb2.Value
        values = {
            "name": new_value(string_value=artifact_name),
            "id": new_value(string_value=snapshot_id),
            "version": new_value(string_value=version),
            "object": new_value(string_value=obj),
            "bucket": new_value(string_value=bucket),
            "members": new_value(int_value=members),
            "URL": new_value(string_value=url),
            "hash": new_value(string_value=hash_value)
        }

        custom_properties = {}
        for idx in range(members):
            prefix = "member_%s" % idx
            member_obj = version_info.get("group_%s_object" % prefix)
            member_version = version_info.get("group_%s_version" % prefix)
            # Only members that report both an object and a version are kept
            if member_obj and member_version:
                member_info = rok_client.version_info(bucket, member_obj,
                                                      member_version)
                mount_point = new_value(
                    string_value=member_info.get("meta_mountpoint"))
                member_url = new_value(
                    string_value=member_info.get("rok_url"))
                member_hash = new_value(
                    string_value=member_info.get("hash"))
                custom_properties.update({
                    "%s_URL" % prefix: member_url,
                    "%s_mount_point" % prefix: mount_point,
                    "%s_hash" % prefix: member_hash,
                })

        # KFP UI groups Artifacts by run_id/pipeline_name/workspace before
        # switching to contexts:
        # https://github.com/kubeflow/pipelines/pull/2852
        # https://github.com/kubeflow/pipelines/pull/3485#issuecomment-612722767
        run_uri = kfputils.format_kfp_run_id_uri(self.run_uuid)
        custom_properties["run_id"] = new_value(string_value=run_uri)

        return self._create_artifact_with_type(
            uri, ROK_SNAPSHOT_ARTIFACT_TYPE_NAME,
            ROK_SNAPSHOT_ARTIFACT_PROPERTIES, values,
            custom_properties or None)