Example #1
0
def snapshot_pipeline_step(pipeline, step, nb_path, before=True):
    """Snapshot the pod running a pipeline step by registering a Rok version.

    The version is registered in a bucket named after the KFP experiment of
    the current run, under an object named ``<pipeline>-<run id>``.

    Args:
        pipeline: pipeline name, used as the prefix of the Rok object name
        step: step name, recorded in the commit title and message and in
            the snapshot environment
        nb_path: notebook path, recorded in the snapshot environment
        before: True when snapshotting at the start of the step, False when
            snapshotting at its end. The KFP UI markdown output file is
            written only when this is True.

    Returns: the task info returned by the Rok client's ``version_register``
    """
    when = "before" if before else "after"
    dashes = "-" * 10
    # Opening banner of the snapshotting procedure
    log.info("%s Starting Rok snapshot procedure... (%s) %s",
             dashes, when, dashes)
    from rok_gw_client.client import RokClient

    log.info("Retrieving KFP run ID...")
    run_id = get_run_uuid()
    log.info("Retrieved KFP run ID: %s", run_id)

    # Bucket is named after the experiment, object after pipeline and run
    experiment = kfputils.get_experiment_from_run_id(run_id)
    bucket_name = experiment.name
    object_name = "{0}-{1}".format(pipeline, run_id)

    title = "Step: {0} ({1})".format(step, "start" if before else "end")
    message = "Autosnapshot {0} step '{1}' of pipeline run '{2}'".format(
        when, step, run_id)

    # The environment is serialized on its own and then embedded, as a
    # string, in the serialized snapshot metadata
    env_json = json.dumps({"KALE_PIPELINE_STEP": step,
                           "KALE_NOTEBOOK_PATH": nb_path})
    snapshot_meta = json.dumps({"environment": env_json,
                                "kfp_runid": run_id})
    register_params = {
        "pod": get_pod_name(),
        "metadata": snapshot_meta,
        "default_container": "main",
        "commit_title": title,
        "commit_message": message
    }

    client = RokClient()
    # Make sure the bucket exists before registering a version in it
    create_rok_bucket(bucket_name, client=client)
    log.info("Registering Rok version for '%s/%s'...",
             bucket_name, object_name)
    task_info = client.version_register(bucket_name, object_name, "pod",
                                        register_params, wait=True)
    # FIXME: How do we retrieve the base URL of the ROK UI?
    version = task_info["task"]["result"]["event"]["version"]
    encoded_parts = tuple(encode_url_component(part)
                          for part in (bucket_name, object_name, version))
    url_path = "/rok/buckets/%s/files/%s/versions/%s" % encoded_parts
    log.info("Successfully registered Rok version '%s'", version)

    log.info("Successfully created snapshot for step '%s'", step)
    if before:
        log.info("You can explore the state of the notebook at the"
                 " beginning of this step by spawning a new notebook from"
                 " the following Rok snapshot:")
    log.info("%s", url_path)

    md_template = (
        "# Rok autosnapshot\n"
        "Rok has successfully created a snapshot for step `%s`.\n\n"
        "To **explore the execution state** at the beginning of this step"
        " follow the instructions below:\n\n"
        "1\\. View the [snapshot in the Rok UI](%s).\n\n"
        "2\\. Copy the Rok URL.\n\n"
        "3\\. Create a new Notebook Server by using this Rok URL to autofill"
        " the form.")
    md_source = md_template % (step, url_path)
    if before:
        # Only the "before" snapshot writes the inline markdown output
        ui_metadata = {
            "outputs": [{
                "storage": "inline",
                "source": md_source,
                "type": "markdown"
            }]
        }
        with open("/mlpipeline-ui-metadata.json", "w") as out_file:
            json.dump(ui_metadata, out_file)

    # Closing banner of the snapshotting procedure
    log.info("%s Successfully ran Rok snapshot procedure (%s) %s",
             dashes, when, dashes)

    return task_info
Example #2
0
def snapshot_pipeline_step(pipeline, step, nb_path, before=True):
    """Snapshot the pod running a pipeline step by registering a Rok version.

    The version is registered in a bucket named after the KFP experiment of
    the current run, under an object named ``<pipeline>-<run id>``. A
    markdown output describing the snapshot is appended to the current KFP
    UI metadata, unless that metadata cannot be decoded as JSON.

    Args:
        pipeline: pipeline name, used as the prefix of the Rok object name
        step: step name, recorded in the commit title and message and in
            the snapshot environment
        nb_path: notebook path, recorded in the snapshot environment
        before: True when snapshotting at the start of the step ("initial"
            state), False when snapshotting at its end ("final" state)

    Returns: the task info returned by the Rok client's ``version_register``
    """
    when = "before" if before else "after"
    dashes = "-" * 10
    # Opening banner of the snapshotting procedure
    log.info("%s Starting Rok snapshot procedure... (%s) %s",
             dashes, when, dashes)

    log.info("Retrieving KFP run ID...")
    run_id = podutils.get_run_uuid()
    log.info("Retrieved KFP run ID: %s", run_id)

    # Bucket is named after the experiment, object after pipeline and run
    experiment = kfputils.get_experiment_from_run_id(run_id)
    bucket_name = experiment.name
    object_name = "{0}-{1}".format(pipeline, run_id)

    title = "Step: {0} ({1})".format(step, "start" if before else "end")
    message = "Autosnapshot {0} step '{1}' of pipeline run '{2}'".format(
        when, step, run_id)

    # The environment is serialized on its own and then embedded, as a
    # string, in the serialized snapshot metadata
    env_json = json.dumps({
        "KALE_PIPELINE_STEP": step,
        "KALE_NOTEBOOK_PATH": nb_path,
        "KALE_SNAPSHOT_FINAL": not before
    })
    snapshot_meta = json.dumps({
        "environment": env_json,
        "kfp_runid": kfputils.format_kfp_run_id_uri(run_id),
        "state": "initial" if before else "final"
    })
    register_params = {
        "pod": podutils.get_pod_name(),
        "metadata": snapshot_meta,
        "default_container": "main",
        "commit_title": title,
        "commit_message": message
    }

    client = get_client()
    # Make sure the bucket exists before registering a version in it
    create_rok_bucket(bucket_name)
    log.info("Registering Rok version for '%s/%s'...",
             bucket_name, object_name)
    task_info = client.version_register(bucket_name, object_name, "pod",
                                        register_params, wait=True)
    # FIXME: How do we retrieve the base URL of the ROK UI?
    version = task_info["task"]["result"]["event"]["version"]

    encode = utils.encode_url_component
    bucket_part = encode(bucket_name)
    object_part = encode(object_name)
    version_part = encode(version)
    namespace_part = encode(podutils.get_namespace())
    url_path = ("/rok/buckets/%s/files/%s/versions/%s?ns=%s"
                % (bucket_part, object_part, version_part, namespace_part))
    log.info("Successfully registered Rok version '%s'", version)

    log.info("Successfully created snapshot for step '%s'", step)
    if before:
        log.info("You can explore the state of the notebook at the"
                 " beginning of this step by spawning a new notebook from"
                 " the following Rok snapshot:")
    log.info("%s", url_path)

    # Instructions shared by the "before" and "after" markdown outputs
    reproduce_steps = (
        "To **explore the execution state** at the **%s** of this step"
        " follow the instructions below:\n\n"
        "1\\. View the [snapshot in the Rok UI](%s).\n\n"
        "2\\. Copy the Rok URL.\n\n"
        "3\\. Create a new Notebook Server by using this Rok URL to autofill"
        " the form.")

    if before:
        intro = ("# Rok autosnapshot\n"
                 "Rok has successfully created a snapshot for step `%s`.\n\n")
        moment = "beginning"
    else:
        intro = ("# Rok final autosnapshot\n"
                 "Rok has successfully created a snapshot **after** the "
                 "execution of step `%s`.\n\n")
        moment = "end"
    md_source = (intro + reproduce_steps) % (step, moment, url_path)

    try:
        ui_metadata = kfputils.get_current_uimetadata(
            default_if_not_exist=True)
    except json.JSONDecodeError:
        # Undecodable UI metadata: skip the markdown output for this step
        log.error("This step will not create a Rok markdown artifact.")
    else:
        ui_metadata["outputs"].append({
            "storage": "inline",
            "source": md_source,
            "type": "markdown"
        })
        with open(kfputils.KFP_UI_METADATA_FILE_PATH, "w") as out_file:
            json.dump(ui_metadata, out_file)

    # Closing banner of the snapshotting procedure
    log.info("%s Successfully ran Rok snapshot procedure (%s) %s",
             dashes, when, dashes)

    return task_info
Example #3
0
    def _create_rok_artifact_from_task(self, task):
        """Create a Rok snapshot artifact out of a Rok task info dict.

        Args:
            task: Rok task info. Its ``task`` entry is read for the result
                event (``id``, ``version``, ``object``), the ``bucket`` and
                the ``commit_title`` of the action params, which becomes
                the artifact name.

        Returns: whatever ``self._create_artifact_with_type`` returns
        """
        task_body = task["task"]
        event = task_body["result"]["event"]
        snapshot_id = event["id"]
        version = event["version"]
        obj = event["object"]
        bucket = task_body["bucket"]
        artifact_name = task_body["action_params"]["params"]["commit_title"]
        log.info("Creating %s artifact for '%s/%s?version=%s...'",
                 ROK_SNAPSHOT_ARTIFACT_TYPE_NAME, bucket, obj, version)

        from rok_gw_client.client import RokClient
        rok_client = RokClient()
        version_info = rok_client.version_info(bucket, obj, version)
        members = int(version_info["group_member_count"])
        url = version_info["rok_url"]

        encode = utils.encode_url_component
        bucket_part = encode(bucket)
        object_part = encode(obj)
        version_part = encode(version)
        namespace_part = encode(self.pod_namespace)
        uri = ("/rok/buckets/%s/files/%s/versions/%s?ns=%s"
               % (bucket_part, object_part, version_part, namespace_part))
        hash_value = version_info["hash"]

        def as_string(text):
            # Wrap a value as a string-typed metadata store Value
            return metadata_store_pb2.Value(string_value=text)

        values = {
            "name": as_string(artifact_name),
            "id": as_string(snapshot_id),
            "version": as_string(version),
            "object": as_string(obj),
            "bucket": as_string(bucket),
            "members": metadata_store_pb2.Value(int_value=members),
            "URL": as_string(url),
            "hash": as_string(hash_value)
        }

        # One URL / mount point / hash triple per group member; members
        # with no object or no version in the version info are skipped
        custom_properties = {}
        member_fields = (("URL", "rok_url"),
                         ("mount_point", "meta_mountpoint"),
                         ("hash", "hash"))
        for idx in range(members):
            prefix = "member_%s" % idx
            member_obj = version_info.get("group_%s_object" % prefix)
            member_version = version_info.get("group_%s_version" % prefix)
            if member_obj and member_version:
                member_info = rok_client.version_info(bucket, member_obj,
                                                      member_version)
                for suffix, info_key in member_fields:
                    custom_properties["%s_%s" % (prefix, suffix)] = (
                        as_string(member_info.get(info_key)))

        # KFP UI groups Artifacts by run_id/pipeline_name/workspace before
        # switching to contexts:
        # https://github.com/kubeflow/pipelines/pull/2852
        # https://github.com/kubeflow/pipelines/pull/3485#issuecomment-612722767
        custom_properties["run_id"] = as_string(
            kfputils.format_kfp_run_id_uri(self.run_uuid))

        return self._create_artifact_with_type(
            uri, ROK_SNAPSHOT_ARTIFACT_TYPE_NAME,
            ROK_SNAPSHOT_ARTIFACT_PROPERTIES, values,
            custom_properties or None)