Beispiel #1
0
    def _create_rok_artifact_from_task(self, task):
        """Record a Rok snapshot as an artifact, based on a Rok task.

        The snapshot id, version and object are read from the task's result
        event; the bucket and the commit title (used as the artifact name)
        are read from the task itself. Rok is then queried for the version
        info, which supplies the member count, the Rok URL and the hash.
        Every group member for which the version info carries both an
        object and a version gets its URL, mount point and hash attached as
        custom properties.

        Args:
            task: mapping with the Rok task stored under the "task" key.

        Returns:
            The result of ``self._create_artifact_with_type``.
        """
        def _as_string_value(text):
            # Wrap a plain value into a string-typed metadata Value.
            return metadata_store_pb2.Value(string_value=text)

        task_body = task["task"]
        event = task_body["result"]["event"]
        snapshot_id = event["id"]
        version = event["version"]
        obj = event["object"]
        bucket = task_body["bucket"]
        artifact_name = task_body["action_params"]["params"]["commit_title"]
        log.info("Creating %s artifact for '%s/%s?version=%s...'",
                 ROK_SNAPSHOT_ARTIFACT_TYPE_NAME, bucket, obj, version)

        from rok_gw_client.client import RokClient
        rok_client = RokClient()
        version_info = rok_client.version_info(bucket, obj, version)
        members = int(version_info["group_member_count"])
        url = version_info["rok_url"]
        uri = "/rok/buckets/%s/files/%s/versions/%s" % (bucket, obj, version)
        hash_value = version_info["hash"]

        values = {
            "name": _as_string_value(artifact_name),
            "id": _as_string_value(snapshot_id),
            "version": _as_string_value(version),
            "object": _as_string_value(obj),
            "bucket": _as_string_value(bucket),
            "members": metadata_store_pb2.Value(int_value=members),
            "URL": _as_string_value(url),
            "hash": _as_string_value(hash_value),
        }

        custom_properties = {}
        for index in range(members):
            prefix = "member_%s" % index
            member_obj = version_info.get("group_%s_object" % prefix)
            member_version = version_info.get("group_%s_version" % prefix)
            if member_obj and member_version:
                member_info = rok_client.version_info(bucket, member_obj,
                                                      member_version)
                mount_point = _as_string_value(
                    member_info.get("meta_mountpoint"))
                member_url = _as_string_value(member_info.get("rok_url"))
                member_hash = _as_string_value(member_info.get("hash"))
                custom_properties.update({
                    "%s_URL" % prefix: member_url,
                    "%s_mount_point" % prefix: mount_point,
                    "%s_hash" % prefix: member_hash,
                })

        # Before contexts were adopted, the KFP UI grouped Artifacts by
        # run_id/pipeline_name/workspace, so the run id is always attached:
        # https://github.com/kubeflow/pipelines/pull/2852
        # https://github.com/kubeflow/pipelines/pull/3485#issuecomment-612722767
        custom_properties["run_id"] = _as_string_value(self.run_uuid)

        return self._create_artifact_with_type(
            uri,
            ROK_SNAPSHOT_ARTIFACT_TYPE_NAME,
            ROK_SNAPSHOT_ARTIFACT_PROPERTIES,
            values,
            custom_properties or None)
Beispiel #2
0
def snapshot_pipeline_step(pipeline, step, nb_path):
    """Snapshot the running pipeline step with Rok and advertise the result.

    The snapshot is registered in a bucket named after the run's
    experiment, under the object name "<pipeline>-<run uuid>". Once the
    registration completes, the path of the snapshot in the Rok UI is
    printed and a markdown note pointing to it is written to
    /mlpipeline-ui-metadata.json.

    Args:
        pipeline: pipeline name, used as prefix of the Rok object name.
        step: name of the step being snapshotted.
        nb_path: notebook path stored in the snapshot environment.
    """
    from rok_gw_client.client import RokClient

    run_uuid = get_run_uuid()
    experiment = kfp_utils.get_experiment_from_run_id(run_uuid)
    bucket = experiment.name
    obj = "{}-{}".format(pipeline, run_uuid)
    title = "Step: {}".format(step)
    message = "Step '{}' of pipeline run '{}'".format(step, run_uuid)

    # The environment is serialized on its own and then embedded, as a
    # string, in the serialized snapshot metadata.
    step_environment = json.dumps({
        "KALE_PIPELINE_STEP": step,
        "KALE_NOTEBOOK_PATH": nb_path
    })
    snapshot_metadata = json.dumps({"environment": step_environment,
                                    "kfp_runid": run_uuid})
    register_params = {
        "pod": get_pod_name(),
        "metadata": snapshot_metadata,
        "default_container": "main",
        "commit_title": title,
        "commit_message": message
    }

    rok = RokClient()
    # Make sure the bucket exists before registering a version in it
    create_rok_bucket(bucket, client=rok)
    task_info = rok.version_register(bucket, obj, "pod", register_params,
                                     wait=True)
    print("Successfully created snapshot for step '%s'" % step)
    print("You can explore the state of the notebook at the beginning"
          " of this step by spawning a new notebook from the following"
          " Rok snapshot:")

    # FIXME: How do we retrieve the base URL of the ROK UI?
    version = task_info["task"]["result"]["event"]["version"]
    encoded_parts = tuple(encode_url_component(part)
                          for part in (bucket, obj, version))
    url_path = "/rok/buckets/%s/files/%s/versions/%s" % encoded_parts
    print("\n%s\n" % url_path)

    md_template = ("# Rok autosnapshot\n"
                   "Rok has successfully created a snapshot for step `%s`.\n\n"
                   "To **explore the execution state** at the beginning of "
                   "this step follow the instructions below:\n\n"
                   "1\\. View the [snapshot in the Rok UI](%s).\n\n"
                   "2\\. Copy the Rok URL.\n\n"
                   "3\\. Create a new Notebook Server by using this Rok URL to "
                   "autofill the form.")
    markdown_output = {
        "storage": "inline",
        "source": md_template % (step, url_path),
        "type": "markdown"
    }
    ui_metadata = {"outputs": [markdown_output]}
    with open("/mlpipeline-ui-metadata.json", "w") as ui_file:
        json.dump(ui_metadata, ui_file)
Beispiel #3
0
def _get_client():
    """Return the module-wide Rok client, creating it on first use."""
    global _client

    # Reuse the client stored in the module global once it has been built.
    if _client is not None:
        return _client

    _client = RokClient()
    return _client
Beispiel #4
0
def create_rok_bucket(bucket, client=None):
    """Create a Rok bucket unless it already exists.

    Args:
        bucket: name of the bucket.
        client: Rok client to use; a new ``RokClient`` is created when None.

    Returns:
        ``(False, bucket_info)`` when the bucket already exists, otherwise
        the result of ``client.bucket_create(bucket)``.

    Raises:
        GatewayClientError: if looking up the bucket fails with a status
            code other than 404.
    """
    from rok_gw_client.client import RokClient, GatewayClientError
    client = RokClient() if client is None else client

    # FIXME: Currently the Rok API only supports update-or-create for buckets,
    # so we do a HEAD first to avoid updating an existing bucket. This
    # obviously has a small race, which should be removed by extending the Rok
    # API with an exclusive creation API call.
    try:
        existing = client.bucket_info(bucket)
    except GatewayClientError as err:
        if err.response.status_code == 404:
            # The bucket is missing: this is the only case where we create it.
            logger.info("Creating bucket: %s", bucket)
            return client.bucket_create(bucket)
        raise
    else:
        return False, existing
Beispiel #5
0
def _get_client():
    """Return the module-wide Rok client, creating it on first use."""
    from rok_gw_client.client import RokClient

    global _client

    # Reuse the client stored in the module global once it has been built.
    if _client is not None:
        return _client

    _client = RokClient()
    return _client
Beispiel #6
0
def get_client():
    """Return the shared Rok client, instantiating it if it is not set yet."""
    from rok_gw_client.client import RokClient

    global _client

    # Only build a new client when the module global is still empty.
    if _client is not None:
        return _client

    _client = RokClient()
    return _client
Beispiel #7
0
def create_rok_bucket(bucket, client=None):
    """Create a new Rok bucket, unless one with that name already exists.

    Args:
        bucket: name of the bucket.
        client: Rok client to use; a new ``RokClient`` is created when None.

    Returns:
        A ``(created, bucket_info)`` pair: ``(False, bucket_info)`` when the
        bucket already exists, otherwise the pair returned by
        ``client.bucket_create(bucket)``.

    Raises:
        GatewayClientError: if looking up the bucket fails with a status
            code other than 404.
    """
    log.info("Creating Rok bucket '%s'...", bucket)
    from rok_gw_client.client import RokClient, GatewayClientError
    client = RokClient() if client is None else client

    # FIXME: Currently the Rok API only supports update-or-create for buckets,
    # so we do a HEAD first to avoid updating an existing bucket. This
    # obviously has a small race, which should be removed by extending the Rok
    # API with an exclusive creation API call.
    try:
        existing = client.bucket_info(bucket)
    except GatewayClientError as err:
        if err.response.status_code == 404:
            # The bucket is missing: this is the only case where we create it.
            created, new_info = client.bucket_create(bucket)
            log.info("Successfully created Rok bucket '%s'", bucket)
            return created, new_info
        raise
    else:
        log.info("Rok bucket '%s' already exists", bucket)
        return False, existing