コード例 #1
0
def create_and_wait_kfp_run(pipeline_id: str,
                            run_name: str,
                            version_id: str = None,
                            experiment_name: str = "Default",
                            namespace: str = "kubeflow",
                            **kwargs):
    """Launch a KFP run, block until it ends and hand back its metrics.

    The run is created from the given pipeline, with ``kwargs`` forwarded to
    the run-creation helper. Before waiting for the run, two annotation steps
    are attempted:

    * the Katib Trial named ``run_name`` is annotated with the run UUID;
    * the run's Workflow is annotated with the name and UID of both the
      Trial and its owner Katib experiment.

    Only the two annotate calls are best effort (a failure is logged and
    execution continues). Looking up the Workflow, the Trial or the owner
    experiment is not guarded, so an error there propagates to the caller.

    Args:
        pipeline_id: KFP pipeline
        run_name: Name of the new run; the Katib Trial is looked up under
            this same name
        version_id: KFP pipeline's version (optional, not supported yet)
        experiment_name: KFP experiment to create run in. (default: "Default")
        namespace: Namespace of KFP deployment
        kwargs: All the parameters the pipeline will be fed with

    Returns:
        metrics: Dict of metrics along with their values

    Raises:
        SystemExit: via ``sys.exit(-1)`` when the run's final status is
            anything other than "Succeeded".
    """
    logger = _get_logger()

    # Namespace used for both the Trial and the Workflow annotations.
    current_ns = podutils.get_namespace()

    run_uuid = _create_kfp_run(pipeline_id, run_name, version_id,
                               experiment_name, namespace, **kwargs)

    logger.info("Annotating Trial '%s' with the KFP Run UUID '%s'...",
                run_name, run_uuid)
    try:
        # Katib Trial name == KFP Run name by design (see rpc.katib)
        trial_annotation = {KALE_KATIB_KFP_ANNOTATION: run_uuid}
        katibutils.annotate_trial(run_name, current_ns, trial_annotation)
    except Exception:
        logger.exception(
            "Failed to annotate Trial '%s' with the KFP Run UUID '%s'",
            run_name, run_uuid)

    logger.info("Getting Workflow name for run '%s'...", run_uuid)
    kfp_run = get_run(run_uuid)
    workflow = _get_workflow_from_run(kfp_run)
    wf_name = workflow["metadata"]["name"]
    logger.info("Workflow name: %s", wf_name)

    logger.info("Getting the Katib trial...")
    trial = katibutils.get_trial(run_name, current_ns)
    trial_name = trial["metadata"]["name"]
    trial_uid = trial["metadata"]["uid"]
    logger.info("Trial name: %s, UID: %s", trial_name, trial_uid)

    logger.info("Getting owner Katib experiment of trial...")
    owner = katibutils.get_owner_experiment_from_trial(trial)
    exp_name, exp_id = owner
    logger.info("Experiment name: %s, UID: %s", exp_name, exp_id)

    wf_annotations = {
        katibutils.EXPERIMENT_NAME_ANNOTATION_KEY: exp_name,
        katibutils.EXPERIMENT_ID_ANNOTATION_KEY: exp_id,
        katibutils.TRIAL_NAME_ANNOTATION_KEY: trial_name,
        katibutils.TRIAL_ID_ANNOTATION_KEY: trial_uid,
    }
    try:
        workflowutils.annotate_workflow(wf_name, current_ns, wf_annotations)
    except Exception:
        logger.exception(
            "Failed to annotate Workflow '%s' with the Katib details",
            wf_name)

    final_status = _wait_kfp_run(run_uuid)

    if final_status == "Succeeded":
        # Log every metric as "name=value" before returning them.
        metrics = _get_kfp_run_metrics(run_uuid, namespace)
        for metric_name, metric_value in metrics.items():
            logger.info("%s=%s", metric_name, metric_value)
        return metrics

    # The run did not succeed: there are no metrics to return, so report it
    # and terminate the process with an error exit status.
    logger.warning("KFP run did not run successfully. No metrics to return.")
    sys.exit(-1)
コード例 #2
0
def create_and_wait_kfp_run(pipeline_id: str,
                            version_id: str,
                            run_name: str,
                            experiment_name: str = "Default",
                            api_version: str = KATIB_API_VERSION_V1BETA1,
                            **kwargs):
    """Start a KFP run, wait for completion and return the run's metrics.

    A run of the given pipeline version is submitted through
    ``kfputils.run_pipeline`` with ``kwargs`` passed along. When the run
    succeeds, each metric is logged as ``name=value`` (the format the
    original author intended for Katib's metrics collector) and the metrics
    are returned.

    Before waiting, the parent Trial is annotated with the UUID of the KFP
    run, and the KFP Workflow is annotated with the Katib experiment and
    trial names and ids. Only the two annotate calls are best effort (a
    failure is logged and execution continues); fetching the Workflow, the
    Trial or the owner experiment is not guarded.

    Args:
        pipeline_id: KFP pipeline
        version_id: KFP pipeline's version
        run_name: The name of the new run; the Katib Trial is looked up
            under this same name
        experiment_name: KFP experiment to create run in. (default: "Default")
        api_version: The version of the Katib CRD (`v1alpha3` or `v1beta1`)
        kwargs: All the parameters the pipeline will be fed with

    Returns:
        metrics: Dict of metrics along with their values

    Raises:
        SystemExit: via ``sys.exit(-1)`` when the run's final status is
            anything other than "Succeeded".
    """
    # Namespace used for both the Trial and the Workflow annotations.
    current_ns = podutils.get_namespace()

    submitted = kfputils.run_pipeline(experiment_name=experiment_name,
                                      pipeline_id=pipeline_id,
                                      version_id=version_id,
                                      run_name=run_name,
                                      **kwargs)
    run_uuid = submitted.id

    log.info("Annotating Trial '%s' with the KFP Run UUID '%s'...",
             run_name, run_uuid)
    try:
        # Katib Trial name == KFP Run name by design (see rpc.katib)
        trial_annotation = {KALE_KATIB_KFP_ANNOTATION_KEY: run_uuid}
        annotate_trial(run_name, current_ns, trial_annotation, api_version)
    except Exception:
        log.exception(
            "Failed to annotate Trial '%s' with the KFP Run UUID '%s'",
            run_name, run_uuid)

    log.info("Getting Workflow name for run '%s'...", run_uuid)
    kfp_run = kfputils.get_run(run_uuid)
    workflow = kfputils.get_workflow_from_run(kfp_run)
    wf_name = workflow["metadata"]["name"]
    log.info("Workflow name: %s", wf_name)

    log.info("Getting the Katib trial...")
    trial = get_trial(run_name, current_ns, api_version)
    trial_name = trial["metadata"]["name"]
    trial_uid = trial["metadata"]["uid"]
    log.info("Trial name: %s, UID: %s", trial_name, trial_uid)

    log.info("Getting owner Katib experiment of trial...")
    owner = get_owner_experiment_from_trial(trial)
    exp_name, exp_id = owner
    log.info("Experiment name: %s, UID: %s", exp_name, exp_id)

    wf_annotations = {
        EXPERIMENT_NAME_ANNOTATION_KEY: exp_name,
        EXPERIMENT_ID_ANNOTATION_KEY: exp_id,
        TRIAL_NAME_ANNOTATION_KEY: trial_name,
        TRIAL_ID_ANNOTATION_KEY: trial_uid,
    }
    try:
        workflowutils.annotate_workflow(wf_name, current_ns, wf_annotations)
    except Exception:
        log.exception(
            "Failed to annotate Workflow '%s' with the Katib details",
            wf_name)

    final_status = kfputils.wait_kfp_run(run_uuid)

    if final_status == "Succeeded":
        # Log every metric as "name=value" before returning them.
        metrics = kfputils.get_kfp_run_metrics(run_uuid)
        for metric_name, metric_value in metrics.items():
            log.info("%s=%s", metric_name, metric_value)
        return metrics

    # The run did not succeed: there are no metrics to return, so report it
    # and terminate the process with an error exit status.
    log.warning("KFP run did not run successfully. No metrics to return.")
    sys.exit(-1)