Esempio n. 1
0
def _launch_katib_experiment(request, katib_experiment, namespace):
    """Submit a Katib experiment through ``katibutils``.

    Args:
        request: RPC request object; its ``log`` and ``trans_id`` are used
            when reporting a failure
        katib_experiment: experiment definition handed over to
            ``katibutils.create_experiment``
        namespace: namespace handed over to ``katibutils.create_experiment``

    Raises:
        RPCUnhandledError: if the creation call raises ``ApiException``
    """
    try:
        katibutils.create_experiment(katib_experiment, namespace)
    except ApiException as api_error:
        # Record the traceback first, then surface the failure to the RPC
        # caller together with the transaction id of this request.
        request.log.exception("Failed to launch Katib experiment")
        failure = RPCUnhandledError(
            message="Failed to launch Katib experiment",
            details=str(api_error),
            trans_id=request.trans_id)
        raise failure
Esempio n. 2
0
def _get_k8s_co_client(trans_id):
    """Return a CustomObjectsApi client after loading a Kubernetes config.

    The in-cluster configuration is tried first; if loading it raises, the
    local kubeconfig is tried instead.

    Args:
        trans_id: transaction id attached to the error raised on failure

    Returns: a ``kubernetes.client.CustomObjectsApi`` instance

    Raises:
        RPCUnhandledError: if neither configuration can be loaded
    """
    try:
        kubernetes.config.load_incluster_config()
    except Exception:
        # Not in a notebook server: fall back to the local kubeconfig
        try:
            kubernetes.config.load_kube_config()
        except Exception:
            config_error = RPCUnhandledError(
                details="Could not load Kubernetes config",
                trans_id=trans_id)
            raise config_error

    co_client = kubernetes.client.CustomObjectsApi()
    return co_client
Esempio n. 3
0
def get_experiment(request, experiment, namespace):
    """Fetch a Katib Experiment and summarize its current state.

    The labextension calls this RPC while polling a running Experiment.

    Args:
        request: RPC request object
        experiment: Name of the Katib experiment
        namespace: Namespace of the experiment

    Returns (dict): a dict describing the status of the running experiment.
        If the fetched object has no ``status``, only the base description
        built by ``_construct_experiment_return_base`` is returned.

    Raises:
        RPCUnhandledError: if the Kubernetes API call raises ApiException
    """
    client = k8sutils.get_co_client()

    try:
        # Positional order: group, version, namespace, plural, name
        katib_exp = client.get_namespaced_custom_object(
            "kubeflow.org", "v1alpha3", namespace, "experiments", experiment)
    except ApiException as api_error:
        request.log.exception("Failed to get Katib experiment")
        failure = RPCUnhandledError(message="Failed to get Katib experiment",
                                    details=str(api_error),
                                    trans_id=request.trans_id)
        raise failure

    result = _construct_experiment_return_base(katib_exp, namespace)
    exp_status = katib_exp.get("status")
    if exp_status is None:
        # Nothing has been reported for this experiment yet
        return result

    status, reason, message = _get_experiment_status(exp_status)
    summary = {"status": status, "reason": reason, "message": message}
    # Trial counters default to 0 when absent from the status
    for counter in ("trials", "trialsFailed", "trialsRunning",
                    "trialsSucceeded"):
        summary[counter] = exp_status.get(counter, 0)
    summary["currentOptimalTrial"] = exp_status.get("currentOptimalTrial")

    result.update(summary)
    return result
Esempio n. 4
0
def _launch_katib_experiment(request, katib_experiment, namespace):
    """Create the Katib Experiment custom object in the given namespace.

    Args:
        request: RPC request object; its ``log`` and ``trans_id`` are used
            for logging and error reporting
        katib_experiment: experiment definition (dict); its
            ``metadata.name`` is logged before submission
        namespace: namespace in which the custom object is created

    Raises:
        RPCUnhandledError: if the Kubernetes API call raises ApiException
    """
    client = _get_k8s_co_client(request.trans_id)

    experiment_name = katib_experiment["metadata"]["name"]
    request.log.debug("Launching Katib Experiment '%s'...", experiment_name)
    try:
        # Positional order: group, version, namespace, plural, body
        client.create_namespaced_custom_object(
            "kubeflow.org", "v1alpha3", namespace, "experiments",
            katib_experiment)
    except ApiException as api_error:
        request.log.exception("Failed to launch Katib experiment")
        failure = RPCUnhandledError(
            message="Failed to launch Katib experiment",
            details=str(api_error), trans_id=request.trans_id)
        raise failure
    else:
        request.log.info("Successfully launched Katib Experiment")
Esempio n. 5
0
def get_experiment(request, experiment, namespace):
    """Get a Katib Experiment.

    This RPC is used by the labextension when polling for the state of a
    running Experiment.

    Args:
        request: RPC request object
        experiment: Name of the Katib experiment
        namespace: Namespace of the experiment

    Returns (dict): a dict describing the status of the running experiment.
        When the fetched object carries no ``status`` yet, ``status`` and
        ``currentOptimalTrial`` are None and every trial counter is 0.

    Raises:
        RPCUnhandledError: if the Kubernetes API call raises ApiException
    """
    k8s_co_client = _get_k8s_co_client(request.trans_id)

    co_group = "kubeflow.org"
    co_version = "v1alpha3"
    co_plural = "experiments"

    try:
        exp = k8s_co_client.get_namespaced_custom_object(
            co_group, co_version, namespace, co_plural, experiment)
    except ApiException as e:
        request.log.exception("Failed to get Katib experiment")
        raise RPCUnhandledError(message="Failed to get Katib experiment",
                                details=str(e),
                                trans_id=request.trans_id)

    # The object may not carry a status yet (e.g. right after creation,
    # before anything has been reported for it). Indexing exp["status"]
    # unconditionally would raise KeyError on every poll until then, so
    # fall back to an empty status and report neutral values instead.
    exp_status = exp.get("status")
    if exp_status is None:
        status = None
        exp_status = {}
    else:
        status = _get_experiment_status(exp_status)

    return {
        "name": experiment,
        "namespace": namespace,
        "status": status,
        "trials": exp_status.get("trials", 0),
        "trialsFailed": exp_status.get("trialsFailed", 0),
        "trialsRunning": exp_status.get("trialsRunning", 0),
        "trialsSucceeded": exp_status.get("trialsSucceeded", 0),
        "maxTrialCount": exp["spec"]["maxTrialCount"],
        "currentOptimalTrial": exp_status.get("currentOptimalTrial")
    }