Exemple #1
0
def create_katib_experiment(request, pipeline_id, pipeline_metadata,
                            output_path):
    """Build a Katib experiment from notebook metadata and launch it.

    The Katib specification is read from the notebook metadata, sanitized,
    turned into an experiment definition, dumped as YAML under
    ``output_path`` and finally handed over to ``_launch_katib_experiment``.

    Args:
        request: RPC request object (its ``log`` and ``trans_id`` are used)
        pipeline_id: The id of the KFP pipeline that will be run by the Trials
        pipeline_metadata: The Kale notebook metadata
        output_path: The directory to store the YAML definition

    Returns (dict): the experiment's name, namespace, an empty status, a
        zero trial counter and the experiment's ``maxTrialCount``

    Raises:
        RPCNotFoundError: when the metadata carries no Katib specification
    """
    try:
        target_namespace = pod_utils.get_namespace()
    except Exception:
        # XXX: get_namespace() fails when we are not running inside a pod.
        # XXX: Fall back to 'kubeflow-user' in that case; good enough for
        # XXX: local/MiniKF development, but it should probably change.
        target_namespace = "kubeflow-user"

    experiment_name = pipeline_metadata.get("experiment_name")
    raw_spec = pipeline_metadata.get("katib_metadata", None)
    if not raw_spec:
        raise RPCNotFoundError(
            details="Could not find Katib specification in notebook's"
                    " metadata",
            trans_id=request.trans_id)

    # Make sure every required first-layer field of the spec is set
    sanitized_spec = _sanitize_katib_spec(request, raw_spec)

    experiment = _define_katib_experiment(
        experiment_name,
        sanitized_spec,
        {
            "image": ("gcr.io/arrikto-playground/elikatsis/kale/"
                      "trials:1b82d32"),
            "pipeline_id": pipeline_id,
            "experiment_name": pipeline_metadata.get("experiment_name"),
        })

    yaml_filename = "%s.katib.yaml" % experiment_name
    definition_path = os.path.abspath(
        os.path.join(output_path, yaml_filename))
    request.log.info("Saving Katib experiment definition at %s",
                     definition_path)
    with open(definition_path, "w") as definition_file:
        import yaml
        definition_file.write(yaml.dump(experiment))

    _launch_katib_experiment(request, experiment, target_namespace)

    experiment_status = {
        "name": experiment["metadata"]["name"],
        "namespace": target_namespace,
        "status": None,
        "trials": 0,
        "maxTrialCount": experiment["spec"]["maxTrialCount"],
    }
    return experiment_status
Exemple #2
0
def check_rok_availability(request):
    """Verify that Rok can be reached and that it lists this notebook.

    The check runs in stages: create the Rok client, fetch the account
    information, then ask Rok for its "jupyter" registration suggestions in
    the pod's namespace and look for the current pod name among them.

    Args:
        request: RPC request object; its ``log`` attribute is used when
            present, otherwise the module-level ``logger``

    Raises:
        RPCNotFoundError: when the Rok client module cannot be imported or
            the current pod is not among the suggestions
        RPCServiceUnavailableError: when the client cannot be initialized,
            the account information cannot be retrieved or the suggestions
            cannot be listed
    """
    if hasattr(request, "log"):
        log = request.log
    else:
        log = logger

    # Stage 1: obtain a client
    try:
        client = _get_client()
    except ImportError:
        log.exception("Failed to import RokClient")
        raise RPCNotFoundError(
            details="Rok Gateway Client module not found",
            trans_id=request.trans_id)
    except Exception:
        log.exception("Failed to initialize RokClient")
        raise RPCServiceUnavailableError(
            details="Failed to initialize RokClient",
            trans_id=request.trans_id)

    # Stage 2: make sure the service answers
    try:
        client.account_info()
    except Exception:
        log.exception("Failed to retrieve account information")
        raise RPCServiceUnavailableError(
            details="Failed to access Rok",
            trans_id=request.trans_id)

    # Stage 3: list the notebooks Rok suggests for this namespace
    pod_name = podutils.get_pod_name()
    pod_namespace = podutils.get_namespace()
    try:
        suggestions = client.version_register_suggest(
            DEFAULT_BUCKET, pod_name, "jupyter", "params:lab",
            {"namespace": pod_namespace}, ignore_env=True)
    except Exception as exc:
        log.exception("Failed to list lab suggestions")
        message = "%s: %s" % (exc.__class__.__name__, exc)
        raise RPCServiceUnavailableError(
            message=message,
            details="Rok cannot list notebooks in this namespace",
            trans_id=request.trans_id)

    # Stage 4: this notebook must be one of the suggestions
    for suggestion in suggestions:
        if suggestion["value"] == pod_name:
            return

    log.error("Could not find notebook '%s' in list of suggestions",
              pod_name)
    raise RPCNotFoundError(
        details="Could not find this notebook in notebooks listed by Rok",
        trans_id=request.trans_id)
Exemple #3
0
def _sanitize_parameters(request, parameters, parameter_names, defaults,
                         parameters_type):
    """Keep just the known parameter fields that are required.

    For every name in ``parameter_names`` the value is taken from
    ``parameters`` when present, otherwise from ``defaults``. Keys of
    ``parameters`` that are not listed in ``parameter_names`` are dropped
    and reported in a debug message. ``parameters`` itself is left
    untouched.

    Args:
        request: RPC request object (its ``log`` and ``trans_id`` are used)
        parameters: mapping with the user-provided values
        parameter_names: iterable with the names of the known parameters
        defaults: mapping with fallback values for optional parameters
        parameters_type: label used in log and error messages

    Returns (dict): the known parameters mapped to their resolved values

    Raises:
        RPCNotFoundError: when a known parameter is neither provided nor
            has a default
    """
    sanitized = {}
    for param in parameter_names:
        if param not in parameters and param not in defaults:
            # No exception is being handled here, so use error() rather
            # than exception(), which would log a bogus empty traceback.
            request.log.error("%s parameter '%s' was not provided",
                              parameters_type, param)
            raise RPCNotFoundError(details=("%s parameter '%s' is required" %
                                            (parameters_type, param)),
                                   trans_id=request.trans_id)
        # Read instead of pop so the caller's mapping is not mutated
        sanitized[param] = parameters.get(param, defaults.get(param))

    ignored = [key for key in parameters if key not in sanitized]
    if ignored:
        request.log.debug("Ignoring %s parameters: %s", parameters_type,
                          ", ".join(ignored))
    return sanitized
Exemple #4
0
def create_katib_experiment(request, pipeline_id, version_id,
                            pipeline_metadata, output_path):
    """Create and launch a new Katib experiment.

    The Katib metadata must include all the information required to create an
    Experiment CRD (algorithm, objective, search parameters, ...). This
    information is sanitized and a new YAML is written to file. This YAML is
    then submitted to the K8s API server to create the Experiment CR.

    While this function runs, ``katibutils.log`` is replaced with the
    request's logger. The previous logger is always put back, whether the
    function returns normally or raises.

    Args:
        request: RPC request object
        pipeline_id: The id of the KFP pipeline that will be run by the Trials
        version_id: The id of the KFP pipeline version run by the Trials
        pipeline_metadata: The Kale notebook metadata
        output_path: The directory to store the YAML definition

    Returns (dict): a dictionary describing the status of the experiment

    Raises:
        RPCNotFoundError: when the metadata carries no Katib specification
    """
    old_katibutils_logger = katibutils.log
    katibutils.log = request.log
    # Everything that may fail runs inside the try, so the module-level
    # logger is restored even when an early step raises.
    try:
        try:
            namespace = podutils.get_namespace()
        except Exception:
            # XXX: When not running from within a pod, get_namespace() fails
            # XXX: If that's the case, use the 'kubeflow-user' one
            # XXX: This should probably change. It works for local/MiniKF dev
            namespace = "kubeflow-user"

        katib_name = pipeline_metadata.get("experiment_name")
        katib_spec = pipeline_metadata.get("katib_metadata", None)
        if not katib_spec:
            raise RPCNotFoundError(
                details=("Could not find Katib specification in"
                         " notebook's metadata"),
                trans_id=request.trans_id)
        # Perform a sanitization of the Katib specification, making sure all
        # the required first-layer-fields are set
        katib_spec = _sanitize_katib_spec(request, katib_spec)

        katib_experiment = katibutils.construct_experiment_cr(
            name=katib_name,
            experiment_spec=katib_spec,
            pipeline_id=pipeline_id,
            version_id=version_id,
            experiment_name=pipeline_metadata.get("experiment_name"),
            api_version=katibutils.discover_katib_version())
        definition_path = os.path.abspath(
            os.path.join(output_path, "%s.katib.yaml" % katib_name))
        request.log.info("Saving Katib experiment definition at %s",
                         definition_path)
        # Serialize before opening the file, so a failed dump does not leave
        # an empty definition file behind.
        import yaml
        yaml_text = yaml.dump(katib_experiment)
        with open(definition_path, "w") as yaml_file:
            yaml_file.write(yaml_text)

        _launch_katib_experiment(request, katib_experiment, namespace)
    finally:
        katibutils.log = old_katibutils_logger

    return _construct_experiment_return_base(katib_experiment, namespace)