def create_katib_experiment(request, pipeline_id, pipeline_metadata, output_path):
    """Create and launch a new Katib experiment.

    The Katib metadata must include all the information required to create an
    Experiment CRD (algorithm, objective, search parameters, ...). This
    information is sanitized and a new YAML is written to file. This YAML is
    then submitted to the K8s API server to create the Experiment CR.

    Args:
        request: RPC request object
        pipeline_id: The id of the KFP pipeline that will be run by the Trials
        pipeline_metadata: The Kale notebook metadata
        output_path: The directory to store the YAML definition

    Returns (dict): a dictionary describing the status of the experiment
    """
    # Imported lazily so the module loads even where PyYAML is unavailable;
    # hoisted out of the `with` block below where it was previously hidden.
    import yaml

    try:
        namespace = pod_utils.get_namespace()
    except Exception:
        # XXX: When not running from within a pod, get_namespace() fails
        # XXX: If that's the case, use the 'kubeflow-user' one
        # XXX: This should probably change. It works for local/MiniKF dev
        namespace = "kubeflow-user"

    katib_name = pipeline_metadata.get("experiment_name")
    katib_spec = pipeline_metadata.get("katib_metadata", None)
    if not katib_spec:
        raise RPCNotFoundError(details=("Could not find Katib specification in"
                                        " notebook's metadata"),
                               trans_id=request.trans_id)
    # Perform a sanitization of the Katib specification, making sure all the
    # required first-layer-fields are set
    katib_spec = _sanitize_katib_spec(request, katib_spec)

    # NOTE(review): hard-coded Trial image; consider making this configurable
    trial_parameters = {
        "image": "gcr.io/arrikto-playground/elikatsis/kale/trials:1b82d32",
        "pipeline_id": pipeline_id,
        "experiment_name": pipeline_metadata.get("experiment_name")
    }

    katib_experiment = _define_katib_experiment(katib_name, katib_spec,
                                                trial_parameters)
    definition_path = os.path.abspath(
        os.path.join(output_path, "%s.katib.yaml" % katib_name))
    request.log.info("Saving Katib experiment definition at %s",
                     definition_path)
    with open(definition_path, "w") as yaml_file:
        yaml_text = yaml.dump(katib_experiment)
        yaml_file.write(yaml_text)

    _launch_katib_experiment(request, katib_experiment, namespace)

    return {
        "name": katib_experiment["metadata"]["name"],
        "namespace": namespace,
        "status": None,
        "trials": 0,
        "maxTrialCount": katib_experiment["spec"]["maxTrialCount"]
    }
def check_rok_availability(request):
    """Check if Rok is available."""
    # Prefer the request-scoped logger; fall back to the module logger.
    log = getattr(request, "log", logger)

    # Step 1: the Rok client must be importable and constructible.
    try:
        rok_client = _get_client()
    except ImportError:
        log.exception("Failed to import RokClient")
        raise RPCNotFoundError(details="Rok Gateway Client module not found",
                               trans_id=request.trans_id)
    except Exception:
        log.exception("Failed to initialize RokClient")
        raise RPCServiceUnavailableError(details=("Failed to initialize"
                                                  " RokClient"),
                                         trans_id=request.trans_id)

    # Step 2: the Rok service must answer a basic account query.
    try:
        rok_client.account_info()
    except Exception:
        log.exception("Failed to retrieve account information")
        raise RPCServiceUnavailableError(details="Failed to access Rok",
                                         trans_id=request.trans_id)

    # Step 3: Rok must be able to list notebooks in this namespace.
    pod_name = podutils.get_pod_name()
    pod_namespace = podutils.get_namespace()
    try:
        lab_suggestions = rok_client.version_register_suggest(
            DEFAULT_BUCKET, pod_name, "jupyter", "params:lab",
            {"namespace": pod_namespace}, ignore_env=True)
    except Exception as exc:
        log.exception("Failed to list lab suggestions")
        msg = "%s: %s" % (type(exc).__name__, exc)
        raise RPCServiceUnavailableError(message=msg,
                                         details=("Rok cannot list notebooks"
                                                  " in this namespace"),
                                         trans_id=request.trans_id)

    # Step 4: this very notebook must appear among the suggestions.
    for suggestion in lab_suggestions:
        if suggestion["value"] == pod_name:
            break
    else:
        log.error("Could not find notebook '%s' in list of suggestions",
                  pod_name)
        raise RPCNotFoundError(details=("Could not find this notebook in"
                                        " notebooks listed by Rok"),
                               trans_id=request.trans_id)
def _sanitize_parameters(request, parameters, parameter_names, defaults, parameters_type): """Keep just the known parameter fields that are required.""" sanitized = {} for param in parameter_names: if param not in parameters and param not in defaults: request.log.exception("%s parameter '%s' was not provided", parameters_type, param) raise RPCNotFoundError(details=("%s parameter '%s' is required" % (parameters_type, param)), trans_id=request.trans_id) sanitized[param] = parameters.pop(param, defaults.get(param)) if parameters: request.log.debug("Ignoring %s parameters: %s", parameters_type, ", ".join(parameters.keys())) return sanitized
def create_katib_experiment(request, pipeline_id, version_id,
                            pipeline_metadata, output_path):
    """Create and launch a new Katib experiment.

    The Katib metadata must include all the information required to create an
    Experiment CRD (algorithm, objective, search parameters, ...). This
    information is sanitized and a new YAML is written to file. This YAML is
    then submitted to the K8s API server to create the Experiment CR.

    Args:
        request: RPC request object
        pipeline_id: The id of the KFP pipeline that will be run by the Trials
        version_id: The id of the KFP pipeline version run by the Trials
        pipeline_metadata: The Kale notebook metadata
        output_path: The directory to store the YAML definition

    Returns (dict): a dictionary describing the status of the experiment
    """
    # Temporarily route katibutils logging through the request logger. The
    # original code restored the logger only around _launch_katib_experiment,
    # so any exception raised earlier (missing katib_metadata, sanitization
    # failure, CR construction, file write) leaked request.log into the
    # module. Use try/finally so it is always restored.
    old_katibutils_logger = katibutils.log
    katibutils.log = request.log
    try:
        try:
            namespace = podutils.get_namespace()
        except Exception:
            # XXX: When not running from within a pod, get_namespace() fails
            # XXX: If that's the case, use the 'kubeflow-user' one
            # XXX: This should probably change. It works for local/MiniKF dev
            namespace = "kubeflow-user"

        katib_name = pipeline_metadata.get("experiment_name")
        katib_spec = pipeline_metadata.get("katib_metadata", None)
        if not katib_spec:
            raise RPCNotFoundError(
                details=("Could not find Katib specification in"
                         " notebook's metadata"),
                trans_id=request.trans_id)
        # Perform a sanitization of the Katib specification, making sure all
        # the required first-layer-fields are set
        katib_spec = _sanitize_katib_spec(request, katib_spec)

        katib_experiment = katibutils.construct_experiment_cr(
            name=katib_name, experiment_spec=katib_spec,
            pipeline_id=pipeline_id, version_id=version_id,
            experiment_name=pipeline_metadata.get("experiment_name"),
            api_version=katibutils.discover_katib_version())
        definition_path = os.path.abspath(
            os.path.join(output_path, "%s.katib.yaml" % katib_name))
        request.log.info("Saving Katib experiment definition at %s",
                         definition_path)
        import yaml
        with open(definition_path, "w") as yaml_file:
            yaml_file.write(yaml.dump(katib_experiment))

        _launch_katib_experiment(request, katib_experiment, namespace)
    finally:
        katibutils.log = old_katibutils_logger
    return _construct_experiment_return_base(katib_experiment, namespace)