Example #1
    def __init__(self, trans_id=None, nb_path=None):
        if not trans_id:
            trans_id = random_string(size=10)
        self.log = create_adapter(logging.getLogger(__name__), trans_id,
                                  nb_path)
        self.trans_id = trans_id
        self.nb_path = nb_path
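Every example on this page leans on a random_string helper whose implementation is not shown. A minimal sketch of such a helper, assuming it draws from lowercase letters and digits and defaults to a short length (both assumptions), could look like this:

import random
import string

def random_string(size=5, chars=string.ascii_lowercase + string.digits):
    """Hypothetical sketch: return a random string of `size` characters.

    The default size and character set are assumptions; the real helper in
    the Kale utils module may differ.
    """
    return "".join(random.choice(chars) for _ in range(size))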
Example #2
    def __init__(self):
        log.info("%s Initializing MLMD context... %s", "-" * 10, "-" * 10)
        log.info("Connecting to MLMD...")
        self.store = self._connect()
        log.info("Successfully connected to MLMD")
        log.info("Getting step details...")
        log.info("Getting pod name...")
        self.pod_name = podutils.get_pod_name()
        log.info("Successfully retrieved pod name: %s", self.pod_name)
        log.info("Getting pod namespace...")
        self.pod_namespace = podutils.get_namespace()
        log.info("Successfully retrieved pod namespace: %s",
                 self.pod_namespace)
        log.info("Getting pod...")
        self.pod = podutils.get_pod(self.pod_name, self.pod_namespace)
        log.info("Successfully retrieved pod")
        log.info("Getting workflow name from pod...")
        self.workflow_name = self.pod.metadata.labels.get(
            workflowutils.ARGO_WORKFLOW_LABEL_KEY)
        log.info("Successfully retrieved workflow name: %s",
                 self.workflow_name)
        log.info("Getting workflow...")
        self.workflow = workflowutils.get_workflow(self.workflow_name,
                                                   self.pod_namespace)
        log.info("Successfully retrieved workflow")

        workflow_labels = self.workflow["metadata"].get("labels", {})
        self.run_uuid = workflow_labels.get(podutils.KFP_RUN_ID_LABEL_KEY,
                                            self.workflow_name)
        log.info("Successfully retrieved KFP run ID: %s", self.run_uuid)

        workflow_annotations = self.workflow["metadata"].get("annotations", {})
        pipeline_spec = json.loads(
            workflow_annotations.get("pipelines.kubeflow.org/pipeline_spec",
                                     "{}"))
        self.pipeline_name = pipeline_spec.get("name", self.workflow_name)
        if self.pipeline_name:
            log.info("Successfully retrieved KFP pipeline_name: %s",
                     self.pipeline_name)
        else:
            log.info("Could not retrieve KFP pipeline name")

        self.component_id = podutils.compute_component_id(self.pod)
        self.execution_hash = self.pod.metadata.annotations.get(
            MLMD_EXECUTION_HASH_PROPERTY_KEY)
        if self.execution_hash:
            log.info("Successfully retrieved execution hash: %s",
                     self.execution_hash)
        else:
            self.execution_hash = utils.random_string(10)
            log.info(
                "Failed to retrieve execution hash."
                " Generating random string...: %s", self.execution_hash)

        self.run_context = self._get_or_create_run_context()
        self.execution = self._create_execution_in_run_context()
        self._label_with_context_and_execution()
        log.info("%s Successfully initialized MLMD context %s", "-" * 10,
                 "-" * 10)
Example #3
    def save_pipeline(self, pipeline_code, output_path=None):
        """Save Python code to file."""
        if output_path is None:
            # create tmp path
            tmp_dir = tempfile.mkdtemp()
            filename = "kale_pipeline_code_{}.py".format(
                utils.random_string(5))
            output_path = os.path.join(tmp_dir, filename)

        with open(output_path, "w") as f:
            f.write(pipeline_code)
        self.logger.info("Pipeline code saved at {}".format(output_path))
        return output_path
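The temp-path fallback in this method can be illustrated on its own. A minimal standalone sketch (the helper name is hypothetical; it mirrors the default output-path logic above using only the standard library):

import os
import tempfile

def default_pipeline_path(random_suffix):
    """Hypothetical helper mirroring the default output path logic above."""
    tmp_dir = tempfile.mkdtemp()  # fresh temporary directory, e.g. under /tmp
    filename = "kale_pipeline_code_{}.py".format(random_suffix)
    return os.path.join(tmp_dir, filename)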
Example #4
def parse_metadata(notebook_metadata):
    """Parse the Notebook's metadata and update it when needed.

    Args:
        notebook_metadata (dict): metadata annotated by Kale.
        Refer to DEFAULT_METADATA for defaults

    Returns (dict): updated and validated metadata
    """
    # check for required fields before adding all possible defaults
    validated_notebook_metadata = copy.deepcopy(notebook_metadata)
    for required in METADATA_REQUIRED_KEYS:
        if required not in validated_notebook_metadata:
            raise ValueError(
                "Key {} not found. Add this field either on"
                " the notebook metadata or as an override".format(required))

    metadata = copy.deepcopy(DEFAULT_METADATA)
    metadata.update(validated_notebook_metadata)

    if not re.match(KALE_STEP_NAME_REGEX, metadata['pipeline_name']):
        raise ValueError("Pipeline name {}".format(KALE_NAME_MSG))

    # update the pipeline name with a random string
    random_pipeline_name = "{}-{}".format(metadata['pipeline_name'],
                                          random_string())
    metadata['pipeline_name'] = random_pipeline_name

    volumes = metadata.get('volumes', [])
    if isinstance(volumes, list):
        metadata.update({'volumes': _parse_volumes_metadata(volumes)})
    else:
        raise ValueError("Volumes spec must be a list")

    katib = metadata.get('katib', False)
    if not isinstance(katib, bool):
        raise ValueError("The field `katib` is not a boolean")
    if katib:
        _validate_katib_metadata(metadata.get("katib_metadata", {}))
        if not re.match(K8S_VALID_NAME_REGEX, metadata['experiment_name']):
            raise ValueError("When choosing HP Tuning, experiment name"
                             " {}".format(K8S_NAME_MSG))
    return metadata
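A hedged usage sketch of parse_metadata (the input keys are assumptions; METADATA_REQUIRED_KEYS, DEFAULT_METADATA and the name regexes are module-level constants not shown here):

# Hypothetical notebook metadata; real values come from the Kale notebook UI.
notebook_metadata = {
    "pipeline_name": "my-pipeline",
    "experiment_name": "my-experiment",
    "docker_image": "my-image:latest",
}
metadata = parse_metadata(notebook_metadata)
# metadata["pipeline_name"] now carries a random suffix, e.g. "my-pipeline-a1b2c",
# so repeated compilations of the same notebook do not collide.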
Example #5
def run_pipeline(experiment_name: str,
                 pipeline_id: str,
                 run_name: str = None,
                 version_id: str = None,
                 host: str = None,
                 **kwargs) -> Any:
    """Run pipeline (without uploading) in kfp.

    Args:
        experiment_name: The name of the KFP experiment
        pipeline_id: The ID of the uploaded pipeline to be run
        run_name: The name of the KFP run (autogenerated if not provided)
        version_id: The ID of the pipeline version to be run (defaults to the
            pipeline's default version)
        host: Custom host when executing outside of the cluster

    Returns:
        Pipeline run metadata
    """
    client = _get_kfp_client(host)

    log.info("Creating KFP experiment '%s'...", experiment_name)
    experiment = client.create_experiment(experiment_name)
    pipeline = client.pipelines.get_pipeline(pipeline_id)
    pipeline_name = pipeline.name
    _version_id = version_id if version_id else pipeline.default_version.id
    version_name = client.pipelines.get_pipeline_version(_version_id).name
    if not run_name:
        run_name = ("%s-%s-%s" %
                    (pipeline_name, version_name, utils.random_string()))
    display_version = ("(%sversion: '%s')" %
                       ("" if version_id else "default ", version_name))
    log.info("Submitting new pipeline run '%s' for pipeline '%s' %s ...",
             run_name, pipeline_name, display_version)
    run = client.run_pipeline(experiment.id,
                              run_name,
                              pipeline_id=pipeline_id,
                              version_id=_version_id,
                              params=kwargs)
    run_url = ("%s/?ns=%s#/runs/details/%s" %
               (client._get_url_prefix(), podutils.get_namespace(), run.id))
    log.info("Successfully submitted pipeline run.")
    log.info("Run URL: <host>%s", run_url)
    return run
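A hedged call sketch for run_pipeline (the IDs and host are placeholders; any extra keyword arguments are forwarded to the run as pipeline parameters via **kwargs):

# Hypothetical values, for illustration only.
run = run_pipeline(experiment_name="default",
                   pipeline_id="my-pipeline-id",
                   host="http://localhost:8080",  # only needed outside the cluster
                   learning_rate=0.01)            # forwarded as a pipeline parameter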
Example #6
def upload_pipeline(pipeline_package_path: str,
                    pipeline_name: str,
                    host: str = None) -> Tuple[str, str]:
    """Upload pipeline package to KFP.

    If a pipeline with the provided name already exists, upload a new version.

    Args:
        pipeline_package_path: Path to the .tar.gz KFP pipeline package
        pipeline_name: Name of the uploaded pipeline
        host: Custom host when executing outside of the cluster
    Returns: (pipeline_id, version_id)
    """
    client = _get_kfp_client(host)
    log.info("Uploading pipeline '%s'...", pipeline_name)
    pipeline_id = get_pipeline_id(pipeline_name, host=host)
    version_name = utils.random_string()
    if not pipeline_id:
        # The first version of the pipeline is set to the pipeline name value.
        # To work around this, upload the first pipeline, then another one
        # with a proper version name. Finally delete the original pipeline.
        upp = client.pipeline_uploads.upload_pipeline(
            uploadfile=pipeline_package_path, name=pipeline_name)
        pipeline_id = upp.id
        upv = client.pipeline_uploads.upload_pipeline_version(
            uploadfile=pipeline_package_path,
            name=version_name,
            pipelineid=pipeline_id)
        # delete the first version which has the same name as the pipeline
        client.pipelines.delete_pipeline_version(upp.default_version.id)
    else:
        upv = client.pipeline_uploads.upload_pipeline_version(
            uploadfile=pipeline_package_path,
            name=version_name,
            pipelineid=pipeline_id)
    log.info("Successfully uploaded version '%s' for pipeline '%s'.",
             version_name, pipeline_name)
    return pipeline_id, upv.id
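A hedged usage sketch for upload_pipeline (the package path and pipeline name are placeholders):

# Hypothetical path and name, for illustration only.
pipeline_id, version_id = upload_pipeline("pipeline.tar.gz", "my-pipeline")
# Calling it again with the same name uploads a new, randomly named version
# under the same pipeline_id.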
Example #7
def generate_run_name(pipeline_name: str):
    """Generate a new run name based on pipeline name."""
    return "{}_run-{}".format(pipeline_name, utils.random_string(5))
Example #8
    def _randomize_pipeline_name(self):
        self.pipeline_name = "%s-%s" % (self.pipeline_name,
                                        utils.random_string())
Example #9
def serve(model: Any,
          name: str = None,
          wait: bool = True,
          predictor: str = None,
          preprocessing_fn: Callable = None,
          preprocessing_assets: Dict = None) -> KFServer:
    """Main API used to serve models from a notebook or a pipeline step.

    This function procedurally deploys a KFServing InferenceService, starting
    from a model object. A summary list of actions follows:

    * Autogenerate an InferenceService name, if not provided
    * Process transformer function (and related assets)
    * Dump the model, to a path under a mounted PVC
    * Snapshot the PVC
    * Hydrate a new PVC from the new snapshot
    * Submit an InferenceService CR
    * Monitor the CR until it becomes ready

    FIXME: Improve documentation. Provide some examples in the docstring and
      explain how the preprocessing function parsing works.

    Args:
        model: Model object to be used as a predictor
        name (optional): Name of the predictor. Will be autogenerated if not
            provided
        wait (optional): Wait for the InferenceService to become ready.
            Default: True
        predictor (optional): Predictor type to be used for the
            InferenceService. If not provided it will be inferred using
            the matching marshalling backend.
        preprocessing_fn (optional): A processing function that will be
            deployed as a KFServing Transformer
        preprocessing_assets (optional): A dictionary with objects required by
            the preprocessing function. This is needed in case the
            preprocessing function references global objects.

    Returns: A KFServer instance
    """
    log.info("Starting serve procedure for model '%s'", model)
    if not name:
        name = "%s-%s" % (podutils.get_pod_name(), utils.random_string(5))

    # Validate and process transformer
    if preprocessing_fn:
        _prepare_transformer_assets(preprocessing_fn, preprocessing_assets)

    # Detect predictor type
    predictor_type = marshal.get_backend(model).predictor_type
    if predictor and predictor != predictor_type:
        raise RuntimeError("Trying to create an InferenceService with"
                           " predictor of type '%s' but the model is of type"
                           " '%s'" % (predictor, predictor_type))
    if not predictor_type:
        log.error(
            "Kale does not yet support serving objects with '%s'"
            " backend.\n\nPlease help us improve Kale by opening a new"
            " issue at:\n"
            "https://github.com/kubeflow-kale/kale/issues",
            marshal.get_backend(model).display_name)
        utils.graceful_exit(-1)
    predictor = predictor_type  # in case `predictor` is None

    volume = podutils.get_volume_containing_path(PVC_ROOT)
    volume_name = volume[1].persistent_volume_claim.claim_name
    log.info("Model is contained in volume '%s'", volume_name)

    # Dump the model
    marshal.set_data_dir(PREDICTOR_MODEL_DIR)
    model_filepath = marshal.save(model, "model")
    log.info("Model saved successfully at '%s'", model_filepath)

    # Take snapshot
    task_info = rokutils.snapshot_pvc(volume_name,
                                      bucket=rokutils.SERVING_BUCKET,
                                      wait=True)
    task = rokutils.get_task(task_info["task"]["id"],
                             bucket=rokutils.SERVING_BUCKET)
    new_pvc_name = "%s-pvc-%s" % (name, utils.random_string(5))
    rokutils.hydrate_pvc_from_snapshot(task["result"]["event"]["object"],
                                       task["result"]["event"]["version"],
                                       new_pvc_name,
                                       bucket=rokutils.SERVING_BUCKET)

    # Cleanup: remove dumped model and transformer assets from the current PVC
    utils.rm_r(
        os.path.join(PREDICTOR_MODEL_DIR, os.path.basename(model_filepath)))
    utils.rm_r(TRANSFORMER_ASSETS_DIR, silent=True)

    # Need an absolute path from the *root* of the PVC. Add '/' if not exists.
    # (Use a prefix-aware relpath: str.lstrip would strip any leading
    # characters found in PVC_ROOT, not the prefix as a whole.)
    pvc_model_path = "/" + os.path.relpath(PREDICTOR_MODEL_DIR, PVC_ROOT)
    # Tensorflow saves the model's files into a directory by itself
    if predictor == "tensorflow":
        pvc_model_path += "/" + os.path.basename(model_filepath).lstrip("/")

    kfserver = create_inference_service(
        name=name,
        predictor=predictor,
        pvc_name=new_pvc_name,
        model_path=pvc_model_path,
        transformer=preprocessing_fn is not None)

    if wait:
        monitor_inference_service(kfserver.name)
    return kfserver
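A hedged usage sketch for serve, e.g. from a notebook cell or a pipeline step (the model object is a placeholder for anything one of Kale's marshalling backends can dump; all keyword arguments come from the signature above):

# Hypothetical call, for illustration only; `model` is a trained model object
# supported by one of the marshalling backends.
kfserver = serve(model,
                 name="my-model",        # autogenerated from the pod name if omitted
                 wait=True,              # block until the InferenceService is ready
                 preprocessing_fn=None)  # optionally deployed as a KFServing Transformer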