Beispiel #1
0
class Job(DeployerInterface):  #pylint:disable=too-many-instance-attributes
    """Deploy a training run as a Kubernetes ``batch/v1`` Job.

    The deployer wraps a caller-supplied ``V1PodSpec`` in a pod template and a
    ``V1Job``, submits it through a ``KubeManager`` backend and, when
    ``stream_log`` is enabled, reads the job's logs and optionally deletes the
    job afterwards.
    """
    def __init__(self,
                 namespace=None,
                 runs=1,
                 output=None,
                 cleanup=True,
                 labels=None,
                 job_name=None,
                 stream_log=True,
                 deployer_type=constants.JOB_DEPLOPYER_TYPE,
                 pod_spec_mutators=None,
                 annotations=None,
                 config_file=None,
                 context=None,
                 client_configuration=None,
                 persist_config=True,
                 verify_ssl=True):
        """Store the deployment settings and create the Kubernetes backend.

        :param namespace: k8s namespace where the training's components will be
                deployed. When None, ``utils.get_default_target_namespace()`` is used.
        :param runs: Number of training(s) to be deployed. Used as both the
                parallelism and the completions of the generated job.
        :param output: when truthy, the generated job spec is printed as JSON
                before it is submitted
        :param cleanup: delete the job after its logs have been read. Only takes
                effect on the ``get_logs`` path, i.e. when ``stream_log`` is set
                or ``get_logs`` is called explicitly.
        :param labels: dictionary of extra labels to be assigned to the training job
        :param job_name: name of the job
        :param stream_log: read the job's logs right after it has been launched
        :param deployer_type: type of deployer; also stored as the value of the
                ``fairing-deployer`` label
        :param pod_spec_mutators: callables invoked as
                ``fn(backend, pod_spec, namespace)`` before the job is built
                (Default value = None)
        :param annotations: dictionary of annotations to be assigned to the job's pods
        :param config_file: kubeconfig file, defaults to ~/.kube/config. Note that for the case
               that the SDK is running in cluster and you want to operate in another remote
               cluster, user must set config_file to load kube-config file explicitly.
        :param context: kubernetes context
        :param client_configuration: The kubernetes.client.Configuration to set configs to.
        :param persist_config: If True, config file will be updated when changed
        :param verify_ssl: use ssl verify or not, set in the client config
        """
        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

        # Used as pod and job name
        self.job_name = job_name
        self.deployer_type = deployer_type
        self.deployment_spec = None
        # Both are populated by deploy()/create_resource(); defining them here
        # keeps every instance attribute declared in one place.
        self.job_id = None
        self._created_job = None
        self.runs = runs
        self.output = output
        self.backend = KubeManager(config_file=config_file,
                                   context=context,
                                   client_configuration=client_configuration,
                                   persist_config=persist_config,
                                   verify_ssl=verify_ssl)
        self.cleanup = cleanup
        self.stream_log = stream_log
        self.set_labels(labels, deployer_type)
        self.set_anotations(annotations)
        self.pod_spec_mutators = pod_spec_mutators or []
        self.verify_ssl = verify_ssl

    def set_anotations(self, annotations):
        """set annotations for the pods of a deployed job

        The given mapping is copied, so later changes made by this class do
        not leak back into the caller's dictionary.

        :param annotations: dictionary of annotations {name:value}, or None

        """
        self.annotations = {}
        if annotations:
            self.annotations.update(annotations)

    def set_labels(self, labels, deployer_type):
        """set labels for the pods of a deployed job

        :param labels: dictionary of labels {label_name:label_value}
        :param deployer_type: deployer type name

        """
        self.labels = {'fairing-deployer': deployer_type}
        if labels:
            self.labels.update(labels)

    def deploy(self, pod_spec):  #pylint:disable=arguments-differ
        """deploy the training job using k8s client lib

        :param pod_spec: pod spec of deployed training job
        :returns: name of the created job, as reported by ``create_resource``

        """
        # A fresh id per deployment, exposed to the cluster as a label.
        self.job_id = str(uuid.uuid1())
        self.labels['fairing-id'] = self.job_id
        for fn in self.pod_spec_mutators:
            fn(self.backend, pod_spec, self.namespace)
        pod_template_spec = self.generate_pod_template_spec(pod_spec)
        pod_template_spec.spec.restart_policy = 'Never'
        # get_logs() asks for this container name, so it is forced here.
        pod_template_spec.spec.containers[0].name = 'fairing-job'
        self.deployment_spec = self.generate_deployment_spec(pod_template_spec)
        if self.output:
            api = k8s_client.ApiClient()
            job_output = api.sanitize_for_serialization(self.deployment_spec)
            print(json.dumps(job_output))

        name = self.create_resource()
        logger.warning("The %s %s launched.", self.deployer_type, name)

        if self.stream_log:
            self.get_logs()

        return name

    def create_resource(self):
        """create the job through the backend and return its name"""
        self._created_job = self.backend.create_job(self.namespace,
                                                    self.deployment_spec)
        return self._created_job.metadata.name

    def generate_pod_template_spec(self, pod_spec):
        """Generate a V1PodTemplateSpec initialized with correct metadata
            and with the provided pod_spec

        :param pod_spec: pod spec
        :raises TypeError: if ``pod_spec`` is not a ``V1PodSpec``

        """
        if not isinstance(pod_spec, k8s_client.V1PodSpec):
            raise TypeError('pod_spec must be a V1PodSpec, but got %s' %
                            type(pod_spec))
        # The Istio sidecar injection annotation is always forced to 'false',
        # overriding any user-supplied value.
        # NOTE(review): presumably so a sidecar does not keep the job pod from
        # completing -- confirm.
        if not self.annotations:
            self.annotations = {}
        self.annotations['sidecar.istio.io/inject'] = 'false'
        metadata = k8s_client.V1ObjectMeta(name="fairing-deployer",
                                           annotations=self.annotations,
                                           labels=self.labels)
        return k8s_client.V1PodTemplateSpec(metadata=metadata, spec=pod_spec)

    def generate_deployment_spec(self, pod_template_spec):
        """Generate a V1Job initialized with correct completion and
         parallelism (for HP search) and with the provided V1PodTemplateSpec

        :param pod_template_spec: V1PodTemplateSpec
        :raises TypeError: if ``pod_template_spec`` is not a ``V1PodTemplateSpec``

        """
        if not isinstance(pod_template_spec, k8s_client.V1PodTemplateSpec):
            # Single-line message: the previous triple-quoted literal embedded
            # a newline and source indentation in the error text.
            raise TypeError(
                'pod_template_spec must be a V1PodTemplateSpec, but got %s' %
                type(pod_template_spec))

        job_spec = k8s_client.V1JobSpec(
            template=pod_template_spec,
            parallelism=self.runs,
            completions=self.runs,
            backoff_limit=0,
        )

        return k8s_client.V1Job(api_version="batch/v1",
                                kind="Job",
                                metadata=k8s_client.V1ObjectMeta(
                                    name=self.job_name,
                                    generate_name=constants.JOB_DEFAULT_NAME,
                                    labels=self.labels),
                                spec=job_spec)

    def get_logs(self):
        """get logs from the deployed job, then clean it up if requested

        Must be called after the job has been created (``deploy`` or
        ``create_resource``).
        """
        self.backend.log(self._created_job.metadata.name,
                         self._created_job.metadata.namespace,
                         self.labels,
                         container="fairing-job")

        if self.cleanup:
            self.do_cleanup()

    def do_cleanup(self):
        """delete the created job using foreground propagation"""
        logger.warning("Cleaning up job %s...",
                       self._created_job.metadata.name)
        # NOTE(review): a fresh client Configuration is built here instead of
        # reusing the backend's; verify this targets the same cluster when a
        # custom config_file/context/client_configuration was supplied.
        client_config = k8s_client.Configuration()
        client_config.verify_ssl = self.verify_ssl
        api_client = k8s_client.ApiClient(configuration=client_config)
        k8s_client.BatchV1Api(api_client=api_client).delete_namespaced_job(
            self._created_job.metadata.name,
            self._created_job.metadata.namespace,
            body=k8s_client.V1DeleteOptions(propagation_policy='Foreground'))
Beispiel #2
0
class KFServing(DeployerInterface):
    """Serves a prediction endpoint using Kubeflow KFServing."""
    def __init__(self,
                 framework,
                 default_storage_uri=None,
                 canary_storage_uri=None,
                 canary_traffic_percent=0,
                 namespace=None,
                 labels=None,
                 annotations=None,
                 custom_default_container=None,
                 custom_canary_container=None,
                 isvc_name=None,
                 stream_log=False,
                 cleanup=False,
                 config_file=None,
                 context=None,
                 client_configuration=None,
                 persist_config=True):
        """
        :param framework: The framework for the InferenceService. One of
            'tensorflow', 'onnx', 'pytorch', 'sklearn', 'triton', 'xgboost'
            or 'custom'.
        :param default_storage_uri: URI pointing to Saved Model assets for default service.
            Required unless the framework is 'custom'.
        :param canary_storage_uri: URI pointing to Saved Model assets for canary service.
        :param canary_traffic_percent: The amount of traffic to sent to the canary, defaults to 0.
        :param namespace: The k8s namespace where the InferenceService will be deployed.
            When None, ``utils.get_default_target_namespace()`` is used.
        :param labels: Dictionary of extra labels for the InferenceService
            {label_name:label_value}.
        :param annotations: Dictionary of annotations for the InferenceService.
        :param custom_default_container: A flexible custom default container for arbitrary customer
                                 provided containers. Required when the framework is 'custom'.
        :param custom_canary_container: A flexible custom canary container for arbitrary customer
                                 provided containers.
        :param isvc_name: The InferenceService name.
        :param stream_log: Show log or not when InferenceService started, defaults to False.
        :param cleanup: Delete the kfserving or not, defaults to False.
        :param config_file: kubeconfig file, defaults to ~/.kube/config. Note that for the case
               that the SDK is running in cluster and you want to operate in another remote
               cluster, user must set config_file to load kube-config file explicitly.
        :param context: kubernetes context
        :param client_configuration: The kubernetes.client.Configuration to set configs to.
        :param persist_config: If True, config file will be updated when changed
        :raises RuntimeError: if the storage URI (non-custom framework) or the
            custom default container (custom framework) is missing.
        """
        self.framework = framework
        self.isvc_name = isvc_name
        self.default_storage_uri = default_storage_uri
        self.canary_storage_uri = canary_storage_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_container = custom_default_container
        self.custom_canary_container = custom_canary_container
        self.stream_log = stream_log
        self.backend = KubeManager(config_file=config_file,
                                   context=context,
                                   client_configuration=client_configuration,
                                   persist_config=persist_config)

        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

        if self.framework != 'custom' and self.default_storage_uri is None:
            raise RuntimeError("The default_storage_uri must be specified for "
                               "{} framework.".format(self.framework))
        if self.framework == 'custom' and self.custom_default_container is None:
            raise RuntimeError(
                "The custom_default_container must be specified "
                "for custom framework.")

    def set_labels(self, labels):
        """set label for deployed prediction

        A new ``fairing-id`` label is generated on every call; user labels are
        applied on top of it.

        :param labels: dictionary of labels {label_name:label_value}

        """
        self.fairing_id = str(uuid.uuid1())
        self.labels = {'fairing-id': self.fairing_id}
        if labels:
            self.labels.update(labels)

    def deploy(self, isvc):  # pylint:disable=arguments-differ,unused-argument
        """deploy kfserving endpoint

        :param isvc: InferenceService for deploying. Unused: the service is
            always generated from this object's own settings.
        :returns: name of the created InferenceService

        """
        self.created_isvc = self.backend.create_isvc(self.namespace,
                                                     self.generate_isvc())

        if self.stream_log:
            self.get_logs()

        isvc_name = self.created_isvc['metadata']['name']
        logger.info("Deployed the InferenceService %s successfully.",
                    isvc_name)

        if self.cleanup:
            logger.warning("Cleaning up InferenceService %s...", isvc_name)
            self.backend.delete_isvc(isvc_name, self.namespace)

        return isvc_name

    def generate_isvc(self):
        """generate the InferenceService from this object's settings

        :returns: a ``V1alpha2InferenceService`` with a default endpoint and,
            when canary settings were provided, a canary endpoint.
        """

        api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
        canary_predictor = None

        if self.framework == 'custom':
            default_predictor = self.generate_predictor_spec(
                self.framework, container=self.custom_default_container)
            if self.custom_canary_container is not None:
                canary_predictor = self.generate_predictor_spec(
                    self.framework, container=self.custom_canary_container)
        else:
            default_predictor = self.generate_predictor_spec(
                self.framework, storage_uri=self.default_storage_uri)
            if self.canary_storage_uri is not None:
                canary_predictor = self.generate_predictor_spec(
                    self.framework, storage_uri=self.canary_storage_uri)

        if canary_predictor:
            isvc_spec = V1alpha2InferenceServiceSpec(
                default=V1alpha2EndpointSpec(predictor=default_predictor),
                canary=V1alpha2EndpointSpec(predictor=canary_predictor),
                canary_traffic_percent=self.canary_traffic_percent)
        else:
            isvc_spec = V1alpha2InferenceServiceSpec(
                default=V1alpha2EndpointSpec(predictor=default_predictor),
                canary_traffic_percent=self.canary_traffic_percent)

        # Labels and annotations are attached to the resource metadata; they
        # were previously accepted by the constructor but silently dropped.
        return V1alpha2InferenceService(
            api_version=api_version,
            kind=constants.KFSERVING_KIND,
            metadata=k8s_client.V1ObjectMeta(
                name=self.isvc_name,
                generate_name=constants.KFSERVING_DEFAULT_NAME,
                namespace=self.namespace,
                labels=self.labels,
                annotations=self.annotations),
            spec=isvc_spec)

    def generate_predictor_spec(self,
                                framework,
                                storage_uri=None,
                                container=None):
        '''Generate predictor spec according to framework and
           default_storage_uri or custom container.

        :param framework: framework name selecting the predictor type
        :param storage_uri: model location, used by every framework but 'custom'
        :param container: container definition, used by the 'custom' framework
        :raises RuntimeError: if the framework is not supported
        '''
        # Dispatch on the ``framework`` argument (not ``self.framework``) so
        # the parameter actually controls which predictor is built.
        if framework == 'tensorflow':
            predictor = V1alpha2PredictorSpec(
                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri))
        elif framework == 'onnx':
            predictor = V1alpha2PredictorSpec(onnx=V1alpha2ONNXSpec(
                storage_uri=storage_uri))
        elif framework == 'pytorch':
            predictor = V1alpha2PredictorSpec(pytorch=V1alpha2PyTorchSpec(
                storage_uri=storage_uri))
        elif framework == 'sklearn':
            predictor = V1alpha2PredictorSpec(sklearn=V1alpha2SKLearnSpec(
                storage_uri=storage_uri))
        elif framework == 'triton':
            predictor = V1alpha2PredictorSpec(triton=V1alpha2TritonSpec(
                storage_uri=storage_uri))
        elif framework == 'xgboost':
            predictor = V1alpha2PredictorSpec(xgboost=V1alpha2XGBoostSpec(
                storage_uri=storage_uri))
        elif framework == 'custom':
            predictor = V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
                container=container))
        else:
            raise RuntimeError("Unsupported framework {}".format(framework))
        return predictor

    def get_logs(self):
        """get log from prediction pod (one-shot, ``follow=False``)"""
        name = self.created_isvc['metadata']['name']
        namespace = self.created_isvc['metadata']['namespace']

        self.backend.log(name,
                         namespace,
                         self.labels,
                         container=constants.KFSERVING_CONTAINER_NAME,
                         follow=False)
Beispiel #3
0
class ClusterBuilder(BaseBuilder):
    """Builds a docker image in a Kubernetes cluster.

    The build runs as a ``batch/v1`` Job whose pod spec is produced by the
    configured context source; the job's ``kaniko`` container logs are read
    through the ``KubeManager``.
    """
    def __init__(self,
                 registry=None,
                 image_name=constants.DEFAULT_IMAGE_NAME,
                 context_source=None,
                 preprocessor=None,
                 push=True,
                 base_image=constants.DEFAULT_BASE_IMAGE,
                 pod_spec_mutators=None,
                 namespace=None,
                 dockerfile_path=None,
                 cleanup=False,
                 executable_path_prefix=None):
        """
        :param registry: forwarded to ``BaseBuilder``
        :param image_name: forwarded to ``BaseBuilder``
        :param context_source: required; object used to ``prepare`` the build
            context, ``generate_pod_spec`` for the build pod and ``cleanup``
        :param preprocessor: forwarded to ``BaseBuilder``; supplies the build
            context tarball
        :param push: forwarded to ``BaseBuilder`` and to the context source
            when the pod spec is generated
        :param base_image: base image for the generated Dockerfile
        :param pod_spec_mutators: callables invoked as
            ``fn(manager, pod_spec, namespace)`` before the job is created
        :param namespace: namespace of the build job; falls back to
            ``utils.get_default_target_namespace()``
        :param dockerfile_path: existing Dockerfile to use instead of a
            generated one
        :param cleanup: delete the build job once its logs have been read
        :param executable_path_prefix: forwarded to the Dockerfile generator
        :raises RuntimeError: if ``context_source`` is not specified
        """
        super().__init__(registry=registry,
                         image_name=image_name,
                         push=push,
                         preprocessor=preprocessor,
                         base_image=base_image,
                         dockerfile_path=dockerfile_path)
        # Validate before creating the KubeManager so a misconfigured builder
        # fails without constructing a cluster client first.
        if context_source is None:
            raise RuntimeError("context_source is not specified")
        self.manager = KubeManager()
        self.context_source = context_source
        self.pod_spec_mutators = pod_spec_mutators or []
        self.namespace = namespace or utils.get_default_target_namespace()
        self.cleanup = cleanup
        self.executable_path_prefix = executable_path_prefix

    def build(self):
        """Build the image by running a build job in the cluster.

        Sets ``self.image_tag``, prepares the build context, creates the job,
        reads its logs, and always releases the context source afterwards.
        """
        logging.info("Building image using cluster builder.")
        install_reqs_before_copy = (
            self.preprocessor.is_requirements_txt_file_present())
        if self.dockerfile_path:
            dockerfile_path = self.dockerfile_path
        else:
            dockerfile_path = dockerfile.write_dockerfile(
                path_prefix=self.preprocessor.path_prefix,
                base_image=self.base_image,
                install_reqs_before_copy=install_reqs_before_copy,
                executable_path_prefix=self.executable_path_prefix)
        self.preprocessor.output_map[dockerfile_path] = 'Dockerfile'
        context_path, context_hash = self.preprocessor.context_tar_gz()
        self.image_tag = self.full_image_name(context_hash)
        self.context_source.prepare(context_path)

        # Once the context is prepared it must be released even when building
        # the spec, creating the job or reading the logs fails.
        try:
            created_job = self._run_build_job()
        finally:
            # Invoke upstream clean ups
            self.context_source.cleanup()

        # Cleanup build_job if requested by user
        # Otherwise build_job will be cleaned up by Kubernetes GC
        if self.cleanup:
            logging.warning("Cleaning up job %s...",
                            created_job.metadata.name)
            client.BatchV1Api().delete_namespaced_job(
                created_job.metadata.name,
                created_job.metadata.namespace,
                body=client.V1DeleteOptions(propagation_policy='Foreground'))

    def _run_build_job(self):
        """Create the build job, read its logs and return the created job."""
        labels = {
            'fairing-builder': 'kaniko',
            'fairing-build-id': str(uuid.uuid1()),
        }
        pod_spec = self.context_source.generate_pod_spec(
            self.image_tag, self.push)
        for fn in self.pod_spec_mutators:
            fn(self.manager, pod_spec, self.namespace)

        pod_spec_template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(
                generate_name="fairing-builder-",
                labels=labels,
                namespace=self.namespace,
                annotations={"sidecar.istio.io/inject": "false"},
            ),
            spec=pod_spec)
        job_spec = client.V1JobSpec(
            template=pod_spec_template,
            parallelism=1,
            completions=1,
            backoff_limit=0,
        )
        build_job = client.V1Job(api_version="batch/v1",
                                 kind="Job",
                                 metadata=client.V1ObjectMeta(
                                     generate_name="fairing-builder-",
                                     labels=labels,
                                 ),
                                 spec=job_spec)
        created_job = client.BatchV1Api().create_namespaced_job(
            self.namespace, build_job)

        self.manager.log(name=created_job.metadata.name,
                         namespace=created_job.metadata.namespace,
                         selectors=labels,
                         container="kaniko")
        return created_job
Beispiel #4
0
class KFServing(DeployerInterface):
    """Serves a prediction endpoint using Kubeflow KFServing."""

    def __init__(self, framework, default_model_uri=None, canary_model_uri=None,
                 canary_traffic_percent=0, namespace=None, labels=None, annotations=None,
                 custom_default_spec=None, custom_canary_spec=None, stream_log=True,
                 cleanup=False):
        """

        :param framework: The framework for the kfservice, such as Tensorflow,
            XGBoost and ScikitLearn etc. Use 'custom' together with
            ``custom_default_spec`` for arbitrary containers.
        :param default_model_uri: URI pointing to Saved Model assets for default service.
        :param canary_model_uri: URI pointing to Saved Model assets for canary service.
        :param canary_traffic_percent: The amount of traffic to sent to the canary, defaults to 0.
        :param namespace: The k8s namespace where the kfservice will be deployed.
            When None, ``utils.get_default_target_namespace()`` is used.
        :param labels: Dictionary of extra labels for the kfservice
            {label_name:label_value}.
        :param annotations: Annotations for the kfservice, passed to the
            resource metadata as given.
        :param custom_default_spec: A flexible custom default specification for arbitrary customer
                                 provided containers.
        :param custom_canary_spec: A flexible custom canary specification for arbitrary customer
                                 provided containers.
        :param stream_log: Show log or not when kfservice started, defaults to True.
        :param cleanup: Delete the kfserving or not, defaults to False.
        """
        self.framework = framework
        self.default_model_uri = default_model_uri
        self.canary_model_uri = canary_model_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_spec = custom_default_spec
        self.custom_canary_spec = custom_canary_spec
        self.stream_log = stream_log
        self.backend = KubeManager()

        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

    def set_labels(self, labels):
        """set label for deployed prediction

        A new ``fairing-id`` label is generated on every call; user labels are
        applied on top of it.

        :param labels: dictionary of labels {label_name:label_value}

        """
        self.fairing_id = str(uuid.uuid1())
        self.labels = {'fairing-id': self.fairing_id}
        if labels:
            self.labels.update(labels)

    def deploy(self, template_spec): # pylint:disable=arguments-differ,unused-argument
        """deploy kfserving endpoint

        :param template_spec: template spec. Unused: the kfservice is always
            generated from this object's own settings.
        :returns: name of the created kfservice

        """
        self.kfservice = self.generate_kfservice()
        self.created_kfserving = self.backend.create_kfserving(
            self.namespace, self.kfservice)
        if self.stream_log:
            self.get_logs()

        kfservice_name = self.created_kfserving['metadata']['name']
        logger.warning("Deployed the kfservice %s successfully.",
                       kfservice_name)

        if self.cleanup:
            logger.warning("Cleaning up kfservice %s...", kfservice_name)
            self.backend.delete_kfserving(kfservice_name, self.namespace)

        return kfservice_name

    def generate_kfservice(self):
        """generate kfserving template

        :returns: dictionary with ``kind``, ``apiVersion``, ``metadata`` and
            ``spec`` describing the kfservice
        :raises RuntimeError: if the default model URI (non-custom framework)
            or the custom default spec (custom framework) is missing
        """
        # Compare by value: identity comparison against a string literal is
        # implementation-dependent and can misclassify the framework.
        is_custom = self.framework == 'custom'

        spec = {'default': {}}
        canary = None
        if is_custom:
            if self.custom_default_spec is None:
                raise RuntimeError(
                    "The custom_default_spec must be defined if the framework is custom.")
            # TBD @jinchi Need to validate the custom_default_spec before executing.
            spec['default'][self.framework] = self.custom_default_spec
            # A canary is emitted only when a canary spec was supplied; the
            # previous check looked at the default spec and could publish a
            # canary whose spec was None.
            if self.custom_canary_spec is not None:
                canary = self.custom_canary_spec
        else:
            if self.default_model_uri is None:
                raise RuntimeError(
                    "The default_model_uri must be defined if the framework is not custom.")
            spec['default'][self.framework] = {'modelUri': self.default_model_uri}
            if self.canary_model_uri is not None:
                canary = {'modelUri': self.canary_model_uri}

        if canary is not None:
            spec['canary'] = {self.framework: canary}
            spec['canaryTrafficPercent'] = self.canary_traffic_percent

        metadata = k8s_client.V1ObjectMeta(
            generate_name=constants.KFSERVING_DEFAULT_NAME,
            namespace=self.namespace,
            labels=self.labels,
            annotations=self.annotations
        )

        kfservice = {}
        kfservice['kind'] = constants.KFSERVING_KIND
        kfservice['apiVersion'] = constants.KFSERVING_GROUP + \
            '/' + constants.KFSERVING_VERSION
        kfservice['metadata'] = metadata
        kfservice['spec'] = spec

        return kfservice

    def get_logs(self):
        """get log from prediction pod (one-shot, ``follow=False``)"""
        name = self.created_kfserving['metadata']['name']
        namespace = self.created_kfserving['metadata']['namespace']

        self.backend.log(name, namespace, self.labels,
                         container=constants.KFSERVING_CONTAINER_NAME, follow=False)