Example #1
0
    def __init__(self, namespace=None, runs=1, output=None,
                 cleanup=True, labels=None, job_name=constants.JOB_DEFAULT_NAME,
                 stream_log=True, deployer_type=constants.JOB_DEPLOPYER_TYPE,
                 pod_spec_mutators=None, annotations=None):
        """Store the job settings and create the Kubernetes backend.

        :param namespace: k8s namespace where the training's components will be
                deployed. When None, utils.get_default_target_namespace() is used.
        :param runs: Number of training(s) to be deployed. Hyperparameter search
                will generate multiple jobs.
        :param output: output
        :param cleanup: clean up deletes components after job finished
        :param labels: labels to be assigned to the training job; passed to
                set_labels() together with deployer_type
        :param job_name: name of the job (also used as the pod name)
        :param stream_log: stream the log?
        :param deployer_type: type of deployer
        :param pod_spec_mutators: pod spec mutators; a falsy value is stored
                as an empty list (Default value = None)
        :param annotations: annotations, passed to set_anotations()
        """
        # Only an explicit None triggers the default-namespace lookup.
        self.namespace = (utils.get_default_target_namespace()
                          if namespace is None else namespace)

        # Used as pod and job name
        self.job_name = job_name
        self.deployer_type = deployer_type
        self.deployment_spec = None
        self.runs = runs
        self.output = output
        self.backend = KubeManager()
        self.cleanup = cleanup
        self.stream_log = stream_log
        self.set_labels(labels, deployer_type)
        self.set_anotations(annotations)
        self.pod_spec_mutators = pod_spec_mutators if pod_spec_mutators else []
Example #2
0
 def __init__(self,
              registry=None,
              image_name=constants.DEFAULT_IMAGE_NAME,
              context_source=None,
              preprocessor=None,
              push=True,
              base_image=constants.DEFAULT_BASE_IMAGE,
              pod_spec_mutators=None,
              namespace=None,
              dockerfile_path=None,
              cleanup=False,
              executable_path_prefix=None):
     """Initialise the base builder and the cluster-specific settings.

     :param registry: forwarded to the parent constructor
     :param image_name: forwarded to the parent constructor
     :param context_source: required; stored as self.context_source
     :param preprocessor: forwarded to the parent constructor
     :param push: forwarded to the parent constructor
     :param base_image: forwarded to the parent constructor
     :param pod_spec_mutators: stored as a list; a falsy value becomes []
     :param namespace: target namespace; a falsy value is replaced by
         utils.get_default_target_namespace()
     :param dockerfile_path: forwarded to the parent constructor
     :param cleanup: stored as self.cleanup
     :param executable_path_prefix: stored as self.executable_path_prefix
     :raises RuntimeError: if context_source is None
     """
     parent_kwargs = dict(registry=registry,
                          image_name=image_name,
                          push=push,
                          preprocessor=preprocessor,
                          base_image=base_image,
                          dockerfile_path=dockerfile_path)
     super().__init__(**parent_kwargs)
     self.manager = KubeManager()

     # The context source is mandatory even though it defaults to None.
     if context_source is None:
         raise RuntimeError("context_source is not specified")
     self.context_source = context_source

     self.pod_spec_mutators = pod_spec_mutators if pod_spec_mutators else []
     if namespace:
         self.namespace = namespace
     else:
         self.namespace = utils.get_default_target_namespace()
     self.cleanup = cleanup
     self.executable_path_prefix = executable_path_prefix
Example #3
0
    def __init__(self,
                 namespace=None,
                 runs=1,
                 output=None,
                 cleanup=True,
                 labels=None,
                 job_name=None,
                 stream_log=True,
                 deployer_type=constants.JOB_DEPLOPYER_TYPE,
                 pod_spec_mutators=None,
                 annotations=None,
                 config_file=None,
                 context=None,
                 client_configuration=None,
                 persist_config=True,
                 verify_ssl=True):
        """Store the job settings and create a configured Kubernetes backend.

        :param namespace: k8s namespace where the training's components will be
                deployed. When None, utils.get_default_target_namespace() is used.
        :param runs: Number of training(s) to be deployed. Hyperparameter search
                will generate multiple jobs.
        :param output: output
        :param cleanup: clean up deletes components after job finished
        :param labels: labels to be assigned to the training job; passed to
                set_labels() together with deployer_type
        :param job_name: name of the job (also used as the pod name)
        :param stream_log: stream the log?
        :param deployer_type: type of deployer
        :param pod_spec_mutators: pod spec mutators; a falsy value is stored
                as an empty list (Default value = None)
        :param annotations: annotations, passed to set_anotations()
        :param config_file: kubeconfig file, defaults to ~/.kube/config. Note that for the case
               that the SDK is running in cluster and you want to operate in another remote
               cluster, user must set config_file to load kube-config file explicitly.
        :param context: kubernetes context
        :param client_configuration: The kubernetes.client.Configuration to set configs to.
        :param persist_config: If True, config file will be updated when changed
        :param verify_ssl: use ssl verify or not, set in the client config
        """
        # Only an explicit None triggers the default-namespace lookup.
        self.namespace = (utils.get_default_target_namespace()
                          if namespace is None else namespace)

        # Used as pod and job name
        self.job_name = job_name
        self.deployer_type = deployer_type
        self.deployment_spec = None
        self.runs = runs
        self.output = output

        # All cluster-connection options are handed to the backend unchanged.
        kube_options = dict(config_file=config_file,
                            context=context,
                            client_configuration=client_configuration,
                            persist_config=persist_config,
                            verify_ssl=verify_ssl)
        self.backend = KubeManager(**kube_options)

        self.cleanup = cleanup
        self.stream_log = stream_log
        self.set_labels(labels, deployer_type)
        self.set_anotations(annotations)
        self.pod_spec_mutators = pod_spec_mutators if pod_spec_mutators else []
        self.verify_ssl = verify_ssl
Example #4
0
    def __init__(self,
                 framework,
                 default_storage_uri=None,
                 canary_storage_uri=None,
                 canary_traffic_percent=0,
                 namespace=None,
                 labels=None,
                 annotations=None,
                 custom_default_container=None,
                 custom_canary_container=None,
                 isvc_name=None,
                 stream_log=False,
                 cleanup=False):
        """Store the InferenceService settings and validate the required ones.

        :param framework: The framework for the InferenceService, such as Tensorflow,
            XGBoost and ScikitLearn etc.
        :param default_storage_uri: URI pointing to Saved Model assets for default service.
        :param canary_storage_uri: URI pointing to Saved Model assets for canary service.
        :param canary_traffic_percent: The amount of traffic to sent to the canary, defaults to 0.
        :param namespace: The k8s namespace where the InferenceService will be deployed.
            When None, utils.get_default_target_namespace() is used.
        :param labels: Labels for the InferenceService, passed to set_labels().
        :param annotations: Annotations for the InferenceService.
        :param custom_default_container: A flexible custom default container for arbitrary
            customer provided containers.
        :param custom_canary_container: A flexible custom canary container for arbitrary
            customer provided containers.
        :param isvc_name: The InferenceService name.
        :param stream_log: Show log or not when InferenceService started, defaults to False.
        :param cleanup: Delete the kfserving or not, defaults to False.
        :raises RuntimeError: if a non-custom framework has no default_storage_uri,
            or the custom framework has no custom_default_container.
        """
        self.framework = framework
        self.isvc_name = isvc_name
        self.default_storage_uri = default_storage_uri
        self.canary_storage_uri = canary_storage_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_container = custom_default_container
        self.custom_canary_container = custom_canary_container
        self.stream_log = stream_log
        self.backend = KubeManager()

        # Only an explicit None triggers the default-namespace lookup.
        self.namespace = (utils.get_default_target_namespace()
                          if namespace is None else namespace)

        # Each framework kind has exactly one mandatory model source.
        if self.framework != 'custom':
            if self.default_storage_uri is None:
                message = "The default_storage_uri must be specified for {} framework."
                raise RuntimeError(message.format(self.framework))
        elif self.custom_default_container is None:
            raise RuntimeError(
                "The custom_default_container must be specified for custom framework.")
Example #5
0
 def __init__(self, endpoint_url, minio_secret, minio_secret_key,
              region_name):
     """Remember the connection settings and create a KubeManager.

     :param endpoint_url: stored as self.endpoint_url
     :param minio_secret: stored as self.minio_secret
     :param minio_secret_key: stored as self.minio_secret_key
     :param region_name: stored as self.region_name
     """
     # Plain settings first, recorded exactly as given.
     self.endpoint_url, self.minio_secret = endpoint_url, minio_secret
     self.minio_secret_key, self.region_name = minio_secret_key, region_name
     # Note the capitalised attribute name; other code may rely on it.
     self.Manager = KubeManager()
Example #6
0
def get_ibm_cos_credentials(namespace):
    """
    Get the IBM COS credential from secret.

    The secret named constants.IBM_COS_CREDS_SECRET_NAME is read, the entry
    constants.IBM_COS_CREDS_FILE_NAME is base64-decoded and parsed as JSON,
    and the HMAC key pair under 'cos_hmac_keys' is returned.

    :param namespace(str): The namespace that IBM COS credential secret created in.
    :returns: tuple ``(access_key_id, secret_access_key)``; a key missing from
        'cos_hmac_keys' is returned as an empty string.
    :raises Exception: if the secret does not exist in the namespace.
    :raises RuntimeError: if the credentials have no (or empty) 'cos_hmac_keys'.
    """
    secret_name = constants.IBM_COS_CREDS_SECRET_NAME
    if not KubeManager().secret_exists(secret_name, namespace):
        raise Exception("Secret '{}' not found in namespace '{}'".format(
            secret_name, namespace))

    secret = client.CoreV1Api().read_namespaced_secret(secret_name, namespace)
    creds_data = secret.data[constants.IBM_COS_CREDS_FILE_NAME]
    creds_json = base64.b64decode(creds_data).decode('utf-8')

    cos_creds = json.loads(creds_json)
    hmac_keys = cos_creds.get('cos_hmac_keys', '')
    if not hmac_keys:
        # Adjacent literals instead of a backslash continuation: the latter
        # embedded the next line's indentation into the error message.
        raise RuntimeError(
            "Kaniko needs AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY "
            "if using S3 Bucket. Please use HMAC Credential.")

    aws_access_key_id = hmac_keys.get('access_key_id', '')
    aws_secret_access_key = hmac_keys.get('secret_access_key', '')
    return aws_access_key_id, aws_secret_access_key
Example #7
0
 def __init__(self,
              gcp_project=None,
              credentials_file=os.environ.get(constants.GOOGLE_CREDS_ENV),
              namespace='default'):
     """Remember the GCP settings and create a KubeManager.

     :param gcp_project: stored as self.gcp_project
     :param credentials_file: stored as self.credentials_file. The default is
         read from the environment variable named by constants.GOOGLE_CREDS_ENV
         once, when this function is defined, not on each call.
     :param namespace: stored as self.namespace, defaults to 'default'
     """
     self.gcp_project, self.credentials_file = gcp_project, credentials_file
     self.manager = KubeManager()
     self.namespace = namespace
Example #8
0
 def __init__(self,
              aws_account=None,
              region=None,
              bucket_name=None):
     """Remember the AWS settings and create a KubeManager.

     :param aws_account: stored as self.aws_account
     :param region: stored as self.region; a falsy value becomes 'us-east-1'
     :param bucket_name: stored as self.bucket_name
     """
     self.aws_account = aws_account
     self.manager = KubeManager()
     # Any falsy region (None, '') selects the fallback region.
     self.region = region if region else 'us-east-1'
     self.bucket_name = bucket_name
Example #9
0
    def __init__(self, framework, default_model_uri=None, canary_model_uri=None,
                 canary_traffic_percent=0, namespace=None, labels=None, annotations=None,
                 custom_default_spec=None, custom_canary_spec=None, stream_log=True,
                 cleanup=False):
        """Store the kfservice settings and create the Kubernetes backend.

        :param framework: The framework for the kfservice, such as Tensorflow,
            XGBoost and ScikitLearn etc.
        :param default_model_uri: URI pointing to Saved Model assets for default service.
        :param canary_model_uri: URI pointing to Saved Model assets for canary service.
        :param canary_traffic_percent: The amount of traffic to sent to the canary, defaults to 0.
        :param namespace: The k8s namespace where the kfservice will be deployed.
            When None, utils.get_default_target_namespace() is used.
        :param labels: Labels for the kfservice, passed to set_labels().
        :param annotations: Annotations for the kfservice.
        :param custom_default_spec: A flexible custom default specification for arbitrary
            customer provided containers.
        :param custom_canary_spec: A flexible custom canary specification for arbitrary
            customer provided containers.
        :param stream_log: Show log or not when kfservice started, defaults to True.
        :param cleanup: Delete the kfserving or not, defaults to False.
        """
        self.framework = framework
        self.default_model_uri = default_model_uri
        self.canary_model_uri = canary_model_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_spec = custom_default_spec
        self.custom_canary_spec = custom_canary_spec
        self.stream_log = stream_log
        self.backend = KubeManager()

        # Only an explicit None triggers the default-namespace lookup.
        self.namespace = (utils.get_default_target_namespace()
                          if namespace is None else namespace)
Example #10
0
def get_azure_credentials(namespace):
    """Build Azure service principal credentials from a Kubernetes secret.

    Reads the secret named constants.AZURE_CREDS_SECRET_NAME and uses its
    AZ_CLIENT_ID, AZ_CLIENT_SECRET, AZ_TENANT_ID and AZ_SUBSCRIPTION_ID entries.

    :param namespace: namespace in which the secret is looked up
    :returns: tuple ``(ServicePrincipalCredentials, subscription_id)``
    :raises Exception: if the secret does not exist in the namespace
    """
    secret_name = constants.AZURE_CREDS_SECRET_NAME
    secret_found = KubeManager().secret_exists(secret_name, namespace)
    if not secret_found:
        message = "Secret '{}' not found in namespace '{}'"
        raise Exception(message.format(secret_name, namespace))

    core_api = client.CoreV1Api()
    secret_data = core_api.read_namespaced_secret(secret_name, namespace).data

    client_id = get_plain_secret_value(secret_data, 'AZ_CLIENT_ID')
    client_secret = get_plain_secret_value(secret_data, 'AZ_CLIENT_SECRET')
    tenant_id = get_plain_secret_value(secret_data, 'AZ_TENANT_ID')
    # The credentials object is created before the subscription id is read,
    # matching the original evaluation order.
    sp_credentials = ServicePrincipalCredentials(client_id=client_id,
                                                 secret=client_secret,
                                                 tenant=tenant_id)
    subscription_id = get_plain_secret_value(secret_data, 'AZ_SUBSCRIPTION_ID')
    return sp_credentials, subscription_id
Example #11
0
    def get_builder(self, preprocessor, base_image, registry, needs_deps_installation=True,
                    pod_spec_mutators=None):
        """Creates a builder instance with right config for GKE

        Selection order: AppendBuilder when no dependency installation is
        needed; ClusterBuilder when running inside Kubernetes or when no docker
        daemon is available; DockerBuilder otherwise.

        :param preprocessor: Preprocessor to use to modify inputs
        :param base_image: Base image to use for this job
        :param registry: Registry to push image to. Example: gcr.io/kubeflow-images
        :param needs_deps_installation:  need depends on installation(Default value = True)
        :param pod_spec_mutators: list of functions that is used to mutate the podsspec.
                                  e.g. fairing.cloud.gcp.add_gcp_credentials_if_exists
                                  This can used to set things like volumes and security context.
                                  The list passed in is not modified.
                                  (Default value =None)
        :returns: an AppendBuilder, ClusterBuilder or DockerBuilder instance
        :raises RuntimeError: if no builder could be selected

        """

        # Work on a copy: appending to the caller's list would make repeated
        # calls with the same list accumulate duplicate GCP mutators.
        pod_spec_mutators = list(pod_spec_mutators or [])
        pod_spec_mutators.append(gcp.add_gcp_credentials_if_exists)

        if not needs_deps_installation:
            return AppendBuilder(preprocessor=preprocessor,
                                 base_image=base_image,
                                 registry=registry)
        elif (utils.is_running_in_k8s() or
              not ml_tasks_utils.is_docker_daemon_exists()):
            return ClusterBuilder(preprocessor=preprocessor,
                                  base_image=base_image,
                                  registry=registry,
                                  pod_spec_mutators=pod_spec_mutators,
                                  namespace=self._namespace,
                                  context_source=self._build_context_source)
        elif ml_tasks_utils.is_docker_daemon_exists():
            return DockerBuilder(preprocessor=preprocessor,
                                 base_image=base_image,
                                 registry=registry)
        else:
            # NOTE(review): this branch is only reached if
            # is_docker_daemon_exists() reports True in the check above and
            # False here, i.e. the daemon state changed between the two calls.
            msg = ["Not able to guess the right builder for this job!"]
            # NOTE(review): the hint below is added when the secret lookup
            # succeeds, which looks inverted relative to its text - confirm.
            if KubeManager().secret_exists(constants.GCP_CREDS_SECRET_NAME, self._namespace):
                msg.append("It seems you don't have permission to list/access secrets in your "
                           "Kubeflow cluster. We need this permission in order to build a docker "
                           "image using Kubeflow cluster. Adding Kubeneters Admin role to the "
                           "service account you are using might solve this issue.")
            if not utils.is_running_in_k8s():
                msg.append(" Also If you are using 'sudo' to access docker in your system you can"
                           " solve this problem by adding your username to the docker group. "
                           "Reference: https://docs.docker.com/install/linux/linux-postinstall/"
                           "#manage-docker-as-a-non-root-user You need to logout and login to "
                           "get change activated.")
            message = " ".join(msg)
            raise RuntimeError(message)
Example #12
0
class ClusterBuilder(BaseBuilder):
    """Builds a docker image in a Kubernetes cluster.

    The build runs as a Kubernetes Job whose pod spec is produced by the
    configured context source and adjusted by the pod spec mutators.
    """
    def __init__(self,
                 registry=None,
                 image_name=constants.DEFAULT_IMAGE_NAME,
                 context_source=None,
                 preprocessor=None,
                 push=True,
                 base_image=constants.DEFAULT_BASE_IMAGE,
                 pod_spec_mutators=None,
                 namespace=None,
                 dockerfile_path=None,
                 cleanup=False,
                 executable_path_prefix=None):
        """Initialise the base builder and the cluster-specific settings.

        :param registry: forwarded to the parent constructor
        :param image_name: forwarded to the parent constructor
        :param context_source: required; provides prepare(), generate_pod_spec()
            and cleanup() for the build
        :param preprocessor: forwarded to the parent constructor
        :param push: forwarded to the parent constructor
        :param base_image: forwarded to the parent constructor
        :param pod_spec_mutators: functions called as fn(manager, pod_spec, namespace)
            before the job is created; a falsy value becomes []
        :param namespace: namespace for the build job; a falsy value is replaced
            by utils.get_default_target_namespace()
        :param dockerfile_path: forwarded to the parent constructor
        :param cleanup: when true, the build job is deleted after the build
        :param executable_path_prefix: passed to dockerfile.write_dockerfile()
        :raises RuntimeError: if context_source is None
        """
        parent_kwargs = dict(registry=registry,
                             image_name=image_name,
                             push=push,
                             preprocessor=preprocessor,
                             base_image=base_image,
                             dockerfile_path=dockerfile_path)
        super().__init__(**parent_kwargs)
        self.manager = KubeManager()

        # The context source is mandatory even though it defaults to None.
        if context_source is None:
            raise RuntimeError("context_source is not specified")
        self.context_source = context_source

        self.pod_spec_mutators = pod_spec_mutators if pod_spec_mutators else []
        if namespace:
            self.namespace = namespace
        else:
            self.namespace = utils.get_default_target_namespace()
        self.cleanup = cleanup
        self.executable_path_prefix = executable_path_prefix

    def build(self):
        """Package the build context, run the build job and stream its log.

        Sets self.image_tag from the hash of the packaged context. The context
        source's cleanup() is invoked after the log call returns, and the job
        itself is deleted only when self.cleanup is true.
        """
        logging.info("Building image using cluster builder.")
        reqs_before_copy = self.preprocessor.is_requirements_txt_file_present()

        # Use the user-supplied Dockerfile if there is one, else generate it.
        dockerfile_path = self.dockerfile_path
        if not dockerfile_path:
            dockerfile_path = dockerfile.write_dockerfile(
                path_prefix=self.preprocessor.path_prefix,
                base_image=self.base_image,
                install_reqs_before_copy=reqs_before_copy,
                executable_path_prefix=self.executable_path_prefix)
        self.preprocessor.output_map[dockerfile_path] = 'Dockerfile'

        context_path, context_hash = self.preprocessor.context_tar_gz()
        self.image_tag = self.full_image_name(context_hash)
        self.context_source.prepare(context_path)

        # The labels are also the selectors used to find the build pod's log.
        labels = {
            'fairing-builder': 'kaniko',
            'fairing-build-id': str(uuid.uuid1()),
        }
        pod_spec = self.context_source.generate_pod_spec(self.image_tag,
                                                         self.push)
        for mutate in self.pod_spec_mutators:
            mutate(self.manager, pod_spec, self.namespace)

        pod_metadata = client.V1ObjectMeta(
            generate_name="fairing-builder-",
            labels=labels,
            namespace=self.namespace,
            annotations={"sidecar.istio.io/inject": "false"},
        )
        pod_spec_template = client.V1PodTemplateSpec(metadata=pod_metadata,
                                                     spec=pod_spec)
        job_spec = client.V1JobSpec(
            template=pod_spec_template,
            parallelism=1,
            completions=1,
            backoff_limit=0,
        )
        job_metadata = client.V1ObjectMeta(generate_name="fairing-builder-",
                                           labels=labels)
        build_job = client.V1Job(api_version="batch/v1",
                                 kind="Job",
                                 metadata=job_metadata,
                                 spec=job_spec)
        created_job = client.BatchV1Api().create_namespaced_job(
            self.namespace, build_job)

        self.manager.log(name=created_job.metadata.name,
                         namespace=created_job.metadata.namespace,
                         selectors=labels,
                         container="kaniko")

        # Invoke upstream clean ups
        self.context_source.cleanup()

        # Cleanup build_job if requested by user
        # Otherwise build_job will be cleaned up by Kubernetes GC
        if not self.cleanup:
            return
        logging.warning("Cleaning up job {}...".format(
            created_job.metadata.name))
        client.BatchV1Api().delete_namespaced_job(
            created_job.metadata.name,
            created_job.metadata.namespace,
            body=client.V1DeleteOptions(propagation_policy='Foreground'))
Example #13
0
class KFServing(DeployerInterface):
    """Serves a prediction endpoint using Kubeflow KFServing."""

    def __init__(self, framework, default_model_uri=None, canary_model_uri=None,
                 canary_traffic_percent=0, namespace=None, labels=None, annotations=None,
                 custom_default_spec=None, custom_canary_spec=None, stream_log=True,
                 cleanup=False):
        """Store the kfservice settings and create the Kubernetes backend.

        :param framework: The framework for the kfservice, such as Tensorflow,
            XGBoost and ScikitLearn etc.
        :param default_model_uri: URI pointing to Saved Model assets for default service.
        :param canary_model_uri: URI pointing to Saved Model assets for canary service.
        :param canary_traffic_percent: The amount of traffic to sent to the canary, defaults to 0.
        :param namespace: The k8s namespace where the kfservice will be deployed.
            When None, utils.get_default_target_namespace() is used.
        :param labels: Labels for the kfservice, passed to set_labels().
        :param annotations: Annotations for the kfservice.
        :param custom_default_spec: A flexible custom default specification for arbitrary customer
                                 provided containers.
        :param custom_canary_spec: A flexible custom canary specification for arbitrary customer
                                 provided containers.
        :param stream_log: Show log or not when kfservice started, defaults to True.
        :param cleanup: Delete the kfserving or not, defaults to False.
        """
        self.framework = framework
        self.default_model_uri = default_model_uri
        self.canary_model_uri = canary_model_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_spec = custom_default_spec
        self.custom_canary_spec = custom_canary_spec
        self.stream_log = stream_log
        self.backend = KubeManager()

        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

    def set_labels(self, labels):
        """set label for deployed prediction

        A fresh 'fairing-id' label is always generated; user labels are merged
        on top of it and may override it.

        :param labels: dictionary of labels {label_name:label_value}

        """
        self.fairing_id = str(uuid.uuid1())
        self.labels = {'fairing-id': self.fairing_id}
        if labels:
            self.labels.update(labels)

    def deploy(self, template_spec): # pylint:disable=arguments-differ,unused-argument
        """deploy kfserving endpoint

        :param template_spec: template spec (unused by this deployer)
        :returns: name of the created kfservice

        """
        self.kfservice = self.generate_kfservice()
        self.created_kfserving = self.backend.create_kfserving(
            self.namespace, self.kfservice)
        if self.stream_log:
            self.get_logs()

        kfservice_name = self.created_kfserving['metadata']['name']
        logger.warning(
            "Deployed the kfservice {} successfully.".format(kfservice_name))

        if self.cleanup:
            logger.warning("Cleaning up kfservice {}...".format(kfservice_name))
            self.backend.delete_kfserving(kfservice_name, self.namespace)

        return kfservice_name

    def generate_kfservice(self):
        """ generate kfserving template

        :returns: dict with 'kind', 'apiVersion', 'metadata' and 'spec' entries
        :raises RuntimeError: if the default model source required by the
            framework (model URI, or custom spec for 'custom') is missing
        """
        # Compare by value: an identity check against a string literal is not
        # guaranteed to match equal strings.
        if self.framework != 'custom':
            if self.default_model_uri is None:
                raise RuntimeError(
                    "The default_model_uri must be defined if the framework is not custom.")
            default_spec = {'modelUri': self.default_model_uri}
            canary_spec = None
            if self.canary_model_uri is not None:
                canary_spec = {'modelUri': self.canary_model_uri}
        else:
            if self.custom_default_spec is None:
                raise RuntimeError(
                    "The custom_default_spec must be defined if the framework is custom.")
            # TBD @jinchi Need to validate the custom_default_spec before executing.
            default_spec = self.custom_default_spec
            # The canary is driven by the canary spec, not the default one.
            canary_spec = self.custom_canary_spec

        spec = {'default': {self.framework: default_spec}}
        if canary_spec is not None:
            spec['canary'] = {self.framework: canary_spec}
            spec['canaryTrafficPercent'] = self.canary_traffic_percent

        metadata = k8s_client.V1ObjectMeta(
            generate_name=constants.KFSERVING_DEFAULT_NAME,
            namespace=self.namespace,
            labels=self.labels,
            annotations=self.annotations
        )

        kfservice = {}
        kfservice['kind'] = constants.KFSERVING_KIND
        kfservice['apiVersion'] = constants.KFSERVING_GROUP + \
            '/' + constants.KFSERVING_VERSION
        kfservice['metadata'] = metadata
        kfservice['spec'] = spec

        return kfservice

    def get_logs(self):
        """ get log from prediction pod"""
        name = self.created_kfserving['metadata']['name']
        namespace = self.created_kfserving['metadata']['namespace']

        self.backend.log(name, namespace, self.labels,
                         container=constants.KFSERVING_CONTAINER_NAME, follow=False)
Example #14
0
class KFServing(DeployerInterface):
    """Serves a prediction endpoint using Kubeflow KFServing."""
    def __init__(self,
                 framework,
                 default_storage_uri=None,
                 canary_storage_uri=None,
                 canary_traffic_percent=0,
                 namespace=None,
                 labels=None,
                 annotations=None,
                 custom_default_container=None,
                 custom_canary_container=None,
                 isvc_name=None,
                 stream_log=False,
                 cleanup=False,
                 config_file=None,
                 context=None,
                 client_configuration=None,
                 persist_config=True):
        """
        :param framework: The framework for the InferenceService, such as Tensorflow,
            XGBoost and ScikitLearn etc.
        :param default_storage_uri: URI pointing to Saved Model assets for default service.
        :param canary_storage_uri: URI pointing to Saved Model assets for canary service.
        :param canary_traffic_percent: The amount of traffic to sent to the canary, defaults to 0.
        :param namespace: The k8s namespace where the InferenceService will be deployed.
            When None, utils.get_default_target_namespace() is used.
        :param labels: Labels for the InferenceService, passed to set_labels().
        :param annotations: Annotations for the InferenceService.
        :param custom_default_container: A flexible custom default container for arbitrary customer
                                 provided containers.
        :param custom_canary_container: A flexible custom canary container for arbitrary customer
                                 provided containers.
        :param isvc_name: The InferenceService name.
        :param stream_log: Show log or not when InferenceService started, defaults to False.
        :param cleanup: Delete the kfserving or not, defaults to False.
        :param config_file: kubeconfig file, defaults to ~/.kube/config. Note that for the case
               that the SDK is running in cluster and you want to operate in another remote
               cluster, user must set config_file to load kube-config file explicitly.
        :param context: kubernetes context
        :param client_configuration: The kubernetes.client.Configuration to set configs to.
        :param persist_config: If True, config file will be updated when changed
        :raises RuntimeError: if a non-custom framework has no default_storage_uri,
            or the custom framework has no custom_default_container.
        """
        self.framework = framework
        self.isvc_name = isvc_name
        self.default_storage_uri = default_storage_uri
        self.canary_storage_uri = canary_storage_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_container = custom_default_container
        self.custom_canary_container = custom_canary_container
        self.stream_log = stream_log
        self.backend = KubeManager(config_file=config_file,
                                   context=context,
                                   client_configuration=client_configuration,
                                   persist_config=persist_config)

        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

        if self.framework != 'custom' and self.default_storage_uri is None:
            raise RuntimeError("The default_storage_uri must be specified for "
                               "{} framework.".format(self.framework))
        if self.framework == 'custom' and self.custom_default_container is None:
            raise RuntimeError(
                "The custom_default_container must be specified "
                "for custom framework.")

    def set_labels(self, labels):
        """set label for deployed prediction

        A fresh 'fairing-id' label is always generated; user labels are merged
        on top of it and may override it.

        :param labels: dictionary of labels {label_name:label_value}

        """
        self.fairing_id = str(uuid.uuid1())
        self.labels = {'fairing-id': self.fairing_id}
        if labels:
            self.labels.update(labels)

    def deploy(self, isvc):  # pylint:disable=arguments-differ,unused-argument
        """deploy kfserving endpoint

        :param isvc: InferenceService for deploying (unused; the service is
            generated from this deployer's own settings).
        :returns: name of the created InferenceService

        """
        self.created_isvc = self.backend.create_isvc(self.namespace,
                                                     self.generate_isvc())

        if self.stream_log:
            self.get_logs()

        isvc_name = self.created_isvc['metadata']['name']
        logger.info(
            "Deployed the InferenceService {} successfully.".format(isvc_name))

        if self.cleanup:
            logger.warning(
                "Cleaning up InferenceService {}...".format(isvc_name))
            self.backend.delete_isvc(isvc_name, self.namespace)

        return isvc_name

    def generate_isvc(self):
        """ generate InferenceService

        :returns: a V1alpha2InferenceService with a default predictor and,
            when a canary source is configured, a canary predictor
        """

        api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION

        if self.framework == 'custom':
            default_predictor = self.generate_predictor_spec(
                self.framework, container=self.custom_default_container)
        else:
            default_predictor = self.generate_predictor_spec(
                self.framework, storage_uri=self.default_storage_uri)

        canary_predictor = None
        if self.framework != 'custom' and self.canary_storage_uri is not None:
            canary_predictor = self.generate_predictor_spec(
                self.framework, storage_uri=self.canary_storage_uri)
        if self.framework == 'custom' and self.custom_canary_container is not None:
            canary_predictor = self.generate_predictor_spec(
                self.framework, container=self.custom_canary_container)

        if canary_predictor:
            isvc_spec = V1alpha2InferenceServiceSpec(
                default=V1alpha2EndpointSpec(predictor=default_predictor),
                canary=V1alpha2EndpointSpec(predictor=canary_predictor),
                canary_traffic_percent=self.canary_traffic_percent)
        else:
            isvc_spec = V1alpha2InferenceServiceSpec(
                default=V1alpha2EndpointSpec(predictor=default_predictor),
                canary_traffic_percent=self.canary_traffic_percent)

        # Attach the labels and annotations collected in __init__: get_logs()
        # selects by self.labels, so they have to be on the created resource.
        return V1alpha2InferenceService(
            api_version=api_version,
            kind=constants.KFSERVING_KIND,
            metadata=k8s_client.V1ObjectMeta(
                name=self.isvc_name,
                generate_name=constants.KFSERVING_DEFAULT_NAME,
                namespace=self.namespace,
                labels=self.labels,
                annotations=self.annotations),
            spec=isvc_spec)

    def generate_predictor_spec(self,
                                framework,
                                storage_uri=None,
                                container=None):
        '''Generate predictor spec according to framework and
           default_storage_uri or custom container.

        :param framework: framework name selecting the predictor type
        :param storage_uri: model location, used by every non-custom framework
        :param container: container definition, used by the 'custom' framework
        :returns: a V1alpha2PredictorSpec for the requested framework
        :raises RuntimeError: if the framework is not supported
        '''
        # Dispatch on the argument rather than self.framework so the method
        # honours what the caller asked for.
        if framework == 'tensorflow':
            predictor = V1alpha2PredictorSpec(
                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri))
        elif framework == 'onnx':
            predictor = V1alpha2PredictorSpec(onnx=V1alpha2ONNXSpec(
                storage_uri=storage_uri))
        elif framework == 'pytorch':
            predictor = V1alpha2PredictorSpec(pytorch=V1alpha2PyTorchSpec(
                storage_uri=storage_uri))
        elif framework == 'sklearn':
            predictor = V1alpha2PredictorSpec(sklearn=V1alpha2SKLearnSpec(
                storage_uri=storage_uri))
        elif framework == 'triton':
            predictor = V1alpha2PredictorSpec(triton=V1alpha2TritonSpec(
                storage_uri=storage_uri))
        elif framework == 'xgboost':
            predictor = V1alpha2PredictorSpec(xgboost=V1alpha2XGBoostSpec(
                storage_uri=storage_uri))
        elif framework == 'custom':
            predictor = V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
                container=container))
        else:
            raise RuntimeError("Unsupported framework {}".format(framework))
        return predictor

    def get_logs(self):
        """ get log from prediction pod"""
        name = self.created_isvc['metadata']['name']
        namespace = self.created_isvc['metadata']['namespace']

        self.backend.log(name,
                         namespace,
                         self.labels,
                         container=constants.KFSERVING_CONTAINER_NAME,
                         follow=False)
Example #15
0
class Job(DeployerInterface):  #pylint:disable=too-many-instance-attributes
    """Build the k8s Job objects for a training run and submit them."""

    def __init__(self,
                 namespace=None,
                 runs=1,
                 output=None,
                 cleanup=True,
                 labels=None,
                 job_name=None,
                 stream_log=True,
                 deployer_type=constants.JOB_DEPLOPYER_TYPE,
                 pod_spec_mutators=None,
                 annotations=None,
                 config_file=None,
                 context=None,
                 client_configuration=None,
                 persist_config=True,
                 verify_ssl=True):
        """Store the job settings and create the Kubernetes backend.

        :param namespace: k8s namespace where the training's components will
                be deployed; when None the default target namespace is used.
        :param runs: Number of training(s) to be deployed; used as both the
                parallelism and the completions of the generated job.
        :param output: when truthy, the generated job spec is printed as JSON
                before the job is created.
        :param cleanup: when True, the job is deleted once its logs have been
                fetched.
        :param labels: extra labels merged into the labels of the job.
        :param job_name: name of the job
        :param stream_log: when True, deploy() fetches the logs after launch.
        :param deployer_type: type of deployer; stored under the
                'fairing-deployer' label.
        :param pod_spec_mutators: callables invoked as
                fn(backend, pod_spec, namespace) before the pod template is
                generated (Default value = None)
        :param annotations: annotations copied onto the pod template
                (Default value = None)
        :param config_file: kubeconfig file, defaults to ~/.kube/config. Note that for the case
               that the SDK is running in cluster and you want to operate in another remote
               cluster, user must set config_file to load kube-config file explicitly.
        :param context: kubernetes context
        :param client_configuration: The kubernetes.client.Configuration to set configs to.
        :param persist_config: If True, config file will be updated when changed
        :param verify_ssl: use ssl verify or not, set in the client config
        """
        self.namespace = (utils.get_default_target_namespace()
                          if namespace is None else namespace)

        # The job name is used for the pod as well as for the job.
        self.job_name = job_name
        self.deployer_type = deployer_type
        self.deployment_spec = None
        self.runs = runs
        self.output = output
        self.backend = KubeManager(
            config_file=config_file,
            context=context,
            client_configuration=client_configuration,
            persist_config=persist_config,
            verify_ssl=verify_ssl,
        )
        self.cleanup = cleanup
        self.stream_log = stream_log
        self.set_labels(labels, deployer_type)
        self.set_anotations(annotations)
        self.pod_spec_mutators = pod_spec_mutators or []
        self.verify_ssl = verify_ssl

    def set_anotations(self, annotations):
        """Reset the annotations to a fresh dict holding the given entries.

        :param annotations: annotations to copy in; a falsy value leaves the
                dict empty.

        """
        self.annotations = {}
        self.annotations.update(annotations or {})

    def set_labels(self, labels, deployer_type):
        """Reset the labels for the pods of a deployed job.

        The 'fairing-deployer' label is always set first; entries of
        ``labels`` are applied on top of it.

        :param labels: dictionary of labels {label_name:label_value}
        :param deployer_type: deployer type name

        """
        self.labels = {'fairing-deployer': deployer_type}
        self.labels.update(labels or {})

    def deploy(self, pod_spec):  #pylint:disable=arguments-differ
        """Create the training job from the pod spec and return its name.

        A new uuid1 job id is generated and stored under the 'fairing-id'
        label, the pod spec mutators are applied, and the resulting job is
        created through the backend. Logs are fetched afterwards when
        ``stream_log`` is set.

        :param pod_spec: pod spec of deployed training job
        :returns: name of the created resource

        """
        job_id = str(uuid.uuid1())
        self.job_id = job_id
        self.labels['fairing-id'] = job_id

        for mutate in self.pod_spec_mutators:
            mutate(self.backend, pod_spec, self.namespace)

        template = self.generate_pod_template_spec(pod_spec)
        pod = template.spec
        pod.restart_policy = 'Never'
        pod.containers[0].name = 'fairing-job'
        self.deployment_spec = self.generate_deployment_spec(template)

        if self.output:
            serializer = k8s_client.ApiClient()
            print(json.dumps(
                serializer.sanitize_for_serialization(self.deployment_spec)))

        resource_name = self.create_resource()
        logger.warning("The {} {} launched.".format(self.deployer_type,
                                                    resource_name))

        if self.stream_log:
            self.get_logs()
        return resource_name

    def create_resource(self):
        """Create the job through the backend and return the created name."""
        created = self.backend.create_job(self.namespace, self.deployment_spec)
        self._created_job = created
        return created.metadata.name

    def generate_pod_template_spec(self, pod_spec):
        """Wrap the pod spec in a V1PodTemplateSpec carrying the job metadata.

        The 'sidecar.istio.io/inject' annotation is always forced to 'false'
        on ``self.annotations`` before the template is built.

        :param pod_spec: pod spec
        :raises TypeError: if ``pod_spec`` is not a V1PodSpec

        """
        if not isinstance(pod_spec, k8s_client.V1PodSpec):
            raise TypeError('pod_spec must be a V1PodSpec, but got %s' %
                            type(pod_spec))

        # Start from a new dict when there are no annotations yet; otherwise
        # the existing dict is updated in place.
        if not self.annotations:
            self.annotations = {}
        self.annotations['sidecar.istio.io/inject'] = 'false'

        template_metadata = k8s_client.V1ObjectMeta(
            name="fairing-deployer",
            annotations=self.annotations,
            labels=self.labels,
        )
        return k8s_client.V1PodTemplateSpec(metadata=template_metadata,
                                            spec=pod_spec)

    def generate_deployment_spec(self, pod_template_spec):
        """Build the V1Job that runs the given pod template.

        Parallelism and completions are both set to ``self.runs`` and the
        backoff limit is 0.

        :param pod_template_spec: V1PodTemplateSpec
        :raises TypeError: if ``pod_template_spec`` is not a V1PodTemplateSpec

        """
        if not isinstance(pod_template_spec, k8s_client.V1PodTemplateSpec):
            raise TypeError("""pod_template_spec must be a V1PodTemplateSpec,
                but got %s""" % type(pod_template_spec))

        job_spec = k8s_client.V1JobSpec(
            template=pod_template_spec,
            parallelism=self.runs,
            completions=self.runs,
            backoff_limit=0,
        )
        job_metadata = k8s_client.V1ObjectMeta(
            name=self.job_name,
            generate_name=constants.JOB_DEFAULT_NAME,
            labels=self.labels,
        )
        return k8s_client.V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=job_metadata,
            spec=job_spec,
        )

    def get_logs(self):
        """Fetch the logs of the created job, then clean up if configured."""
        job_metadata = self._created_job.metadata
        self.backend.log(job_metadata.name,
                         job_metadata.namespace,
                         self.labels,
                         container="fairing-job")

        if self.cleanup:
            self.do_cleanup()

    def do_cleanup(self):
        """Delete the created job using foreground propagation."""
        job_metadata = self._created_job.metadata
        logger.warning("Cleaning up job {}...".format(job_metadata.name))

        # A new client Configuration is built here; verify_ssl is the only
        # setting explicitly copied onto it.
        client_config = k8s_client.Configuration()
        client_config.verify_ssl = self.verify_ssl
        batch_api = k8s_client.BatchV1Api(
            api_client=k8s_client.ApiClient(configuration=client_config))
        batch_api.delete_namespaced_job(
            job_metadata.name,
            job_metadata.namespace,
            body=k8s_client.V1DeleteOptions(propagation_policy='Foreground'))
Example #16
0
    def __init__(self,
                 framework,
                 default_storage_uri=None,
                 canary_storage_uri=None,
                 canary_traffic_percent=0,
                 namespace=None,
                 labels=None,
                 annotations=None,
                 custom_default_container=None,
                 custom_canary_container=None,
                 isvc_name=None,
                 stream_log=False,
                 cleanup=False,
                 config_file=None,
                 context=None,
                 client_configuration=None,
                 persist_config=True):
        """Validate and store the InferenceService settings, then create the backend.

        :param framework: The framework for the InferenceService, such as Tensorflow,
            XGBoost and ScikitLearn etc.
        :param default_storage_uri: URI pointing to Saved Model assets for default service.
            Required unless the framework is 'custom'.
        :param canary_storage_uri: URI pointing to Saved Model assets for canary service.
        :param canary_traffic_percent: The amount of traffic to sent to the canary, defaults to 0.
        :param namespace: The k8s namespace where the InferenceService will be deployed;
            when None the default target namespace is used.
        :param labels: Labels for the InferenceService, separate with commas if have more than one.
        :param annotations: Annotations for the InferenceService,
            separate with commas if have more than one.
        :param custom_default_container: A flexible custom default container for arbitrary customer
                                 provided containers. Required when the framework is 'custom'.
        :param custom_canary_container: A flexible custom canary container for arbitrary customer
                                 provided containers.
        :param isvc_name: The InferenceService name.
        :param stream_log: Show log or not when InferenceService started, defaults to False.
        :param cleanup: Delete the kfserving or not, defaults to False.
        :param config_file: kubeconfig file, defaults to ~/.kube/config. Note that for the case
               that the SDK is running in cluster and you want to operate in another remote
               cluster, user must set config_file to load kube-config file explicitly.
        :param context: kubernetes context
        :param client_configuration: The kubernetes.client.Configuration to set configs to.
        :param persist_config: If True, config file will be updated when changed
        :raises RuntimeError: if default_storage_uri is missing for a non-custom framework,
            or custom_default_container is missing for the custom framework.
        """
        # Validate the arguments before anything else, so that an invalid call
        # fails with the error below without first building the KubeManager or
        # looking up the default namespace.
        if framework != 'custom' and default_storage_uri is None:
            raise RuntimeError("The default_storage_uri must be specified for "
                               "{} framework.".format(framework))
        if framework == 'custom' and custom_default_container is None:
            raise RuntimeError(
                "The custom_default_container must be specified "
                "for custom framework.")

        self.framework = framework
        self.isvc_name = isvc_name
        self.default_storage_uri = default_storage_uri
        self.canary_storage_uri = canary_storage_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_container = custom_default_container
        self.custom_canary_container = custom_canary_container
        self.stream_log = stream_log
        self.backend = KubeManager(config_file=config_file,
                                   context=context,
                                   client_configuration=client_configuration,
                                   persist_config=persist_config)

        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace
Example #17
0
      template:
        spec:
          containers:
            - name: tensorflow
              image: kubeflow/tf-dist-mnist-test:1.0
    Worker:
      replicas: 1
      restartPolicy: Never
      template:
        spec:
          containers:
            - name: tensorflow
              image: kubeflow/tf-dist-mnist-test:1.0
'''

# Module-level KubeManager, built with its default arguments, that the
# apply_namespaced_object tests below call into.
kubeflow_client = KubeManager()


def test_apply_namespaced_object_core_v1_api():
    '''
    Exercise apply_namespaced_object for CoreV1Api: apply the core_api_test
    object with the default mode, then with 'patch', then with 'delete'.
    '''
    apply_object = kubeflow_client.apply_namespaced_object
    apply_object(core_api_test)
    apply_object(core_api_test, mode='patch')
    apply_object(core_api_test, mode='delete')


def test_apply_namespaced_object_apps_v1_api():
    '''
    Test apply_namespaced_object API for AppV1Api
    '''