def __init__(self, namespace=None, runs=1, output=None, cleanup=True, labels=None, job_name=constants.JOB_DEFAULT_NAME, stream_log=True, deployer_type=constants.JOB_DEPLOPYER_TYPE, pod_spec_mutators=None, annotations=None):
    """Configure a training-job deployer.

    :param namespace: k8s namespace where the training's components will be
        deployed; falls back to the current default target namespace.
    :param runs: number of trainings to deploy; hyperparameter search
        generates multiple jobs.
    :param output: output
    :param cleanup: delete the components once the job has finished
    :param labels: labels to assign to the training job
    :param job_name: name of the job (also used as the pod name)
    :param stream_log: whether to stream the job's log
    :param deployer_type: type of deployer
    :param pod_spec_mutators: pod spec mutators (Default value = None)
    :param annotations: annotations to assign to the training job
    """
    self.namespace = (utils.get_default_target_namespace()
                      if namespace is None else namespace)
    # Used as pod and job name.
    self.job_name = job_name
    self.deployer_type = deployer_type
    self.deployment_spec = None
    self.runs = runs
    self.output = output
    self.backend = KubeManager()
    self.cleanup = cleanup
    self.stream_log = stream_log
    self.set_labels(labels, deployer_type)
    self.set_anotations(annotations)
    self.pod_spec_mutators = pod_spec_mutators if pod_spec_mutators else []
def __init__(self, registry=None, image_name=constants.DEFAULT_IMAGE_NAME, context_source=None, preprocessor=None, push=True, base_image=constants.DEFAULT_BASE_IMAGE, pod_spec_mutators=None, namespace=None, dockerfile_path=None, cleanup=False, executable_path_prefix=None):
    """Configure an in-cluster image builder.

    :param registry: registry the built image is pushed to
    :param image_name: name of the image to build
    :param context_source: where the build context is staged for the
        in-cluster build pod (required)
    :param preprocessor: preprocessor used to assemble the build context
    :param push: push the image after building
    :param base_image: base image for the Dockerfile
    :param pod_spec_mutators: functions mutating the builder pod spec
    :param namespace: namespace the builder pod runs in
    :param dockerfile_path: pre-existing Dockerfile to use instead of a
        generated one
    :param cleanup: delete the builder job after the build
    :param executable_path_prefix: prefix for generated executable paths
    """
    super().__init__(registry=registry,
                     image_name=image_name,
                     push=push,
                     preprocessor=preprocessor,
                     base_image=base_image,
                     dockerfile_path=dockerfile_path)
    self.manager = KubeManager()
    # The cluster build pod has no access to the local filesystem, so a
    # context source is mandatory.
    if context_source is None:
        raise RuntimeError("context_source is not specified")
    self.context_source = context_source
    self.pod_spec_mutators = pod_spec_mutators if pod_spec_mutators else []
    self.namespace = (namespace if namespace
                      else utils.get_default_target_namespace())
    self.cleanup = cleanup
    self.executable_path_prefix = executable_path_prefix
def __init__(self, namespace=None, runs=1, output=None, cleanup=True, labels=None, job_name=None, stream_log=True, deployer_type=constants.JOB_DEPLOPYER_TYPE, pod_spec_mutators=None, annotations=None, config_file=None, context=None, client_configuration=None, persist_config=True, verify_ssl=True):
    """Configure a training-job deployer with explicit kube-config control.

    :param namespace: k8s namespace where the training's components will be
        deployed; falls back to the current default target namespace.
    :param runs: number of trainings to deploy; hyperparameter search
        generates multiple jobs.
    :param output: output
    :param cleanup: delete the components once the job has finished
    :param labels: labels to assign to the training job
    :param job_name: name of the job (also used as the pod name)
    :param stream_log: whether to stream the job's log
    :param deployer_type: type of deployer
    :param pod_spec_mutators: pod spec mutators (Default value = None)
    :param annotations: annotations to assign to the training job
    :param config_file: kubeconfig file, defaults to ~/.kube/config. Set it
        explicitly to operate on a remote cluster while running in-cluster.
    :param context: kubernetes context
    :param client_configuration: kubernetes.client.Configuration to set
        configs to
    :param persist_config: if True, the config file is updated when changed
    :param verify_ssl: use ssl verify or not, set in the client config
    """
    self.namespace = (utils.get_default_target_namespace()
                      if namespace is None else namespace)
    # Used as pod and job name.
    self.job_name = job_name
    self.deployer_type = deployer_type
    self.deployment_spec = None
    self.runs = runs
    self.output = output
    self.backend = KubeManager(config_file=config_file,
                               context=context,
                               client_configuration=client_configuration,
                               persist_config=persist_config,
                               verify_ssl=verify_ssl)
    self.cleanup = cleanup
    self.stream_log = stream_log
    self.set_labels(labels, deployer_type)
    self.set_anotations(annotations)
    self.pod_spec_mutators = pod_spec_mutators if pod_spec_mutators else []
    # Remembered so do_cleanup() can build a client with the same TLS policy.
    self.verify_ssl = verify_ssl
def __init__(self, framework, default_storage_uri=None, canary_storage_uri=None, canary_traffic_percent=0, namespace=None, labels=None, annotations=None, custom_default_container=None, custom_canary_container=None, isvc_name=None, stream_log=False, cleanup=False):
    """Configure a KFServing InferenceService deployer.

    :param framework: serving framework (Tensorflow, XGBoost, ScikitLearn,
        ... or 'custom').
    :param default_storage_uri: URI of the saved model for the default
        endpoint (required unless framework is 'custom').
    :param canary_storage_uri: URI of the saved model for the canary
        endpoint.
    :param canary_traffic_percent: share of traffic routed to the canary,
        defaults to 0.
    :param namespace: k8s namespace to deploy the InferenceService into;
        falls back to the current default target namespace.
    :param labels: labels for the InferenceService.
    :param annotations: annotations for the InferenceService.
    :param custom_default_container: custom container for the default
        endpoint (required when framework is 'custom').
    :param custom_canary_container: custom container for the canary
        endpoint.
    :param isvc_name: name for the InferenceService.
    :param stream_log: stream the service log once started, defaults to
        False.
    :param cleanup: delete the InferenceService after deployment, defaults
        to False.
    """
    self.framework = framework
    self.isvc_name = isvc_name
    self.default_storage_uri = default_storage_uri
    self.canary_storage_uri = canary_storage_uri
    self.canary_traffic_percent = canary_traffic_percent
    self.annotations = annotations
    self.set_labels(labels)
    self.cleanup = cleanup
    self.custom_default_container = custom_default_container
    self.custom_canary_container = custom_canary_container
    self.stream_log = stream_log
    self.backend = KubeManager()
    self.namespace = (utils.get_default_target_namespace()
                      if namespace is None else namespace)
    # The two requirements are mutually exclusive, so branch once on the
    # framework kind.
    if self.framework == 'custom':
        if self.custom_default_container is None:
            raise RuntimeError(
                "The custom_default_container must be specified "
                "for custom framework.")
    elif self.default_storage_uri is None:
        raise RuntimeError("The default_storage_uri must be specified for "
                           "{} framework.".format(self.framework))
def __init__(self, endpoint_url, minio_secret, minio_secret_key, region_name):
    """Store MinIO/S3 connection settings and a Kubernetes manager handle.

    :param endpoint_url: URL of the object-store endpoint
    :param minio_secret: name of the access secret
    :param minio_secret_key: key of the secret value
    :param region_name: object-store region
    """
    # NOTE: attribute is capitalised ('Manager'); kept as-is because other
    # methods of this class reference self.Manager.
    self.Manager = KubeManager()
    self.endpoint_url = endpoint_url
    self.minio_secret = minio_secret
    self.minio_secret_key = minio_secret_key
    self.region_name = region_name
def get_ibm_cos_credentials(namespace):
    """Get the IBM COS HMAC credentials from the credentials secret.

    :param namespace(str): The namespace that IBM COS credential secret created in.
    :returns: tuple of ``(aws_access_key_id, aws_secret_access_key)``.
    :raises Exception: if the credentials secret is missing in the namespace.
    :raises RuntimeError: if the credentials carry no HMAC keys.
    """
    secret_name = constants.IBM_COS_CREDS_SECRET_NAME
    if not KubeManager().secret_exists(secret_name, namespace):
        raise Exception("Secret '{}' not found in namespace '{}'".format(
            secret_name, namespace))

    secret = client.CoreV1Api().read_namespaced_secret(secret_name, namespace)
    creds_data = secret.data[constants.IBM_COS_CREDS_FILE_NAME]
    creds_json = base64.b64decode(creds_data).decode('utf-8')
    cos_creds = json.loads(creds_json)

    hmac_keys = cos_creds.get('cos_hmac_keys', '')
    if not hmac_keys:
        # Fixed: the original message used a backslash line-continuation
        # *inside* the string literal, which embedded a run of indentation
        # spaces in the user-visible text.
        raise RuntimeError(
            "Kaniko needs AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY "
            "if using S3 Bucket. Please use HMAC Credential.")
    aws_access_key_id = hmac_keys.get('access_key_id', '')
    aws_secret_access_key = hmac_keys.get('secret_access_key', '')
    return aws_access_key_id, aws_secret_access_key
def __init__(self, gcp_project=None, credentials_file=os.environ.get(constants.GOOGLE_CREDS_ENV), namespace='default'):
    """Hold GCP project/credential settings and a Kubernetes manager handle.

    :param gcp_project: GCP project to operate in
    :param credentials_file: path to the GCP credentials file; the default
        is read from the environment once, at import time.
    :param namespace: k8s namespace to use, defaults to 'default'
    """
    self.manager = KubeManager()
    self.gcp_project = gcp_project
    self.credentials_file = credentials_file
    self.namespace = namespace
def __init__(self, aws_account=None, region=None, bucket_name=None):
    """Hold AWS account/bucket settings and a Kubernetes manager handle.

    :param aws_account: AWS account id
    :param region: AWS region; defaults to 'us-east-1' when omitted
    :param bucket_name: S3 bucket to use
    """
    self.manager = KubeManager()
    self.aws_account = aws_account
    self.bucket_name = bucket_name
    # Fall back to the default AWS region when none was given.
    self.region = region if region else 'us-east-1'
def __init__(self, framework, default_model_uri=None, canary_model_uri=None, canary_traffic_percent=0, namespace=None, labels=None, annotations=None, custom_default_spec=None, custom_canary_spec=None, stream_log=True, cleanup=False):
    """Configure a KFServing kfservice deployer.

    :param framework: serving framework (Tensorflow, XGBoost, ScikitLearn,
        ... or 'custom').
    :param default_model_uri: URI of the saved model for the default
        service.
    :param canary_model_uri: URI of the saved model for the canary service.
    :param canary_traffic_percent: share of traffic routed to the canary,
        defaults to 0.
    :param namespace: k8s namespace to deploy the kfservice into; falls
        back to the current default target namespace.
    :param labels: labels for the kfservice.
    :param annotations: annotations for the kfservice.
    :param custom_default_spec: custom default specification for arbitrary
        customer-provided containers.
    :param custom_canary_spec: custom canary specification for arbitrary
        customer-provided containers.
    :param stream_log: stream the service log once started, defaults to
        True.
    :param cleanup: delete the kfservice after deployment, defaults to
        False.
    """
    self.framework = framework
    self.default_model_uri = default_model_uri
    self.canary_model_uri = canary_model_uri
    self.canary_traffic_percent = canary_traffic_percent
    self.annotations = annotations
    self.set_labels(labels)
    self.cleanup = cleanup
    self.custom_default_spec = custom_default_spec
    self.custom_canary_spec = custom_canary_spec
    self.stream_log = stream_log
    self.backend = KubeManager()
    self.namespace = (utils.get_default_target_namespace()
                      if namespace is None else namespace)
def get_azure_credentials(namespace):
    """Read Azure service-principal credentials from a Kubernetes secret.

    :param namespace: namespace holding the Azure credentials secret
    :returns: tuple of (ServicePrincipalCredentials, subscription id)
    :raises Exception: if the credentials secret does not exist
    """
    secret_name = constants.AZURE_CREDS_SECRET_NAME
    if not KubeManager().secret_exists(secret_name, namespace):
        raise Exception("Secret '{}' not found in namespace '{}'".format(
            secret_name, namespace))

    secret = client.CoreV1Api().read_namespaced_secret(secret_name, namespace)
    client_id = get_plain_secret_value(secret.data, 'AZ_CLIENT_ID')
    client_secret = get_plain_secret_value(secret.data, 'AZ_CLIENT_SECRET')
    tenant_id = get_plain_secret_value(secret.data, 'AZ_TENANT_ID')
    sp_credentials = ServicePrincipalCredentials(client_id=client_id,
                                                 secret=client_secret,
                                                 tenant=tenant_id)
    subscription_id = get_plain_secret_value(secret.data, 'AZ_SUBSCRIPTION_ID')
    return sp_credentials, subscription_id
def get_builder(self, preprocessor, base_image, registry, needs_deps_installation=True,
                pod_spec_mutators=None):
    """Creates a builder instance with right config for GKE

    :param preprocessor: Preprocessor to use to modify inputs
    :param base_image: Base image to use for this job
    :param registry: Registry to push image to. Example: gcr.io/kubeflow-images
    :param needs_deps_installation: need depends on installation(Default value = True)
    :param pod_spec_mutators: list of functions that is used to mutate the podspec.
        e.g. fairing.cloud.gcp.add_gcp_credentials_if_exists
        This can used to set things like volumes and security context.
        (Default value = None)
    """
    # Copy before appending so the caller's list is never mutated.
    pod_spec_mutators = list(pod_spec_mutators or [])
    pod_spec_mutators.append(gcp.add_gcp_credentials_if_exists)

    if not needs_deps_installation:
        return AppendBuilder(preprocessor=preprocessor,
                             base_image=base_image,
                             registry=registry)
    elif (utils.is_running_in_k8s() or
          not ml_tasks_utils.is_docker_daemon_exists()):
        # No local docker daemon usable -> build inside the cluster.
        return ClusterBuilder(preprocessor=preprocessor,
                              base_image=base_image,
                              registry=registry,
                              pod_spec_mutators=pod_spec_mutators,
                              namespace=self._namespace,
                              context_source=self._build_context_source)
    elif ml_tasks_utils.is_docker_daemon_exists():
        return DockerBuilder(preprocessor=preprocessor,
                             base_image=base_image,
                             registry=registry)
    else:
        # Defensive: the branches above cover all combinations, so this is
        # only reached if the docker-daemon probe changes between calls.
        msg = ["Not able to guess the right builder for this job!"]
        # NOTE(review): this condition looks possibly inverted — it reports
        # a permissions problem when the secret *is* visible; confirm intent.
        if KubeManager().secret_exists(constants.GCP_CREDS_SECRET_NAME, self._namespace):
            msg.append("It seems you don't have permission to list/access secrets in your "
                       "Kubeflow cluster. We need this permission in order to build a docker "
                       "image using Kubeflow cluster. Adding Kubernetes Admin role to the "
                       "service account you are using might solve this issue.")
        if not utils.is_running_in_k8s():
            msg.append(" Also If you are using 'sudo' to access docker in your system you can"
                       " solve this problem by adding your username to the docker group. "
                       "Reference: https://docs.docker.com/install/linux/linux-postinstall/"
                       "#manage-docker-as-a-non-root-user You need to logout and login to "
                       "get change activated.")
        message = " ".join(msg)
        raise RuntimeError(message)
class ClusterBuilder(BaseBuilder):
    """Builds a docker image in a Kubernetes cluster. """

    def __init__(self, registry=None, image_name=constants.DEFAULT_IMAGE_NAME,
                 context_source=None, preprocessor=None, push=True,
                 base_image=constants.DEFAULT_BASE_IMAGE, pod_spec_mutators=None,
                 namespace=None, dockerfile_path=None, cleanup=False,
                 executable_path_prefix=None):
        """
        :param registry: registry the built image is pushed to
        :param image_name: name of the image to build
        :param context_source: where the build context is staged for the
            in-cluster builder pod (required)
        :param preprocessor: preprocessor used to assemble the build context
        :param push: push the image after building
        :param base_image: base image for the generated Dockerfile
        :param pod_spec_mutators: functions mutating the builder pod spec
        :param namespace: namespace the builder job runs in
        :param dockerfile_path: pre-existing Dockerfile to use instead of a
            generated one
        :param cleanup: delete the builder job once the build finished
        :param executable_path_prefix: prefix for generated executable paths
        """
        super().__init__(registry=registry,
                         image_name=image_name,
                         push=push,
                         preprocessor=preprocessor,
                         base_image=base_image,
                         dockerfile_path=dockerfile_path)
        self.manager = KubeManager()
        # The in-cluster builder pod cannot read the local filesystem, so a
        # context source is mandatory.
        if context_source is None:
            raise RuntimeError("context_source is not specified")
        self.context_source = context_source
        self.pod_spec_mutators = pod_spec_mutators or []
        self.namespace = namespace or utils.get_default_target_namespace()
        self.cleanup = cleanup
        self.executable_path_prefix = executable_path_prefix

    def build(self):
        """Stage the build context and run a Kaniko build job in the cluster.

        Generates (or reuses) a Dockerfile, tars the context, uploads it via
        the context source, then submits a batch/v1 Job and streams its log.
        """
        logging.info("Building image using cluster builder.")
        install_reqs_before_copy = self.preprocessor.is_requirements_txt_file_present()
        if self.dockerfile_path:
            dockerfile_path = self.dockerfile_path
        else:
            dockerfile_path = dockerfile.write_dockerfile(
                path_prefix=self.preprocessor.path_prefix,
                base_image=self.base_image,
                install_reqs_before_copy=install_reqs_before_copy,
                executable_path_prefix=self.executable_path_prefix)
        self.preprocessor.output_map[dockerfile_path] = 'Dockerfile'
        # The context hash doubles as the image tag, so identical contexts
        # produce identical tags.
        context_path, context_hash = self.preprocessor.context_tar_gz()
        self.image_tag = self.full_image_name(context_hash)
        self.context_source.prepare(context_path)
        labels = {'fairing-builder': 'kaniko'}
        labels['fairing-build-id'] = str(uuid.uuid1())
        pod_spec = self.context_source.generate_pod_spec(
            self.image_tag, self.push)
        for fn in self.pod_spec_mutators:
            fn(self.manager, pod_spec, self.namespace)

        pod_spec_template = client.V1PodTemplateSpec(
            metadata=client.V1ObjectMeta(
                generate_name="fairing-builder-",
                labels=labels,
                namespace=self.namespace,
                # Keep Istio's sidecar out of the short-lived builder pod.
                annotations={"sidecar.istio.io/inject": "false"},
            ),
            spec=pod_spec)
        job_spec = client.V1JobSpec(
            template=pod_spec_template,
            parallelism=1,
            completions=1,
            backoff_limit=0,
        )
        build_job = client.V1Job(api_version="batch/v1",
                                 kind="Job",
                                 metadata=client.V1ObjectMeta(
                                     generate_name="fairing-builder-",
                                     labels=labels,
                                 ),
                                 spec=job_spec)
        created_job = client.BatchV1Api().create_namespaced_job(self.namespace, build_job)
        # Blocks while streaming the kaniko container's log.
        self.manager.log(name=created_job.metadata.name,
                         namespace=created_job.metadata.namespace,
                         selectors=labels,
                         container="kaniko")

        # Invoke upstream clean ups
        self.context_source.cleanup()

        # Cleanup build_job if requested by user
        # Otherwise build_job will be cleaned up by Kubernetes GC
        if self.cleanup:
            logging.warning("Cleaning up job {}...".format(
                created_job.metadata.name))
            client.BatchV1Api().delete_namespaced_job(
                created_job.metadata.name,
                created_job.metadata.namespace,
                body=client.V1DeleteOptions(propagation_policy='Foreground')
            )
class KFServing(DeployerInterface):
    """Serves a prediction endpoint using Kubeflow KFServing."""

    def __init__(self, framework, default_model_uri=None, canary_model_uri=None,
                 canary_traffic_percent=0, namespace=None, labels=None,
                 annotations=None, custom_default_spec=None, custom_canary_spec=None,
                 stream_log=True, cleanup=False):
        """
        :param framework: The framework for the kfservice, such as Tensorflow,
            XGBoost and ScikitLearn etc.
        :param default_model_uri: URI pointing to Saved Model assets for default service.
        :param canary_model_uri: URI pointing to Saved Model assets for canary service.
        :param canary_traffic_percent: The amount of traffic to sent to the canary,
            defaults to 0.
        :param namespace: The k8s namespace where the kfservice will be deployed.
        :param labels: Labels for the kfservice, separate with commas if have more than one.
        :param annotations: Annotations for the kfservice, separate with commas
            if have more than one.
        :param custom_default_spec: A flexible custom default specification for arbitrary
            customer provided containers.
        :param custom_canary_spec: A flexible custom canary specification for arbitrary
            customer provided containers.
        :param stream_log: Show log or not when kfservice started, defaults to True.
        :param cleanup: Delete the kfserving or not, defaults to False.
        """
        self.framework = framework
        self.default_model_uri = default_model_uri
        self.canary_model_uri = canary_model_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_spec = custom_default_spec
        self.custom_canary_spec = custom_canary_spec
        self.stream_log = stream_log
        self.backend = KubeManager()
        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

    def set_labels(self, labels):
        """set label for deployed prediction

        :param labels: dictionary of labels {label_name:label_value}
        """
        self.fairing_id = str(uuid.uuid1())
        self.labels = {'fairing-id': self.fairing_id}
        if labels:
            self.labels.update(labels)

    def deploy(self, template_spec):  # pylint:disable=arguments-differ,unused-argument
        """deploy kfserving endpoint

        :param template_spec: template spec
        """
        self.kfservice = self.generate_kfservice()
        self.created_kfserving = self.backend.create_kfserving(
            self.namespace, self.kfservice)
        if self.stream_log:
            self.get_logs()

        kfservice_name = self.created_kfserving['metadata']['name']
        logger.warning(
            "Deployed the kfservice {} successfully.".format(kfservice_name))

        if self.cleanup:
            logger.warning("Cleaning up kfservice {}...".format(kfservice_name))
            self.backend.delete_kfserving(kfservice_name, self.namespace)

        return kfservice_name

    def generate_kfservice(self):
        """ generate kfserving template"""
        spec = {}
        spec['default'] = {}
        # Fixed: compare strings with equality, not identity — `is not 'custom'`
        # relied on CPython string interning.
        if self.framework != 'custom':
            if self.default_model_uri is not None:
                spec['default'][self.framework] = {}
                spec['default'][self.framework]['modelUri'] = self.default_model_uri
            else:
                raise RuntimeError(
                    "The default_model_uri must be defined if the framework is not custom.")
        else:
            if self.custom_default_spec is not None:
                # TBD @jinchi Need to validate the custom_default_spec before executing.
                spec['default'][self.framework] = self.custom_default_spec
            else:
                raise RuntimeError(
                    "The custom_default_spec must be defined if the framework is custom.")

        if self.framework != 'custom':
            if self.canary_model_uri is not None:
                spec['canary'] = {}
                spec['canary'][self.framework] = {}
                spec['canary'][self.framework]['modelUri'] = self.canary_model_uri
                spec['canaryTrafficPercent'] = self.canary_traffic_percent
        else:
            # Fixed: guard on custom_canary_spec (the value being attached);
            # the original tested custom_default_spec and could attach a
            # canary entry whose spec was None.
            if self.custom_canary_spec is not None:
                spec['canary'] = {}
                spec['canary'][self.framework] = self.custom_canary_spec
                spec['canaryTrafficPercent'] = self.canary_traffic_percent

        metadata = k8s_client.V1ObjectMeta(
            generate_name=constants.KFSERVING_DEFAULT_NAME,
            namespace=self.namespace,
            labels=self.labels,
            annotations=self.annotations
        )

        kfservice = {}
        kfservice['kind'] = constants.KFSERVING_KIND
        kfservice['apiVersion'] = constants.KFSERVING_GROUP + \
            '/' + constants.KFSERVING_VERSION
        kfservice['metadata'] = metadata
        kfservice['spec'] = spec

        return kfservice

    def get_logs(self):
        """ get log from prediction pod"""
        name = self.created_kfserving['metadata']['name']
        namespace = self.created_kfserving['metadata']['namespace']
        self.backend.log(name, namespace, self.labels,
                         container=constants.KFSERVING_CONTAINER_NAME, follow=False)
class KFServing(DeployerInterface):
    """Serves a prediction endpoint using Kubeflow KFServing."""

    def __init__(self, framework, default_storage_uri=None, canary_storage_uri=None,
                 canary_traffic_percent=0, namespace=None, labels=None, annotations=None,
                 custom_default_container=None, custom_canary_container=None,
                 isvc_name=None, stream_log=False, cleanup=False, config_file=None,
                 context=None, client_configuration=None, persist_config=True):
        """
        :param framework: The framework for the InferenceService, such as Tensorflow,
            XGBoost and ScikitLearn etc.
        :param default_storage_uri: URI pointing to Saved Model assets for default service.
        :param canary_storage_uri: URI pointing to Saved Model assets for canary service.
        :param canary_traffic_percent: The amount of traffic to sent to the canary,
            defaults to 0.
        :param namespace: The k8s namespace where the InferenceService will be deployed.
        :param labels: Labels for the InferenceService, separate with commas if have more
            than one.
        :param annotations: Annotations for the InferenceService, separate with commas
            if have more than one.
        :param custom_default_container: A flexible custom default container for arbitrary
            customer provided containers.
        :param custom_canary_container: A flexible custom canary container for arbitrary
            customer provided containers.
        :param isvc_name: The InferenceService name.
        :param stream_log: Show log or not when InferenceService started, defaults
            to False.
        :param cleanup: Delete the kfserving or not, defaults to False.
        :param config_file: kubeconfig file, defaults to ~/.kube/config. Note that for
            the case that the SDK is running in cluster and you want to operate in another
            remote cluster, user must set config_file to load kube-config file explicitly.
        :param context: kubernetes context
        :param client_configuration: The kubernetes.client.Configuration to set configs to.
        :param persist_config: If True, config file will be updated when changed
        """
        self.framework = framework
        self.isvc_name = isvc_name
        self.default_storage_uri = default_storage_uri
        self.canary_storage_uri = canary_storage_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_container = custom_default_container
        self.custom_canary_container = custom_canary_container
        self.stream_log = stream_log
        self.backend = KubeManager(config_file=config_file,
                                   context=context,
                                   client_configuration=client_configuration,
                                   persist_config=persist_config)
        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

        if self.framework != 'custom' and self.default_storage_uri is None:
            raise RuntimeError("The default_storage_uri must be specified for "
                               "{} framework.".format(self.framework))
        if self.framework == 'custom' and self.custom_default_container is None:
            raise RuntimeError(
                "The custom_default_container must be specified "
                "for custom framework.")

    def set_labels(self, labels):
        """set label for deployed prediction

        :param labels: dictionary of labels {label_name:label_value}
        """
        self.fairing_id = str(uuid.uuid1())
        self.labels = {'fairing-id': self.fairing_id}
        if labels:
            self.labels.update(labels)

    def deploy(self, isvc):  # pylint:disable=arguments-differ,unused-argument
        """deploy kfserving endpoint

        :param isvc: InferenceService for deploying.
        """
        self.created_isvc = self.backend.create_isvc(self.namespace, self.generate_isvc())

        if self.stream_log:
            self.get_logs()

        isvc_name = self.created_isvc['metadata']['name']
        logger.info(
            "Deployed the InferenceService {} successfully.".format(isvc_name))

        if self.cleanup:
            logger.warning(
                "Cleaning up InferenceService {}...".format(isvc_name))
            self.backend.delete_isvc(isvc_name, self.namespace)

        return isvc_name

    def generate_isvc(self):
        """ generate InferenceService """
        api_version = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
        default_predictor, canary_predictor = None, None

        if self.framework == 'custom':
            default_predictor = self.generate_predictor_spec(
                self.framework, container=self.custom_default_container)
        else:
            default_predictor = self.generate_predictor_spec(
                self.framework, storage_uri=self.default_storage_uri)

        if self.framework != 'custom' and self.canary_storage_uri is not None:
            canary_predictor = self.generate_predictor_spec(
                self.framework, storage_uri=self.canary_storage_uri)
        if self.framework == 'custom' and self.custom_canary_container is not None:
            canary_predictor = self.generate_predictor_spec(
                self.framework, container=self.custom_canary_container)

        if canary_predictor:
            isvc_spec = V1alpha2InferenceServiceSpec(
                default=V1alpha2EndpointSpec(predictor=default_predictor),
                canary=V1alpha2EndpointSpec(predictor=canary_predictor),
                canary_traffic_percent=self.canary_traffic_percent)
        else:
            isvc_spec = V1alpha2InferenceServiceSpec(
                default=V1alpha2EndpointSpec(predictor=default_predictor),
                canary_traffic_percent=self.canary_traffic_percent)

        return V1alpha2InferenceService(
            api_version=api_version,
            kind=constants.KFSERVING_KIND,
            metadata=k8s_client.V1ObjectMeta(
                name=self.isvc_name,
                generate_name=constants.KFSERVING_DEFAULT_NAME,
                namespace=self.namespace),
            spec=isvc_spec)

    def generate_predictor_spec(self, framework, storage_uri=None, container=None):
        '''Generate predictor spec according to framework and
           default_storage_uri or custom container.

        Fixed: dispatch on the ``framework`` parameter instead of silently
        ignoring it in favour of ``self.framework`` (callers always passed
        ``self.framework``, so behavior is unchanged).
        '''
        if framework == 'tensorflow':
            predictor = V1alpha2PredictorSpec(
                tensorflow=V1alpha2TensorflowSpec(storage_uri=storage_uri))
        elif framework == 'onnx':
            predictor = V1alpha2PredictorSpec(onnx=V1alpha2ONNXSpec(
                storage_uri=storage_uri))
        elif framework == 'pytorch':
            predictor = V1alpha2PredictorSpec(pytorch=V1alpha2PyTorchSpec(
                storage_uri=storage_uri))
        elif framework == 'sklearn':
            predictor = V1alpha2PredictorSpec(sklearn=V1alpha2SKLearnSpec(
                storage_uri=storage_uri))
        elif framework == 'triton':
            predictor = V1alpha2PredictorSpec(triton=V1alpha2TritonSpec(
                storage_uri=storage_uri))
        elif framework == 'xgboost':
            predictor = V1alpha2PredictorSpec(xgboost=V1alpha2XGBoostSpec(
                storage_uri=storage_uri))
        elif framework == 'custom':
            predictor = V1alpha2PredictorSpec(custom=V1alpha2CustomSpec(
                container=container))
        else:
            raise RuntimeError("Unsupported framework {}".format(framework))
        return predictor

    def get_logs(self):
        """ get log from prediction pod"""
        name = self.created_isvc['metadata']['name']
        namespace = self.created_isvc['metadata']['namespace']
        self.backend.log(name, namespace, self.labels,
                         container=constants.KFSERVING_CONTAINER_NAME, follow=False)
class Job(DeployerInterface):  #pylint:disable=too-many-instance-attributes
    """Handle all the k8s' template building for a training"""

    def __init__(self, namespace=None, runs=1, output=None, cleanup=True, labels=None,
                 job_name=None, stream_log=True,
                 deployer_type=constants.JOB_DEPLOPYER_TYPE,
                 pod_spec_mutators=None, annotations=None, config_file=None,
                 context=None, client_configuration=None, persist_config=True,
                 verify_ssl=True):
        """
        :param namespace: k8s namespace where the training's components
            will be deployed.
        :param runs: Number of training(s) to be deployed. Hyperparameter
            search will generate multiple jobs.
        :param output: output
        :param cleanup: clean up deletes components after job finished
        :param labels: labels to be assigned to the training job
        :param job_name: name of the job
        :param stream_log: stream the log?
        :param deployer_type: type of deployer
        :param pod_spec_mutators: pod spec mutators (Default value = None)
        :param annotations: annotations to be assigned to the training job
        :param config_file: kubeconfig file, defaults to ~/.kube/config. Note that for the case
               that the SDK is running in cluster and you want to operate in another remote
               cluster, user must set config_file to load kube-config file explicitly.
        :param context: kubernetes context
        :param client_configuration: The kubernetes.client.Configuration to set configs to.
        :param persist_config: If True, config file will be updated when changed
        :param verify_ssl: use ssl verify or not, set in the client config
        """
        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

        # Used as pod and job name
        self.job_name = job_name
        self.deployer_type = deployer_type
        self.deployment_spec = None
        self.runs = runs
        self.output = output
        self.backend = KubeManager(config_file=config_file,
                                   context=context,
                                   client_configuration=client_configuration,
                                   persist_config=persist_config,
                                   verify_ssl=verify_ssl)
        self.cleanup = cleanup
        self.stream_log = stream_log
        self.set_labels(labels, deployer_type)
        self.set_anotations(annotations)
        self.pod_spec_mutators = pod_spec_mutators or []
        # Kept so do_cleanup() can build a client with the same TLS policy.
        self.verify_ssl = verify_ssl

    def set_anotations(self, annotations):
        """set annotations for the deployed job's pods
        (note: method name misspelling is part of the public interface)

        :param annotations: dictionary of annotations {name:value}
        """
        self.annotations = {}
        if annotations:
            self.annotations.update(annotations)

    def set_labels(self, labels, deployer_type):
        """set labels for the pods of a deployed job

        :param labels: dictionary of labels {label_name:label_value}
        :param deployer_type: deployer type name
        """
        self.labels = {'fairing-deployer': deployer_type}
        if labels:
            self.labels.update(labels)

    def deploy(self, pod_spec):  #pylint:disable=arguments-differ
        """deploy the training job using k8s client lib

        :param pod_spec: pod spec of deployed training job
        """
        self.job_id = str(uuid.uuid1())
        self.labels['fairing-id'] = self.job_id
        for fn in self.pod_spec_mutators:
            fn(self.backend, pod_spec, self.namespace)
        pod_template_spec = self.generate_pod_template_spec(pod_spec)
        # Batch jobs must not restart failed pods; the container name is
        # fixed so logs can be selected by name later.
        pod_template_spec.spec.restart_policy = 'Never'
        pod_template_spec.spec.containers[0].name = 'fairing-job'

        self.deployment_spec = self.generate_deployment_spec(pod_template_spec)
        if self.output:
            api = k8s_client.ApiClient()
            job_output = api.sanitize_for_serialization(self.deployment_spec)
            print(json.dumps(job_output))

        name = self.create_resource()
        logger.warning("The {} {} launched.".format(self.deployer_type, name))

        if self.stream_log:
            self.get_logs()

        return name

    def create_resource(self):
        """ create job"""
        self._created_job = self.backend.create_job(self.namespace, self.deployment_spec)
        return self._created_job.metadata.name

    def generate_pod_template_spec(self, pod_spec):
        """Generate a V1PodTemplateSpec initiazlied with correct metadata
        and with the provided pod_spec

        :param pod_spec: pod spec
        """
        if not isinstance(pod_spec, k8s_client.V1PodSpec):
            raise TypeError('pod_spec must be a V1PodSpec, but got %s'
                            % type(pod_spec))
        # Always disable Istio sidecar injection for the short-lived job pod.
        if not self.annotations:
            self.annotations = {'sidecar.istio.io/inject': 'false'}
        else:
            self.annotations['sidecar.istio.io/inject'] = 'false'
        return k8s_client.V1PodTemplateSpec(metadata=k8s_client.V1ObjectMeta(
            name="fairing-deployer",
            annotations=self.annotations,
            labels=self.labels),
                                            spec=pod_spec)

    def generate_deployment_spec(self, pod_template_spec):
        """Generate a V1Job initialized with correct completion and
         parallelism (for HP search) and with the provided V1PodTemplateSpec

        :param pod_template_spec: V1PodTemplateSpec
        """
        if not isinstance(pod_template_spec, k8s_client.V1PodTemplateSpec):
            raise TypeError("""pod_template_spec must be a V1PodTemplateSpec,
                but got %s""" % type(pod_template_spec))

        job_spec = k8s_client.V1JobSpec(
            template=pod_template_spec,
            parallelism=self.runs,
            completions=self.runs,
            backoff_limit=0,
        )

        return k8s_client.V1Job(
            api_version="batch/v1",
            kind="Job",
            metadata=k8s_client.V1ObjectMeta(
                name=self.job_name,
                generate_name=constants.JOB_DEFAULT_NAME,
                labels=self.labels),
            spec=job_spec)

    def get_logs(self):
        """ get logs from the deployed job"""
        self.backend.log(self._created_job.metadata.name,
                         self._created_job.metadata.namespace,
                         self.labels,
                         container="fairing-job")

        if self.cleanup:
            self.do_cleanup()

    def do_cleanup(self):
        """ clean up the pods after job finished"""
        logger.warning("Cleaning up job {}...".format(
            self._created_job.metadata.name))
        # Build a dedicated client honoring the verify_ssl setting chosen
        # at construction time.
        client_config = k8s_client.Configuration()
        client_config.verify_ssl = self.verify_ssl
        api_client = k8s_client.ApiClient(configuration=client_config)
        k8s_client.BatchV1Api(api_client=api_client).delete_namespaced_job(
            self._created_job.metadata.name,
            self._created_job.metadata.namespace,
            body=k8s_client.V1DeleteOptions(propagation_policy='Foreground'))
def __init__(self, framework, default_storage_uri=None, canary_storage_uri=None, canary_traffic_percent=0, namespace=None, labels=None, annotations=None, custom_default_container=None, custom_canary_container=None, isvc_name=None, stream_log=False, cleanup=False, config_file=None, context=None, client_configuration=None, persist_config=True):
    """Configure a KFServing InferenceService deployer with explicit
    kube-config control.

    :param framework: serving framework (Tensorflow, XGBoost, ScikitLearn,
        ... or 'custom').
    :param default_storage_uri: URI of the saved model for the default
        endpoint (required unless framework is 'custom').
    :param canary_storage_uri: URI of the saved model for the canary
        endpoint.
    :param canary_traffic_percent: share of traffic routed to the canary,
        defaults to 0.
    :param namespace: k8s namespace to deploy the InferenceService into;
        falls back to the current default target namespace.
    :param labels: labels for the InferenceService.
    :param annotations: annotations for the InferenceService.
    :param custom_default_container: custom container for the default
        endpoint (required when framework is 'custom').
    :param custom_canary_container: custom container for the canary
        endpoint.
    :param isvc_name: name for the InferenceService.
    :param stream_log: stream the service log once started, defaults to
        False.
    :param cleanup: delete the InferenceService after deployment, defaults
        to False.
    :param config_file: kubeconfig file, defaults to ~/.kube/config; set it
        explicitly to operate on a remote cluster while running in-cluster.
    :param context: kubernetes context.
    :param client_configuration: kubernetes.client.Configuration to set
        configs to.
    :param persist_config: if True, the config file is updated when changed.
    """
    self.framework = framework
    self.isvc_name = isvc_name
    self.default_storage_uri = default_storage_uri
    self.canary_storage_uri = canary_storage_uri
    self.canary_traffic_percent = canary_traffic_percent
    self.annotations = annotations
    self.set_labels(labels)
    self.cleanup = cleanup
    self.custom_default_container = custom_default_container
    self.custom_canary_container = custom_canary_container
    self.stream_log = stream_log
    self.backend = KubeManager(config_file=config_file,
                               context=context,
                               client_configuration=client_configuration,
                               persist_config=persist_config)
    self.namespace = (utils.get_default_target_namespace()
                      if namespace is None else namespace)
    # The two requirements are mutually exclusive, so branch once on the
    # framework kind.
    if self.framework == 'custom':
        if self.custom_default_container is None:
            raise RuntimeError(
                "The custom_default_container must be specified "
                "for custom framework.")
    elif self.default_storage_uri is None:
        raise RuntimeError("The default_storage_uri must be specified for "
                           "{} framework.".format(self.framework))
template: spec: containers: - name: tensorflow image: kubeflow/tf-dist-mnist-test:1.0 Worker: replicas: 1 restartPolicy: Never template: spec: containers: - name: tensorflow image: kubeflow/tf-dist-mnist-test:1.0 ''' kubeflow_client = KubeManager() def test_apply_namespaced_object_core_v1_api(): ''' Test apply_namespaced_object API for CoreV1Api ''' kubeflow_client.apply_namespaced_object(core_api_test) kubeflow_client.apply_namespaced_object(core_api_test, mode='patch') kubeflow_client.apply_namespaced_object(core_api_test, mode='delete') def test_apply_namespaced_object_apps_v1_api(): ''' Test apply_namespaced_object API for AppV1Api '''