Example #1
    def __init__(self,
                 framework,
                 default_model_uri=None,
                 canary_model_uri=None,
                 canary_traffic_percent=0,
                 namespace=None,
                 labels=None,
                 annotations=None,
                 custom_default_spec=None,
                 custom_canary_spec=None,
                 stream_log=True,
                 cleanup=False):
        self.framework = framework
        self.default_model_uri = default_model_uri
        self.canary_model_uri = canary_model_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_spec = custom_default_spec
        self.custom_canary_spec = custom_canary_spec
        self.stream_log = stream_log
        self.backend = KubeManager()

        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace
Example #2
    def __init__(self,
                 namespace=None,
                 runs=1,
                 output=None,
                 cleanup=True,
                 labels=None,
                 job_name=DEFAULT_JOB_NAME,
                 stream_log=True,
                 deployer_type=DEPLOPYER_TYPE,
                 pod_spec_mutators=None):
        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

        # Used as pod and job name
        self.job_name = job_name
        self.deployment_spec = None
        self.runs = runs
        self.output = output
        self.backend = KubeManager()
        self.cleanup = cleanup
        self.stream_log = stream_log
        self.set_labels(labels, deployer_type)
        self.pod_spec_mutators = pod_spec_mutators or []
Example #3
class ClusterBuilder(BaseBuilder):
    """Builds a docker image in a Kubernetes cluster.


     Args:
        registry (str): Required. Registry to push image to
                        Example: gcr.io/kubeflow-images
        base_image (str): Base image to use for the image build
        preprocessor (BasePreProcessor): Preprocessor to use to modify inputs
                                         before sending them to docker build
        context_source (ContextSourceInterface): context available to the
                                                 cluster build
    """
    def __init__(self,
                 registry=None,
                 context_source=gcs_context.GCSContextSource(),
                 preprocessor=None,
                 base_image=constants.DEFAULT_BASE_IMAGE,
                 dockerfile_path=None):
        super().__init__(
            registry=registry,
            preprocessor=preprocessor,
            base_image=base_image,
        )
        self.manager = KubeManager()
        self.context_source = context_source
        # Keep the Dockerfile path so build() can reference self.dockerfile_path
        self.dockerfile_path = dockerfile_path

    def build(self):
        dockerfile_path = dockerfile.write_dockerfile(
            dockerfile_path=self.dockerfile_path, base_image=self.base_image)
        self.preprocessor.output_map[dockerfile_path] = 'Dockerfile'
        context_path, context_hash = self.preprocessor.context_tar_gz()
        self.image_tag = self.full_image_name(context_hash)
        self.context_source.prepare(context_path)
        labels = {'fairing-builder': 'kaniko'}
        build_pod = client.V1Pod(api_version="v1",
                                 kind="Pod",
                                 metadata=client.V1ObjectMeta(
                                     generate_name="fairing-builder-",
                                     labels=labels,
                                 ),
                                 spec=self.context_source.generate_pod_spec(
                                     self.image_tag))
        created_pod = client.CoreV1Api().create_namespaced_pod(
            "default", build_pod)
        self.manager.log(name=created_pod.metadata.name,
                         namespace=created_pod.metadata.namespace,
                         selectors=labels)

        # clean up created pod and secret
        self.context_source.cleanup()
        client.CoreV1Api().delete_namespaced_pod(
            created_pod.metadata.name, created_pod.metadata.namespace,
            client.V1DeleteOptions())
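
A minimal usage sketch for the ClusterBuilder above, for orientation only: the import paths are assumptions (they differ between fairing releases), and the registry, base image, and input file are placeholders.

# Hedged usage sketch -- import paths, registry, base image, and input file are
# assumptions, not values taken from this repository.
from fairing.builders.cluster.cluster import ClusterBuilder   # assumed module path
from fairing.builders.cluster import gcs_context              # assumed module path
from fairing.preprocessors.base import BasePreProcessor       # assumed module path

preprocessor = BasePreProcessor(input_files=['train.py'])      # assumed constructor argument
builder = ClusterBuilder(registry='gcr.io/my-project',         # placeholder registry
                         base_image='python:3.7',              # placeholder base image
                         preprocessor=preprocessor,
                         context_source=gcs_context.GCSContextSource())
builder.build()   # writes a Dockerfile, uploads the build context, and runs a Kaniko build pod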
Example #4
    def __init__(self, namespace=None, runs=1, output=None, labels=None):
        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

        self.deployment_spec = None
        self.runs = runs
        self.output = output
        # Avoid a mutable default argument; fall back to the deployer label.
        self.labels = labels if labels is not None else {'fairing-deployer': 'job'}
        self.backend = KubeManager()
Example #5
 def __init__(self,
              registry=None,
              context_source=gcs_context.GCSContextSource(),
              preprocessor=None,
              base_image=constants.DEFAULT_BASE_IMAGE,
              dockerfile_path=None):
     super().__init__(
         registry=registry,
         preprocessor=preprocessor,
         base_image=base_image,
     )
     self.manager = KubeManager()
     self.context_source = context_source
Example #6
 def __init__(self,
              gcp_project=None,
              credentials_file=os.environ.get(constants.GOOGLE_CREDS_ENV),
              namespace='default'):
     self.gcp_project = gcp_project
     self.credentials_file = credentials_file
     self.manager = KubeManager()
     self.namespace = namespace
Example #7
    def get_builder(self,
                    preprocessor,
                    base_image,
                    registry,
                    needs_deps_installation=True,
                    pod_spec_mutators=None):

        pod_spec_mutators = pod_spec_mutators or []
        pod_spec_mutators.append(gcp.add_gcp_credentials_if_exists)

        if not needs_deps_installation:
            return AppendBuilder(preprocessor=preprocessor,
                                 base_image=base_image,
                                 registry=registry)
        elif (fairing.utils.is_running_in_k8s() or
              not ml_tasks_utils.is_docker_daemon_exists()) and \
                KubeManager().secret_exists(constants.GCP_CREDS_SECRET_NAME, self._namespace):
            return ClusterBuilder(preprocessor=preprocessor,
                                  base_image=base_image,
                                  registry=registry,
                                  pod_spec_mutators=pod_spec_mutators,
                                  namespace=self._namespace,
                                  context_source=self._build_context_source)
        elif ml_tasks_utils.is_docker_daemon_exists():
            return DockerBuilder(preprocessor=preprocessor,
                                 base_image=base_image,
                                 registry=registry)
        else:
            msg = ["Not able to guess the right builder for this job!"]
            if KubeManager().secret_exists(constants.GCP_CREDS_SECRET_NAME,
                                           self._namespace):
                msg.append(
                    "It seems you don't have permission to list/access secrets in your "
                    "Kubeflow cluster. We need this permission in order to build a docker "
                    "image using Kubeflow cluster. Adding Kubeneters Admin role to the "
                    "service account you are using might solve this issue.")
            if not fairing.utils.is_running_in_k8s():
                msg.append(
                    " Also If you are using 'sudo' to access docker in your system you can"
                    " solve this problem by adding your username to the docker group. "
                    "Reference: https://docs.docker.com/install/linux/linux-postinstall/"
                    "#manage-docker-as-a-non-root-user You need to logout and login to "
                    "get change activated.")
            message = " ".join(msg)
            raise RuntimeError(message)
Example #8
 def __init__(self,
              registry=None,
              image_name=constants.DEFAULT_IMAGE_NAME,
              context_source=None,
              preprocessor=None,
              push=True,
              base_image=constants.DEFAULT_BASE_IMAGE,
              pod_spec_mutators=None,
              namespace=None,
              dockerfile_path=None):
     super().__init__(
         registry=registry,
         image_name=image_name,
         push=push,
         preprocessor=preprocessor,
         base_image=base_image)
     self.manager = KubeManager()
     if context_source is None:
         raise RuntimeError("context_source is not specified")
     self.context_source = context_source
     self.pod_spec_mutators = pod_spec_mutators or []
     self.namespace = namespace or fairing.utils.get_default_target_namespace()
Example #9
 def __init__(self,
              registry=None,
              image_name=constants.DEFAULT_IMAGE_NAME,
              context_source=None,
              preprocessor=None,
              push=True,
              base_image=constants.DEFAULT_BASE_IMAGE,
              pod_spec_mutators=None,
              namespace="kubeflow",
              dockerfile_path=None):
     super().__init__(
         registry=registry,
         image_name=image_name,
         push=push,
         preprocessor=preprocessor,
         base_image=base_image)
     self.manager = KubeManager()
     if context_source is None:
         context_source = gcs_context.GCSContextSource(namespace=namespace)
     self.context_source = context_source
     self.pod_spec_mutators = pod_spec_mutators or []
     self.namespace = namespace
Example #10
    def __init__(self,
                 region=None,
                 storage_account_name=None,
                 group_name=None,
                 container_name=None,
                 namespace='default'):
        self.region = region or "westeurope"
        # TODO ME note that the generated name is not necessarily unique due to truncation...
        self.storage_account_name = storage_account_name or f"{uuid.uuid4().hex[:24]}"
        self.container_name = container_name or "fairing-demo"

        self.manager = KubeManager()
        self.namespace = namespace
        self.group_name = group_name
Example #11
def get_azure_credentials(namespace):
    secret_name = constants.AZURE_CREDS_SECRET_NAME
    if not KubeManager().secret_exists(secret_name, namespace):
        raise Exception("Secret '{}' not found in namespace '{}'".format(
            secret_name, namespace))

    v1 = client.CoreV1Api()
    secret = v1.read_namespaced_secret(secret_name, namespace)
    sp_credentials = ServicePrincipalCredentials(
        client_id=get_plain_secret_value(secret.data, 'AZ_CLIENT_ID'),
        secret=get_plain_secret_value(secret.data, 'AZ_CLIENT_SECRET'),
        tenant=get_plain_secret_value(secret.data, 'AZ_TENANT_ID'))
    subscription_id = get_plain_secret_value(secret.data, 'AZ_SUBSCRIPTION_ID')
    return sp_credentials, subscription_id
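
A short, hedged sketch of how the returned credentials tuple might be consumed; it assumes the legacy azure-mgmt-resource SDK that pairs with ServicePrincipalCredentials, and the namespace value is a placeholder.

# Hedged usage sketch -- the management client and namespace are assumptions,
# not taken from this repository.
from azure.mgmt.resource import ResourceManagementClient

sp_credentials, subscription_id = get_azure_credentials('kubeflow')   # placeholder namespace
resource_client = ResourceManagementClient(sp_credentials, subscription_id)
for group in resource_client.resource_groups.list():
    print(group.name)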
Example #12
class ClusterBuilder(BaseBuilder):
    """Builds a docker image in a Kubernetes cluster.


     Args:
        registry (str): Required. Registry to push image to
                        Example: gcr.io/kubeflow-images
        base_image (str): Base image to use for the image build
        preprocessor (BasePreProcessor): Preprocessor to use to modify inputs
                                         before sending them to docker build
        context_source (ContextSourceInterface): context available to the
                                                 cluster build
        push {bool} -- Whether or not to push the image to the registry
    """
    def __init__(self,
                 registry=None,
                 image_name=constants.DEFAULT_IMAGE_NAME,
                 context_source=None,
                 preprocessor=None,
                 push=True,
                 base_image=constants.DEFAULT_BASE_IMAGE,
                 pod_spec_mutators=None,
                 namespace="kubeflow",
                 dockerfile_path=None):
        super().__init__(
            registry=registry,
            image_name=image_name,
            push=push,
            preprocessor=preprocessor,
            base_image=base_image,
        )
        self.manager = KubeManager()
        if context_source is None:
            raise RuntimeError("context_source is not specified")
        self.context_source = context_source
        self.pod_spec_mutators = pod_spec_mutators or []
        self.namespace = namespace
        # Keep the Dockerfile path so build() can reference self.dockerfile_path
        self.dockerfile_path = dockerfile_path

    def build(self):
        logging.info("Building image using cluster builder.")
        install_reqs_before_copy = (
            self.preprocessor.is_requirements_txt_file_present())
        dockerfile_path = dockerfile.write_dockerfile(
            dockerfile_path=self.dockerfile_path,
            path_prefix=self.preprocessor.path_prefix,
            base_image=self.base_image,
            install_reqs_before_copy=install_reqs_before_copy)
        self.preprocessor.output_map[dockerfile_path] = 'Dockerfile'
        context_path, context_hash = self.preprocessor.context_tar_gz()
        self.image_tag = self.full_image_name(context_hash)
        self.context_source.prepare(context_path)
        labels = {'fairing-builder': 'kaniko'}
        labels['fairing-build-id'] = str(uuid.uuid1())
        pod_spec = self.context_source.generate_pod_spec(
            self.image_tag, self.push)
        for fn in self.pod_spec_mutators:
            fn(self.manager, pod_spec, self.namespace)
        build_pod = client.V1Pod(api_version="v1",
                                 kind="Pod",
                                 metadata=client.V1ObjectMeta(
                                     generate_name="fairing-builder-",
                                     labels=labels,
                                     namespace=self.namespace,
                                 ),
                                 spec=pod_spec)
        created_pod = client.CoreV1Api().create_namespaced_pod(
            self.namespace, build_pod)
        self.manager.log(name=created_pod.metadata.name,
                         namespace=created_pod.metadata.namespace,
                         selectors=labels)

        # clean up created pod and secret
        self.context_source.cleanup()
        client.CoreV1Api().delete_namespaced_pod(
            created_pod.metadata.name,
            created_pod.metadata.namespace,
            body=client.V1DeleteOptions())
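
The pod_spec_mutators hooks used above are plain callables that build() invokes as fn(self.manager, pod_spec, self.namespace) before creating the build pod. A minimal sketch of a custom mutator (the environment variable it injects is purely illustrative):

# Minimal sketch of a custom pod_spec_mutator; only the call signature is taken
# from the code above, the injected variable is an illustrative assumption.
from kubernetes import client

def add_build_env_var(kube_manager, pod_spec, namespace):
    """Inject an illustrative environment variable into every build container."""
    for container in pod_spec.containers:
        if container.env is None:
            container.env = []
        container.env.append(client.V1EnvVar(name='FAIRING_BUILD', value='true'))

# Passed to the builder as: ClusterBuilder(..., pod_spec_mutators=[add_build_env_var])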
Example #13
 def __init__(self, aws_account=None, region=None, bucket_name=None):
     self.aws_account = aws_account
     self.manager = KubeManager()
     self.region = region or 'us-east-1'
     self.bucket_name = bucket_name
Example #14
class Job(DeployerInterface):
    """Handle all the k8s' template building for a training 
    Attributes:
        namespace: k8s namespace where the training's components 
            will be deployed.
        runs: Number of training(s) to be deployed. Hyperparameter search
            will generate multiple jobs.
    """
    def __init__(self,
                 namespace=None,
                 runs=1,
                 output=None,
                 cleanup=True,
                 labels=None,
                 job_name=DEFAULT_JOB_NAME,
                 stream_log=True,
                 deployer_type=DEPLOPYER_TYPE,
                 pod_spec_mutators=None):
        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

        # Used as pod and job name
        self.job_name = job_name
        self.deployment_spec = None
        self.runs = runs
        self.output = output
        self.backend = KubeManager()
        self.cleanup = cleanup
        self.stream_log = stream_log
        self.set_labels(labels, deployer_type)
        self.pod_spec_mutators = pod_spec_mutators or []

    def set_labels(self, labels, deployer_type):
        self.labels = {'fairing-deployer': deployer_type}
        if labels:
            self.labels.update(labels)

    def deploy(self, pod_spec):
        self.job_id = str(uuid.uuid1())
        self.labels['fairing-id'] = self.job_id
        for fn in self.pod_spec_mutators:
            fn(self.backend, pod_spec, self.namespace)
        pod_template_spec = self.generate_pod_template_spec(pod_spec)
        pod_template_spec.spec.restart_policy = 'Never'
        self.deployment_spec = self.generate_deployment_spec(pod_template_spec)
        if self.output:
            api = k8s_client.ApiClient()
            job_output = api.sanitize_for_serialization(self.deployment_spec)
            print(json.dumps(job_output))

        name = self.create_resource()
        logger.warn("Training job {} launched.".format(name))

        if self.stream_log:
            self.get_logs()

    def create_resource(self):
        self._created_job = self.backend.create_job(self.namespace,
                                                    self.deployment_spec)
        return self._created_job.metadata.name

    def generate_pod_template_spec(self, pod_spec):
        """Generate a V1PodTemplateSpec initiazlied with correct metadata
            and with the provided pod_spec"""
        if not isinstance(pod_spec, k8s_client.V1PodSpec):
            raise TypeError('pod_spec must be a V1PodSpec, but got %s' %
                            type(pod_spec))
        return k8s_client.V1PodTemplateSpec(metadata=k8s_client.V1ObjectMeta(
            name="fairing-deployer", labels=self.labels),
                                            spec=pod_spec)

    def generate_deployment_spec(self, pod_template_spec):
        """Generate a V1Job initialized with correct completion and
         parallelism (for HP search) and with the provided V1PodTemplateSpec"""
        if not isinstance(pod_template_spec, k8s_client.V1PodTemplateSpec):
            raise TypeError("""pod_template_spec must be a V1PodTemplateSpec,
                but got %s""" % type(pod_template_spec))

        job_spec = k8s_client.V1JobSpec(template=pod_template_spec,
                                        parallelism=self.runs,
                                        completions=self.runs)

        return k8s_client.V1Job(api_version="batch/v1",
                                kind="Job",
                                metadata=k8s_client.V1ObjectMeta(
                                    generate_name=self.job_name,
                                    labels=self.labels,
                                ),
                                spec=job_spec)

    def get_logs(self):
        self.backend.log(self._created_job.metadata.name,
                         self._created_job.metadata.namespace, self.labels)

        if self.cleanup:
            self.do_cleanup()

    def do_cleanup(self):
        logger.warn("Cleaning up job {}...".format(
            self._created_job.metadata.name))
        k8s_client.BatchV1Api().delete_namespaced_job(
            self._created_job.metadata.name,
            self._created_job.metadata.namespace,
            body=k8s_client.V1DeleteOptions(propagation_policy='Foreground'))
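
For reference, a minimal, hedged sketch of driving the Job deployer above with a hand-built V1PodSpec; the container image and namespace are placeholders.

# Hedged usage sketch for the Job deployer above; image and namespace are placeholders.
from kubernetes import client as k8s_client

pod_spec = k8s_client.V1PodSpec(containers=[
    k8s_client.V1Container(name='fairing-job',
                           image='gcr.io/my-project/fairing-job:latest')])

job = Job(namespace='kubeflow', runs=1)
job.deploy(pod_spec)   # creates the batch/v1 Job, streams its logs, then cleans it up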
Example #15
class KFServing(DeployerInterface):
    """
    Serves a prediction endpoint using Kubeflow KFServing.
    Attributes:
        framework: The framework for the kfservice, such as Tensorflow, XGBoost and ScikitLearn etc.
        default_model_uri: URI pointing to Saved Model assets for default service.
        canary_model_uri: URI pointing to Saved Model assets for canary service.
        canary_traffic_percent: The amount of traffic to sent to the canary, defaults to 0.
        namespace: The k8s namespace where the kfservice will be deployed.
        labels: Labels for the kfservice, separate with commas if have more than one.
        annotations: Annotations for the kfservice, separate with commas if have more than one.
        custom_default_spec: A flexible custom default specification for arbitrary customer provided containers.
        custom_canary_spec: A flexible custom canary specification for arbitrary customer provided containers.
        stream_log: Show log or not when kfservice started, defaults to True.
        cleanup: Delete the kfserving or not, defaults to False.
    """

    def __init__(self, framework, default_model_uri=None, canary_model_uri=None, canary_traffic_percent=0,
                 namespace=None, labels=None, annotations=None, custom_default_spec=None, 
                 custom_canary_spec=None, stream_log=True, cleanup=False):
        self.framework = framework
        self.default_model_uri = default_model_uri
        self.canary_model_uri = canary_model_uri
        self.canary_traffic_percent = canary_traffic_percent
        self.annotations = annotations
        self.set_labels(labels)
        self.cleanup = cleanup
        self.custom_default_spec = custom_default_spec
        self.custom_canary_spec = custom_canary_spec
        self.stream_log = stream_log
        self.backend = KubeManager()

        if namespace is None:
            self.namespace = utils.get_default_target_namespace()
        else:
            self.namespace = namespace

    def set_labels(self, labels):
        self.fairing_id = str(uuid.uuid1())
        self.labels = {'fairing-id': self.fairing_id}
        if labels:
            self.labels.update(labels)

    def deploy(self, template_spec):
        self.kfservice = self.generate_kfservice()
        self.created_kfserving = self.backend.create_kfserving(self.namespace, self.kfservice)
        if self.stream_log:
            self.get_logs()

        kfservice_name = self.created_kfserving['metadata']['name']
        logger.warn("Deployed the kfservice {} successfully.".format(kfservice_name))

        if self.cleanup:
            logger.warn("Cleaning up kfservice {}...".format(kfservice_name))
            self.backend.delete_kfserving(kfservice_name, self.namespace)

        return kfservice_name

    def generate_kfservice(self):

        spec = {}
        spec['default'] = {}
        if self.framework != 'custom':
            if self.default_model_uri is not None:
                spec['default'][self.framework] = {}
                spec['default'][self.framework]['modelUri'] = self.default_model_uri
            else:
                raise RuntimeError("The default_model_uri must be defined if the framework is not custom.")
        else:
            if self.custom_default_spec is not None:
                # TBD @jinchi Need to validate the custom_default_spec before executing.
                spec['default'][self.framework] = self.custom_default_spec
            else:
                raise RuntimeError("The custom_default_spec must be defined if the framework is custom.")

        if self.framework != 'custom':
            if self.canary_model_uri is not None:
                spec['canary'] = {}
                spec['canary'][self.framework] = {}
                spec['canary'][self.framework]['modelUri'] = self.canary_model_uri
                spec['canaryTrafficPercent'] = self.canary_traffic_percent
        else:
            if self.custom_canary_spec is not None:
                spec['canary'] = {}
                spec['canary'][self.framework] = self.custom_canary_spec
                spec['canaryTrafficPercent'] = self.canary_traffic_percent

        metadata = k8s_client.V1ObjectMeta(
            generate_name=constants.KFSERVING_DEFAULT_NAME,
            namespace=self.namespace,
            labels=self.labels,
            annotations=self.annotations)

        kfservice = {}
        kfservice['kind'] = constants.KFSERVING_KIND
        kfservice['apiVersion'] = constants.KFSERVING_GROUP + '/' + constants.KFSERVING_VERSION
        kfservice['metadata'] = metadata
        kfservice['spec'] = spec

        return kfservice

    def get_logs(self):
        name = self.created_kfserving['metadata']['name']
        namespace = self.created_kfserving['metadata']['namespace']

        self.backend.log(name, namespace, self.labels, 
                         container=constants.KFSERVING_CONTAINER_NAME, follow=False)
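
A minimal, hedged sketch of the KFServing deployer above; the model URI is a placeholder and 'tensorflow' is assumed to be a framework key understood by the targeted KFServing version.

# Hedged usage sketch for the KFServing deployer above; model URI and namespace
# are placeholders.
deployer = KFServing(framework='tensorflow',
                     default_model_uri='gs://my-bucket/models/my-model',
                     namespace='kubeflow')
# deploy() builds the kfservice spec itself, so template_spec is not used here.
kfservice_name = deployer.deploy(template_spec=None)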