Example #1
import logging
from typing import Optional

import kfp


def find_pipeline_id(pipeline_name: str,
                     client: kfp.Client,
                     page_size: int = 100,
                     page_token: str = "") -> Optional[str]:
    """Find the pipeline id of a pipeline by name.

    Arguments:
        pipeline_name {str} -- The name of the pipeline of interest
        client {kfp.Client} -- The kfp client
        page_size {int} -- The number of pipelines to collect per API request

    Keyword Arguments:
        page_token {str} -- The page token to use for the API request (default: {""})

    Returns:
        Optional[str] -- The pipeline id, or None if no pipeline matches
    """
    while True:
        pipelines = client.list_pipelines(page_size=page_size,
                                          page_token=page_token)
        for pipeline in pipelines.pipelines:
            if pipeline.name == pipeline_name:
                logging.info(f"The pipeline id is: {pipeline.id}")
                return pipeline.id
        # The next iteration needs to know which page to start from
        page_token = pipelines.next_page_token
        # If there is no next token, every page has been searched
        if not page_token:
            logging.info(
                f"Could not find the pipeline, is the name: {pipeline_name} correct?"
            )
            break
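
A minimal usage sketch for the function above; the host URL is a placeholder and assumes a reachable Kubeflow Pipelines endpoint:

client = kfp.Client(host="http://localhost:8080")  # placeholder host
pipeline_id = find_pipeline_id("my-training-pipeline", client)
if pipeline_id is None:
    raise SystemExit("Pipeline not found")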
Example #2
def pipeline_id(client: kfp.Client, name: str) -> str:
    """Gets the ID of the Kubeflow pipeline with the name `name`.

    Args:
      client: The kfp client.
      name: The name of the pipeline.

    Returns:
      The id of the pipeline, or "" if no pipeline with that name exists.
    """
    page_token = ""
    while page_token is not None:
        response = client.list_pipelines(page_token=page_token, page_size=100)
        if response.pipelines is None:
            return ""
        for p in response.pipelines:
            if p.name == name:
                return p.id
        page_token = response.next_page_token
    return ""
Example #3
class KubeflowClient(object):

    log = logging.getLogger(__name__)

    def __init__(self, config, project_name, context):
        token = AuthHandler().obtain_id_token()
        self.host = config.host
        self.client = Client(self.host, existing_token=token)
        self.project_name = project_name
        self.pipeline_description = config.run_config.description
        self.generator = PipelineGenerator(config, project_name, context)

    def list_pipelines(self):
        pipelines = self.client.list_pipelines(page_size=30).pipelines
        return tabulate(map(lambda x: [x.name, x.id], pipelines),
                        headers=["Name", "ID"])

    def run_once(
        self,
        pipeline,
        image,
        experiment_name,
        run_name,
        wait,
        image_pull_policy="IfNotPresent",
    ) -> None:
        run = self.client.create_run_from_pipeline_func(
            self.generator.generate_pipeline(pipeline, image,
                                             image_pull_policy),
            arguments={},
            experiment_name=experiment_name,
            run_name=run_name,
        )

        if wait:
            run.wait_for_run_completion(timeout=WAIT_TIMEOUT)

    def compile(self,
                pipeline,
                image,
                output,
                image_pull_policy="IfNotPresent"):
        Compiler().compile(
            self.generator.generate_pipeline(pipeline, image,
                                             image_pull_policy),
            output,
        )
        self.log.info("Generated pipeline definition was saved to %s" % output)

    def upload(self, pipeline, image, image_pull_policy="IfNotPresent"):
        pipeline = self.generator.generate_pipeline(pipeline, image,
                                                    image_pull_policy)

        if self._pipeline_exists(self.project_name):
            pipeline_id = self._get_pipeline_id(self.project_name)
            version_id = self._upload_pipeline_version(pipeline, pipeline_id)
            self.log.info("New version of pipeline created: %s", version_id)
        else:
            (pipeline_id, version_id) = self._upload_pipeline(pipeline)
            self.log.info("Pipeline created")

        self.log.info(
            "Pipeline link: %s/#/pipelines/details/%s/version/%s",
            self.host,
            pipeline_id,
            version_id,
        )

    def _pipeline_exists(self, pipeline_name):
        return self._get_pipeline_id(pipeline_name) is not None

    def _get_pipeline_id(self, pipeline_name):
        pipelines = self.client.pipelines.list_pipelines(filter=json.dumps({
            "predicates": [{
                "key": "name",
                "op": 1,
                "string_value": pipeline_name,
            }]
        })).pipelines

        if pipelines:
            return pipelines[0].id

    def _upload_pipeline_version(self, pipeline_func, pipeline_id):
        version_name = f"{clean_name(self.project_name)}-{uuid.uuid4()}"[:100]
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            return self.client.pipeline_uploads.upload_pipeline_version(
                f.name,
                name=version_name,
                pipelineid=pipeline_id,
                _request_timeout=10000,
            ).id

    def _upload_pipeline(self, pipeline_func):
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            pipeline = self.client.pipeline_uploads.upload_pipeline(
                f.name,
                name=self.project_name,
                description=self.pipeline_description,
                _request_timeout=10000,
            )
            return (pipeline.id, pipeline.default_version.id)

    def _ensure_experiment_exists(self, experiment_name):
        try:
            experiment = self.client.get_experiment(
                experiment_name=experiment_name)
            self.log.info(f"Existing experiment found: {experiment.id}")
        except ValueError as e:
            if not str(e).startswith("No experiment is found"):
                raise

            experiment = self.client.create_experiment(experiment_name)
            self.log.info(f"New experiment created: {experiment.id}")

        return experiment.id

    def schedule(self, experiment_name, cron_expression):
        experiment_id = self._ensure_experiment_exists(experiment_name)
        pipeline_id = self._get_pipeline_id(self.project_name)
        self._disable_runs(experiment_id, pipeline_id)
        self.client.create_recurring_run(
            experiment_id,
            f"{self.project_name} on {cron_expression}",
            cron_expression=cron_expression,
            pipeline_id=pipeline_id,
        )
        self.log.info("Pipeline scheduled to %s", cron_expression)

    def _disable_runs(self, experiment_id, pipeline_id):
        runs = self.client.list_recurring_runs(experiment_id=experiment_id)
        if runs.jobs is not None:
            my_runs = [
                job for job in runs.jobs
                if job.pipeline_spec.pipeline_id == pipeline_id
            ]
            for job in my_runs:
                self.client.jobs.delete_job(job.id)
                self.log.info(f"Previous schedule deleted {job.id}")
Example #4
class KubeflowClient(object):

    log = logging.getLogger(__name__)

    def __init__(self, config, project_name, context):
        token = self.obtain_id_token()
        self.host = config.host
        self.client = Client(self.host, existing_token=token)
        self.project_name = project_name
        self.context = context
        dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
        self.volume_meta = config.run_config.volume

    def list_pipelines(self):
        pipelines = self.client.list_pipelines(page_size=30).pipelines
        return tabulate(map(lambda x: [x.name, x.id], pipelines),
                        headers=["Name", "ID"])

    def run_once(
        self,
        pipeline,
        image,
        experiment_name,
        run_name,
        wait,
        image_pull_policy="IfNotPresent",
    ) -> None:
        run = self.client.create_run_from_pipeline_func(
            self.generate_pipeline(pipeline, image, image_pull_policy),
            arguments={},
            experiment_name=experiment_name,
            run_name=run_name,
        )

        if wait:
            run.wait_for_run_completion(timeout=WAIT_TIMEOUT)

    def obtain_id_token(self):
        from google.auth.transport.requests import Request
        from google.oauth2 import id_token
        from google.auth.exceptions import DefaultCredentialsError

        client_id = os.environ.get(IAP_CLIENT_ID, None)

        jwt_token = None

        if not client_id:
            self.log.info(
                "No IAP_CLIENT_ID provided, skipping custom IAP authentication"
            )
            return jwt_token

        try:
            self.log.debug("Obtaining JWT token for %s.", client_id)
            jwt_token = id_token.fetch_id_token(Request(), client_id)
            self.log.info("Obtained JWT token for MLFLOW connectivity.")
        except DefaultCredentialsError as ex:
            self.log.warning(
                str(ex) +
                (" Note that this authentication method does not work with default"
                 " credentials obtained via 'gcloud auth application-default login'"
                 " command. Refer to documentation on how to configure service account"
                 " locally"
                 " (https://cloud.google.com/docs/authentication/production#manually)"
                 ))
        except Exception as e:
            self.log.error("Failed to obtain IAP access token. " + str(e))
        finally:
            return jwt_token

    def generate_pipeline(self, pipeline, image, image_pull_policy):
        @dsl.pipeline(
            name=self.project_name,
            description="Kubeflow pipeline for Kedro project",
        )
        def convert_kedro_pipeline_to_kfp() -> None:
            """Convert from a Kedro pipeline into a kfp container graph."""

            node_volumes = (_setup_volumes()
                            if self.volume_meta is not None else {})
            node_dependencies = self.context.pipelines.get(
                pipeline).node_dependencies
            kfp_ops = _build_kfp_ops(node_dependencies, node_volumes)
            for node, dependencies in node_dependencies.items():
                for dependency in dependencies:
                    kfp_ops[node.name].after(kfp_ops[dependency.name])

        def _setup_volumes():
            vop = dsl.VolumeOp(
                name="data-volume-create",
                resource_name="data-volume",
                size=self.volume_meta.size,
                modes=self.volume_meta.access_modes,
                storage_class=self.volume_meta.storageclass,
            )
            if self.volume_meta.skip_init:
                return {"/home/kedro/data": vop.volume}
            else:
                volume_init = dsl.ContainerOp(
                    name="data-volume-init",
                    image=image,
                    command=["sh", "-c"],
                    arguments=[
                        " ".join([
                            "cp",
                            "--verbose",
                            "-r",
                            "/home/kedro/data/*",
                            "/home/kedro/datavolume",
                        ])
                    ],
                    pvolumes={"/home/kedro/datavolume": vop.volume},
                )
                volume_init.container.set_image_pull_policy(image_pull_policy)
                return {"/home/kedro/data": volume_init.pvolume}

        def _build_kfp_ops(node_dependencies: Dict[Node, Set[Node]],
                           node_volumes: Dict) -> Dict[str, dsl.ContainerOp]:
            """Build kfp container graph from Kedro node dependencies. """
            kfp_ops = {}

            env = [
                V1EnvVar(name=IAP_CLIENT_ID,
                         value=os.environ.get(IAP_CLIENT_ID, ""))
            ]

            if is_mlflow_enabled():
                kfp_ops["mlflow-start-run"] = dsl.ContainerOp(
                    name="mlflow-start-run",
                    image=image,
                    command=["kedro"],
                    arguments=[
                        "kubeflow",
                        "mlflow-start",
                        dsl.RUN_ID_PLACEHOLDER,
                    ],
                    file_outputs={"mlflow_run_id": "/tmp/mlflow_run_id"},
                )
                kfp_ops["mlflow-start-run"].container.set_image_pull_policy(
                    image_pull_policy)
                env.append(
                    V1EnvVar(
                        name="MLFLOW_RUN_ID",
                        value=kfp_ops["mlflow-start-run"].output,
                    ))

            for node in node_dependencies:
                name = _clean_name(node.name)
                kfp_ops[node.name] = dsl.ContainerOp(
                    name=name,
                    image=image,
                    command=["kedro"],
                    arguments=["run", "--node", node.name],
                    pvolumes=node_volumes,
                    container_kwargs={"env": env},
                )
                kfp_ops[node.name].container.set_image_pull_policy(
                    image_pull_policy)

            return kfp_ops

        return convert_kedro_pipeline_to_kfp

    def compile(self,
                pipeline,
                image,
                output,
                image_pull_policy="IfNotPresent"):
        Compiler().compile(
            self.generate_pipeline(pipeline, image, image_pull_policy), output)
        self.log.info("Generated pipeline definition was saved to %s" % output)

    def upload(self, pipeline, image, image_pull_policy="IfNotPresent"):
        pipeline = self.generate_pipeline(pipeline, image, image_pull_policy)

        if self._pipeline_exists(self.project_name):
            pipeline_id = self._get_pipeline_id(self.project_name)
            version_id = self._upload_pipeline_version(pipeline, pipeline_id,
                                                       self.project_name)
            self.log.info("New version of pipeline created: %s", version_id)
        else:
            (pipeline_id,
             version_id) = self._upload_pipeline(pipeline, self.project_name)
            self.log.info("Pipeline created")

        self.log.info(
            "Pipeline link: %s/#/pipelines/details/%s/version/%s",
            self.host,
            pipeline_id,
            version_id,
        )

    def _pipeline_exists(self, pipeline_name):
        return self._get_pipeline_id(pipeline_name) is not None

    def _get_pipeline_id(self, pipeline_name):
        pipelines = self.client.pipelines.list_pipelines(filter=json.dumps({
            "predicates": [{
                "key": "name",
                "op": 1,
                "string_value": pipeline_name,
            }]
        })).pipelines

        if pipelines:
            return pipelines[0].id

    def _upload_pipeline_version(self, pipeline_func, pipeline_id,
                                 pipeline_name):
        version_name = f"{_clean_name(pipeline_name)}-{uuid.uuid4()}"[:100]
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            return self.client.pipeline_uploads.upload_pipeline_version(
                f.name, name=version_name, pipelineid=pipeline_id).id

    def _upload_pipeline(self, pipeline_func, pipeline_name):
        with NamedTemporaryFile(suffix=".yaml") as f:
            Compiler().compile(pipeline_func, f.name)
            pipeline = self.client.pipeline_uploads.upload_pipeline(
                f.name, name=pipeline_name)
            return (pipeline.id, pipeline.default_version.id)

    def _ensure_experiment_exists(self, experiment_name):
        try:
            experiment = self.client.get_experiment(
                experiment_name=experiment_name)
            self.log.info(f"Existing experiment found: {experiment.id}")
        except ValueError as e:
            if not str(e).startswith("No experiment is found"):
                raise

            experiment = self.client.create_experiment(experiment_name)
            self.log.info(f"New experiment created: {experiment.id}")

        return experiment.id

    def schedule(self, experiment_name, cron_expression):
        experiment_id = self._ensure_experiment_exists(experiment_name)
        pipeline_id = self._get_pipeline_id(self.project_name)
        self._disable_runs(experiment_id, pipeline_id)
        self.client.create_recurring_run(
            experiment_id,
            f"{self.project_name} on {cron_expression}",
            cron_expression=cron_expression,
            pipeline_id=pipeline_id,
        )
        self.log.info("Pipeline scheduled to %s", cron_expression)

    def _disable_runs(self, experiment_id, pipeline_id):
        runs = self.client.list_recurring_runs(experiment_id=experiment_id)
        if runs.jobs is not None:
            my_runs = [
                job for job in runs.jobs
                if job.pipeline_spec.pipeline_id == pipeline_id
            ]
            for job in my_runs:
                self.client.jobs.delete_job(job.id)
                self.log.info(f"Previous schedule deleted {job.id}")
Example #5
class KubeflowClient:
    """
    A wrapper around the existing Kubeflow Pipelines Client that exposes
    more of the Kubeflow Pipelines API.
    """
    def __init__(self,
                 host: Optional[str] = None,
                 client_id: Optional[str] = None,
                 namespace: Optional[str] = "kubeflow"):
        """
        Instantiate a new KubeflowClient

        Args:
            host (str): The host we can find the Kubeflow API at (e.g. https://{APP_NAME}.endpoints.{PROJECT_ID}.cloud.goog/pipeline)
            client_id (str): The IAP client id we can use for authorisation (e.g. "XXXXXX-XXXXXXXXX.apps.googleusercontent.com")
            namespace (str): The Kubernetes / Kubeflow namespace to deploy to (e.g. kubeflow)
        """
        self.host = host
        self.client_id = client_id
        self.namespace = namespace

        logging.info(f"KubeflowClient: host: {host}, client_id: {client_id}")
        self.kfp_client = Client(host, client_id, namespace)

        self.config = self.kfp_client._load_config(self.host, self.client_id,
                                                   self.namespace, None, None)

        # print(f"kfp auth:")
        # print(f"\thost: {self.host}")
        # print(f"\tclient_id: {self.client_id}")
        # print(f"\tnamespace: {self.namespace}")
        # print(f"\tapi_key: {self.config.api_key}")
        self.kfp_pipelines = self._connect_pipelines_api()
        self.kfp_runs = self._connect_runs_api()
        self.kfp_jobs = self._connect_jobs_api()

    def create_pipeline(self, pipeline_func, pipeline_name):
        """
        Create a new Kubeflow Pipeline using the provided pipeline function

        Args:
            pipeline_func: The function decorated with @dsl.pipeline which defines the pipeline
            pipeline_name (str): The name to register the pipeline under

        Returns:
            The Kubeflow Pipeline object created
        """

        fd, pipeline_package_path = tempfile.mkstemp(suffix=".zip")
        os.close(fd)  # mkstemp returns an open file descriptor; close it
        try:
            compiler.Compiler().compile(pipeline_func, pipeline_package_path)

            logging.info(f"Compiled pipeline to: {pipeline_package_path}")

            return self.kfp_client.upload_pipeline(pipeline_package_path,
                                                   pipeline_name)
        finally:
            # Clean up the compiled package once it has been uploaded
            os.remove(pipeline_package_path)

    def create_experiment(self, experiment_name):
        """
        Create a new Kubeflow Pipelines Experiment (a grouping of pipelines / runs)

        Args:
            experiment_name (str): The name of the experiment

        Returns:
            The Kubeflow experiment object created
        """
        return self.kfp_client.create_experiment(name=experiment_name)

    def list_experiments(self):
        """
        List the Experiments in the current namespace

        Returns:
            A list of all the Experiments
        """

        all_experiments = list()
        next_page_token = ""
        while next_page_token is not None:
            response = self.kfp_client.list_experiments(
                page_size=100, page_token=next_page_token)
            if response.experiments is None:
                break
            all_experiments.extend(response.experiments)
            next_page_token = response.next_page_token


        return all_experiments

    def find_job(self, job_name):
        """
        Look up a job by its name (in the current namespace).  Returns
        None if the job cannot be found

        Args:
            job_name (str): The name of the job to find

        Returns:
            A reference to the job if found, and None if not.
        """
        jobs = self.list_jobs()
        if jobs is None:
            return None

        for j in jobs:
            if j.name == job_name:
                return j
        return None

    def list_jobs(self):
        """
        List the Jobs in the current namespace

        Returns:
            A list of all the Jobs
        """

        all_jobs = list()
        next_page_token = ""
        while next_page_token is not None:
            response = self.kfp_jobs.list_jobs(page_size=100,
                                               page_token=next_page_token)
            if response.jobs is None:
                break
            all_jobs.extend(response.jobs)
            next_page_token = response.next_page_token


        return all_jobs

    def delete_job(self, job):
        """
        Delete a `Job` using its job.id

        Args:
            job (KubeflowJob): A `Job` object to delete

        Returns:
            True if the `Job` was deleted successfully
        """
        self.kfp_jobs.delete_job(id=job.id)
        return True

    def create_job(self,
                   name: str,
                   pipeline,
                   experiment,
                   description=None,
                   enabled=True,
                   max_concurrency=1,
                   cron=None):
        """
        Create a new Kubeflow Pipelines Job

        Args:
            name (str): The name of the `Job`
            pipeline (Pipeline): The `Pipeline` object to execute when the `Job` is called
            experiment (Experiment): The `Experiment` object to create the `Job` in.
            description (str): A description of the `Job`
            enabled (bool): Should the `Job` be enabled?
            max_concurrency (int): How many concurrent executions of the `Job` are allowed?
            cron (str): The cron expression used to execute the `Job` periodically

        Returns:
            The Kubeflow API response object.
        """

        key = kfp_server_api.models.ApiResourceKey(
            id=experiment.id,
            type=kfp_server_api.models.ApiResourceType.EXPERIMENT)

        reference = kfp_server_api.models.ApiResourceReference(
            key, kfp_server_api.models.ApiRelationship.OWNER)

        spec = kfp_server_api.models.ApiPipelineSpec(pipeline_id=pipeline.id)

        trigger = None
        if cron is not None:
            cron_schedule = kfp_server_api.models.api_cron_schedule.ApiCronSchedule(
                cron=cron)
            trigger = kfp_server_api.models.api_trigger.ApiTrigger(
                cron_schedule=cron_schedule)

        run_body = kfp_server_api.models.ApiJob(
            name=name,
            description=description,
            pipeline_spec=spec,
            resource_references=[reference],
            enabled=enabled,
            trigger=trigger,
            max_concurrency=str(max_concurrency),
        )

        response = self.kfp_jobs.create_job(body=run_body)
        return response

    def list_runs(self, experiment_name):
        """
        List the `Runs` in the specified `Experiment`

        Args:
            experiment_name (str): The name of the `Experiment`

        Returns:
            A list of all the `Runs` in the current `Experiment`
        """
        experiment = self.find_experiment(name=experiment_name)
        if experiment is None:
            return []
        all_runs = list()
        next_page_token = ""
        while next_page_token is not None:
            response = self.kfp_client.list_runs(page_size=100,
                                                 page_token=next_page_token,
                                                 experiment_id=experiment.id)
            if response.runs is None:
                break
            all_runs.extend(response.runs)
            next_page_token = response.next_page_token

        return all_runs

    def list_pipelines(self):
        """
        List the `Pipelines` in the current namespace

        Returns:
            A list of all the `Pipelines` in the current `Experiment`
        """
        all_pipelines = list()
        next_page_token = ""
        while next_page_token is not None:
            response = self.kfp_client.list_pipelines(
                page_size=100, page_token=next_page_token)
            if response.pipelines is None:
                break
            all_pipelines.extend(response.pipelines)
            next_page_token = response.next_page_token

        return all_pipelines

    def find_experiment(self, id=None, name=None):
        """
        Look up an `Experiment` by its name or id. Returns
        None if the `Experiment` cannot be found. Both `id` and
        `name` are optional, but at least one must be provided.
        Where both are provided, the function returns the
        first `Experiment` matching either id or name.

        Args:
            id (str): The `id` of the `Experiment` to find
            name (string): The `name` of the `Experiment` to find

        Returns:
            A reference to the `Experiment` if found, and None if not.
        """
        experiments = self.list_experiments()
        if experiments is None:
            return None
        for e in experiments:
            if e.name == name:
                return e
            if e.id == id:
                return e

        return None

    def find_pipeline(self, name):
        """
        Look up a `Pipeline`  by its name (in the current namespace).  Returns
        None if the `Pipeline` cannot be found

        Args:
            name (str): The name of the `Pipeline` to find

        Returns:
            A reference to the `Pipeline` if found, and `None` if not.
        """

        pipelines = self.list_pipelines()
        if pipelines is None:
            return None

        for p in pipelines:
            if p.name == name:
                return p
        return None

    def delete_pipeline(self, pipeline):
        """
        Delete the specified `Pipeline`

        Args:
            pipeline: The pipeline object to delete

        Returns:
            True if successful
        """
        self.kfp_pipelines.delete_pipeline(pipeline.id)
        return True

    def _connect_pipelines_api(self):
        """
            Create a new PipelineServiceApi client
        """
        api_client = kfp_server_api.api_client.ApiClient(self.config)
        pipelines_api = kfp_server_api.api.pipeline_service_api.PipelineServiceApi(
            api_client)
        return pipelines_api

    def _connect_runs_api(self):
        """
            Create a new RunServiceApi client
        """
        api_client = kfp_server_api.api_client.ApiClient(self.config)
        runs_api = kfp_server_api.api.run_service_api.RunServiceApi(api_client)
        return runs_api

    def _connect_jobs_api(self):
        """
            Create a new JobServiceApi client
        """
        api_client = kfp_server_api.api_client.ApiClient(self.config)
        jobs_api = kfp_server_api.api.job_service_api.JobServiceApi(api_client)
        return jobs_api
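
A minimal usage sketch of the wrapper above; the host and client id are the placeholder values from the constructor docstring:

client = KubeflowClient(
    host="https://my-app.endpoints.my-project.cloud.goog/pipeline",
    client_id="XXXXXX-XXXXXXXXX.apps.googleusercontent.com",
)
pipeline = client.find_pipeline("my-pipeline")
experiment = client.find_experiment(name="my-experiment")
if pipeline and experiment:
    client.create_job("nightly", pipeline, experiment, cron="0 2 * * *")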