def run():
    """Define a kubeflow pipeline."""

    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'advert-pred'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels).run(
            pipeline.create_pipeline(
                pipeline_name=PIPELINE_NAME,
                pipeline_root=PIPELINE_ROOT,
                data_path=DATA_PATH,
                preprocessing_fn=PREPROCESSING_FN,
                run_fn=RUN_FN,
                train_args=trainer_pb2.TrainArgs(num_steps=TRAIN_NUM_STEPS),
                eval_args=trainer_pb2.EvalArgs(num_steps=EVAL_NUM_STEPS),
                eval_accuracy_threshold=EVAL_ACCURACY_THRESHOLD,
                serving_model_dir=SERVING_MODEL_DIR,
            ))
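A pipeline definition like the one above is typically triggered from a tiny entry point, or imported and compiled by the tfx CLI. A minimal sketch, assuming absl is available (the logging setup is an assumption, not part of the example above):

from absl import logging

if __name__ == '__main__':
    logging.set_verbosity(logging.INFO)
    run()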
Example #2
    def _compile_and_run_pipeline(self, pipeline: tfx_pipeline.Pipeline,
                                  **kwargs):
        """Compiles and runs a KFP pipeline.

    In this method, the provided TFX pipeline is submitted via kfp.Client()
    instead of through Argo.

    Args:
      pipeline: The logical pipeline to run.
      **kwargs: Key-value pairs of runtime parameters passed to the pipeline
        execution.
    """
        client = kfp.Client(host=self._KFP_ENDPOINT)

        pipeline_name = pipeline.pipeline_info.pipeline_name
        config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            kubeflow_metadata_config=self._get_kubeflow_metadata_config(),
            tfx_image=self._CONTAINER_IMAGE)
        kubeflow_dag_runner.KubeflowDagRunner(config=config).run(pipeline)

        file_path = os.path.join(self._test_dir,
                                 '{}.tar.gz'.format(pipeline_name))
        self.assertTrue(tf.io.gfile.exists(file_path))

        run_result = client.create_run_from_pipeline_package(
            pipeline_file=file_path, arguments=kwargs)
        run_id = run_result.run_id

        self._assert_successful_run_completion(host=self._KFP_ENDPOINT,
                                               run_id=run_id,
                                               pipeline_name=pipeline_name,
                                               timeout=self._TIME_OUT)
def main(unused_argv):
    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        # Specify custom docker image to use.
        tfx_image=tfx_image)

    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        create_pipeline(
            pipeline_name=_pipeline_name,
            pipeline_root=_pipeline_root,
            module_file=_module_file,
            ai_platform_training_args=_ai_platform_training_args,
            ai_platform_serving_args=_ai_platform_serving_args,
        ))
Example #4
def run():
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    os.environ[kubeflow_dag_runner.SDK_ENV_LABEL] = 'tfx-template'

    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        pipeline.create_pipeline(
            pipeline_name=config.PIPELINE_NAME,
            pipeline_root=pipeline_config.PIPELINE_ROOT_GCS,
            data_path=pipeline_config.DATA_PATH_KUBEFLOW,
            preprocessing_fn=config.PREPROCESSING_FN,
            run_fn=config.RUN_FN,
            train_args=trainer_pb2.TrainArgs(num_steps=config.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=config.EVAL_NUM_STEPS),
            eval_accuracy_threshold=config.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=pipeline_config.SERVING_MODEL_DIR_GCS,
            query=config.BIG_QUERY_QUERY,
            beam_pipeline_args=config.
            BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # beam_pipeline_args=config.DATAFLOW_BEAM_PIPELINE_ARGS,
            # ai_platform_training_args=config.GCP_AI_PLATFORM_TRAINING_ARGS,
            # ai_platform_serving_args=config.GCP_AI_PLATFORM_SERVING_ARGS
        ))
def run(metadata_file: Optional[Text] = None):
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to MySQL database inside
    # Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    metadata = get_metadata(metadata_file)
    system_config = get_config(metadata, "system_configurations")
    model_config = get_config(metadata, "model_configurations")
    # tfx_image = system_config.get("TFX_IMAGE", None)
    tfx_image = os.environ.get("KUBEFLOW_TFX_IMAGE", None)
    logging.info(f"Current tfx image used: {tfx_image}")

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        tfx_image=tfx_image,
        #pipeline_operator_funcs=([set_memory_request_and_limits(
        #    system_config["memory_request"], system_config["memory_limit"])]),
    )
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({
        telemetry_utils.LABEL_KFP_SDK_ENV:
        metadata["pipeline_name"] + "_" + metadata["pipeline_version"]
    })

    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=metadata["pipeline_name"] + "_" +
            metadata["pipeline_version"],
            pipeline_root=system_config["PIPELINE_ROOT"],
            query=model_config["query_script_path"],
            preprocessing_fn=system_config["preprocessing_fn"],
            run_fn=system_config["run_fn"],
            train_args=trainer_pb2.TrainArgs(splits=["train"], num_steps=100),
            eval_args=trainer_pb2.EvalArgs(splits=["train"], num_steps=50),
            model_serve_dir=system_config["MODEL_SERVE_DIR"],
            beam_pipeline_args=system_config["DATAFLOW_BEAM_PIPELINE_ARGS"],
            ai_platform_training_args=system_config[
                "GCP_AI_PLATFORM_TRAINING_ARGS"]
            if system_config["enable_gpc_ai_platform_training"] else None,
            # (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=system_config["GCP_AI_PLATFORM_SERVING_ARGS"],
            enable_cache=system_config["enable_cache"],
            system_config=system_config,  # passing config parameters downstream
            model_config=model_config,  # passing model parameters downstream
        ))
Example #6
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to MySQL database inside
    # Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    # tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
    tfx_image = 'gcr.io/gcp-nyc/tfx-pipeline'

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            gcp_project=configs.GOOGLE_CLOUD_PROJECT,
            gcs_bucket=configs.GCS_BUCKET_NAME,
            tcga_betas_query=configs.TCGA_BETAS_QUERY,
            tcga_betas_output_schema=configs.TCGA_BETAS_OUTPUT_SCHEMA,
            tcga_betas_output_table_name=configs.TCGA_BETAS_OUTPUT_TABLE,
            cpg_sites_list_query=configs.CPG_SITES_LIST_QUERY,
            cpg_sites_list_output_schema=configs.CPG_SITES_OUTPUT_SCHEMA,
            cpg_sites_list_output_table_name=configs.CPG_SITES_OUTPUT_TABLE,
            pivot_query=configs.PIVOT_DATASET_QUERY,
            pivot_output_table=configs.PIVOT_OUTPUT_TABLE,
            final_dataset_query=configs.TRAIN_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            beam_pipeline_args=configs.
            BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
Example #7
def get_default_kubeflow_dag_runner():
  """Returns the default KubeflowDagRunner with its default metadata config."""

  metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
  tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
  logging.info('Using "%s" as the docker image.', tfx_image)
  runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
      kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

  return kubeflow_dag_runner.KubeflowDagRunner(config=runner_config)
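get_default_kubeflow_dag_runner() only builds the runner; compiling a pipeline with it is a separate call. A hedged usage sketch, where create_pipeline and its arguments are placeholders rather than part of the example above:

runner = get_default_kubeflow_dag_runner()
runner.run(
    create_pipeline(
        pipeline_name='my_pipeline',                  # placeholder name
        pipeline_root='gs://my-bucket/pipeline_root'  # placeholder root
    ))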
Example #8
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to MySQL database inside
    # Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=conf['kfp']['pipeline_name'],
            pipeline_root=conf['pipeline_root_dir'],
            data_path=conf['train_data'],
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            module_file='pjm_trainer.py',
            #   preprocessing_fn=configs.PREPROCESSING_FN,
            #   run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=conf['serving_model_dir'],
            # TODO(step 7): (Optional) Uncomment below to use provide GCP related
            #               config for BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs
            # .BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
Example #9
    def _compile_and_run_pipeline(self,
                                  pipeline: tfx_pipeline.Pipeline,
                                  parameters: Dict[Text, Any] = None):
        """Compiles and runs a KFP pipeline.

    Args:
      pipeline: The logical pipeline to run.
      parameters: Values of the runtime parameters of the pipeline.
    """
        pipeline_name = pipeline.pipeline_info.pipeline_name
        config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            kubeflow_metadata_config=self._get_kubeflow_metadata_config(
                pipeline_name),
            tfx_image=self._container_image)
        kubeflow_dag_runner.KubeflowDagRunner(config=config).run(pipeline)

        file_path = os.path.join(self._test_dir,
                                 '{}.tar.gz'.format(pipeline_name))
        self.assertTrue(tf.io.gfile.exists(file_path))
        tarfile.TarFile.open(file_path).extract('pipeline.yaml')
        pipeline_file = os.path.join(self._test_dir, 'pipeline.yaml')
        self.assertIsNotNone(pipeline_file)

        # Ensure cleanup regardless of whether pipeline succeeds or fails.
        self.addCleanup(self._delete_workflow, pipeline_name)
        self.addCleanup(self._delete_pipeline_output, pipeline_name)
        self.addCleanup(self._delete_pipeline_metadata, pipeline_name)

        # Run the pipeline to completion.
        self._run_workflow(pipeline_file, pipeline_name, parameters)

        # Obtain workflow logs.
        get_logs_command = [
            'argo', '--namespace', 'kubeflow', 'logs', '-w', pipeline_name
        ]
        logs_output = subprocess.check_output(get_logs_command).decode('utf-8')

        # Check if pipeline completed successfully.
        get_workflow_command = [
            'argo', '--namespace', 'kubeflow', 'get', pipeline_name
        ]
        output = subprocess.check_output(get_workflow_command).decode('utf-8')

        self.assertIsNotNone(
            re.search(r'^Status:\s+Succeeded$', output, flags=re.MULTILINE),
            'Pipeline {} failed to complete successfully:\n{}'
            '\nFailed workflow logs:\n{}'.format(pipeline_name, output,
                                                 logs_output))
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to MySQL database inside
    # Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    # Set the SDK type label environment.
    os.environ[kubeflow_dag_runner.SDK_ENV_LABEL] = 'tfx-template'

    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # TODO(step 7): (Optional) Uncomment below to use provide GCP related
            #               config for BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs
            # .BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
Example #11
    def _compile_and_run_pipeline(self,
                                  pipeline: tfx_pipeline.Pipeline,
                                  workflow_name: Text = None,
                                  parameters: Dict[Text, Any] = None):
        """Compiles and runs a KFP pipeline.

    Args:
      pipeline: The logical pipeline to run.
      workflow_name: The Argo workflow name; defaults to the pipeline name.
      parameters: Values of the runtime parameters of the pipeline.
    """
        pipeline_name = pipeline.pipeline_info.pipeline_name
        config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            kubeflow_metadata_config=self._get_kubeflow_metadata_config(),
            tfx_image=self._CONTAINER_IMAGE)
        kubeflow_dag_runner.KubeflowDagRunner(config=config).run(pipeline)

        file_path = os.path.join(self._test_dir,
                                 '{}.tar.gz'.format(pipeline_name))
        self.assertTrue(tf.io.gfile.exists(file_path))
        tarfile.TarFile.open(file_path).extract('pipeline.yaml')
        pipeline_file = os.path.join(self._test_dir, 'pipeline.yaml')
        self.assertIsNotNone(pipeline_file)

        workflow_name = workflow_name or pipeline_name
        # Ensure cleanup regardless of whether pipeline succeeds or fails.
        self.addCleanup(self._delete_workflow, workflow_name)
        self.addCleanup(self._delete_pipeline_metadata, pipeline_name)
        self.addCleanup(self._delete_pipeline_output, pipeline_name)

        # Run the pipeline to completion.
        self._run_workflow(pipeline_file, workflow_name, parameters)

        # Obtain workflow logs.
        get_logs_command = [
            'argo', '--namespace', 'kubeflow', 'logs', '-w', workflow_name
        ]
        logs_output = subprocess.check_output(get_logs_command).decode('utf-8')

        # Check if pipeline completed successfully.
        status = self._get_argo_pipeline_status(workflow_name)
        self.assertEqual(
            'Succeeded', status,
            'Pipeline {} failed to complete successfully: {}'
            '\nFailed workflow logs:\n{}'.format(pipeline_name, status,
                                                 logs_output))
def run():
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            bucket=BUCKET,
            csv_file=CSV_FILE,
            preprocessing_fn=configs.PREPROCESSING_FN,
            trainer_fn=configs.TRAINER_FN,
            train_args=configs.TRAIN_ARGS,
            eval_args=configs.EVAL_ARGS,
            serving_model_dir=SERVING_MODEL_DIR,
        ))
    def _setup_pipeline_parameters_from_env(self):
        self.LOCAL_LOG_DIR = self.env_config.LOCAL_LOG_DIR
        self.PIPELINE_NAME = self.env_config.PIPELINE_NAME
        self.ENABLE_CACHE = strtobool(self.env_config.ENABLE_CACHE)
        self.LOCAL_RUN = strtobool(self.env_config.LOCAL_RUN)

        self.TFX_IMAGE = self.env_config.TFX_IMAGE
        self.RUNTIME_VERSION = self.env_config.RUNTIME_VERSION
        self.PYTHON_VERSION = self.env_config.PYTHON_VERSION
        self.USE_KFP_SA = self.env_config.USE_KFP_SA

        self.DATA_ROOT_URI = self.env_config.DATA_ROOT_URI

        # properties applicable for local run
        self.HOME = self.env_config.HOME
        self.code_folder = self.env_config.CODE_FOLDER
        self.LOCAL_ARTIFACT_STORE = self.env_config.LOCAL_ARTIFACT_STORE
        self.LOCAL_SERVING_MODEL_DIR = self.env_config.LOCAL_SERVING_MODEL_DIR
        self.LOCAL_PIPELINE_ROOT = self.env_config.LOCAL_PIPELINE_ROOT
        self.LOCAL_METADATA_PATH = self.env_config.LOCAL_METADATA_PATH

        self._set_additional_cloud_properties()

        self.trainer_config = TrainerConfig.from_config(
            config=self.env_config,
            ai_platform_training_args=self.ai_platform_training_args)
        self.tuner_config = TunerConfig.from_config(
            config=self.env_config, ai_platform_tuner_args=None)
        self.pusher_config = PusherConfig.from_config(
            config=self.env_config,
            serving_model_dir=self.LOCAL_SERVING_MODEL_DIR,
            ai_platform_serving_args=self.ai_platform_serving_args)
        # Set the default values for the pipeline runtime parameters
        self.runtime_parameters_config = RuntimeParametersConfig.from_config(
            config=self.env_config)

        metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
        )

        self.runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            kubeflow_metadata_config=metadata_config,
            pipeline_operator_funcs=kubeflow_dag_runner.
            get_default_pipeline_operator_funcs(strtobool(self.USE_KFP_SA)),
            tfx_image=self.env_config.TFX_IMAGE)
    def testVolumeMountingPipelineOperatorFuncs(self):
        mount_volume_op = onprem.mount_pvc('my-persistent-volume-claim',
                                           'my-volume-name',
                                           '/mnt/volume-mount-path')
        config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            pipeline_operator_funcs=[mount_volume_op])

        kubeflow_dag_runner.KubeflowDagRunner(config=config).run(
            _two_step_pipeline())
        file_path = 'two_step_pipeline.tar.gz'
        self.assertTrue(fileio.exists(file_path))

        with tarfile.TarFile.open(file_path).extractfile(
                'pipeline.yaml') as pipeline_file:
            self.assertIsNotNone(pipeline_file)
            pipeline = yaml.safe_load(pipeline_file)

            container_templates = [
                c for c in pipeline['spec']['templates'] if 'container' in c
            ]
            self.assertEqual(2, len(container_templates))

            volumes = [{
                'name': 'my-volume-name',
                'persistentVolumeClaim': {
                    'claimName': 'my-persistent-volume-claim'
                }
            }]

            # Check that the PVC is specified for kfp<=0.1.31.1.
            if 'volumes' in pipeline['spec']:
                self.assertEqual(volumes, pipeline['spec']['volumes'])

            for template in container_templates:
                # Check that each container has the volume mounted.
                self.assertEqual([{
                    'name': 'my-volume-name',
                    'mountPath': '/mnt/volume-mount-path'
                }], template['container']['volumeMounts'])

                # Check that each template has the PVC specified for kfp>=0.1.31.2.
                if 'volumes' in template:
                    self.assertEqual(volumes, template['volumes'])
def run():
    """Define a kubeflow pipeline."""

    # Metadata config.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-timeseries'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        timeseries_pipeline.create_pipeline(
            pipeline_name=config.PIPELINE_NAME,
            enable_cache=True,
            run_fn='timeseries.encoder_decoder.encoder_decoder_run_fn.run_fn',
            preprocessing_fn=
            'timeseries.encoder_decoder.encoder_decoder_preprocessing.preprocessing_fn',
            data_path=DATA_PATH,
            pipeline_root=PIPELINE_ROOT,
            serving_model_dir=os.path.join(config.PIPELINE_ROOT, os.pathsep),
            train_args=trainer_pb2.TrainArgs(num_steps=3360),
            eval_args=trainer_pb2.EvalArgs(num_steps=56),
            beam_pipeline_args=config.GCP_DATAFLOW_ARGS,
            trainer_custom_config={
                'train_batches': 500,
                'eval_batches': 250,
                'training_example_count': 28000,
                'eval_example_count': 14000,
                'timesteps': config.MODEL_CONFIG['timesteps'],
                'number_features': 6,
                'outer_units': 16,
                'inner_units': 4
            },
            transformer_custom_config=config.MODEL_CONFIG,
        ))
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to MySQL database inside
    # Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
    # pipeline DSL file, instead of using environment vars.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # NOTE: Use `query` instead of `data_path` to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # NOTE: Provide GCP configs to use BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs.
            # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
        ))
Example #17
    def testPatcher(self):
        given_image_name = 'foo/bar'
        built_image_name = 'foo/bar@sha256:1234567890'

        mock_build_image_fn = mock.MagicMock(return_value=built_image_name)
        patcher = kubeflow_dag_runner_patcher.KubeflowDagRunnerPatcher(
            call_real_run=True,
            build_image_fn=mock_build_image_fn,
            use_temporary_output_file=True)
        runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            tfx_image=given_image_name)
        runner = kubeflow_dag_runner.KubeflowDagRunner(config=runner_config)
        pipeline = tfx_pipeline.Pipeline('dummy', 'dummy_root')
        with patcher.patch() as context:
            runner.run(pipeline)
        self.assertTrue(context[patcher.USE_TEMPORARY_OUTPUT_FILE])
        self.assertIn(patcher.OUTPUT_FILE_PATH, context)

        mock_build_image_fn.assert_called_once_with(given_image_name)
        self.assertEqual(runner_config.tfx_image, built_image_name)
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to MySQL database inside
    # Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            #data_path=DATA_PATH,
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            trainer_fn=configs.TRAINER_FN,
            train_args=configs.TRAIN_ARGS,
            eval_args=configs.EVAL_ARGS,
            serving_model_dir=SERVING_MODEL_DIR,
            # TODO(step 7): (Optional) Uncomment below to use provide GCP related
            #               config for BigQuery.
            beam_pipeline_args=configs.BIG_QUERY_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
Example #19
def _compile_pipeline(pipeline_def,
                      project_id,
                      pipeline_name,
                      pipeline_image,
                      pipeline_spec_path):
    """Compiles the pipeline."""

    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    
    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
      kubeflow_metadata_config=metadata_config,
      # Specify custom docker image to use.
      # tfx_image=tfx_image
    )
    
    runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config,
        output_filename=pipeline_spec_path)

    # Compile the pipeline
    runner.run(pipeline_def)
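_compile_pipeline only writes the compiled spec; submitting it to KFP is a separate step. A usage sketch with placeholder values (note that project_id, pipeline_name and pipeline_image are accepted but unused in the body as written):

_compile_pipeline(
    pipeline_def=my_pipeline,                           # a tfx Pipeline object, placeholder
    project_id='my-gcp-project',                        # placeholder
    pipeline_name='my-tfx-pipeline',                    # placeholder
    pipeline_image='gcr.io/my-gcp-project/tfx-image',   # placeholder
    pipeline_spec_path='my-tfx-pipeline.yaml')          # where the compiled spec is written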
    def testVolumeMountingPipelineOperatorFuncs(self):
        mount_volume_op = onprem.mount_pvc('my-persistent-volume-claim',
                                           'my-volume-name',
                                           '/mnt/volume-mount-path')
        config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            pipeline_operator_funcs=[mount_volume_op])

        kubeflow_dag_runner.KubeflowDagRunner(config=config).run(
            _two_step_pipeline())
        file_path = os.path.join(self.test_dir, 'two_step_pipeline.tar.gz')
        self.assertTrue(tf.gfile.Exists(file_path))

        with tarfile.TarFile.open(file_path).extractfile(
                'pipeline.yaml') as pipeline_file:
            self.assertIsNotNone(pipeline_file)
            pipeline = yaml.load(pipeline_file)

            containers = [
                c for c in pipeline['spec']['templates'] if 'container' in c
            ]
            self.assertEqual(2, len(containers))

            # Check that each container has the volume mounted.
            self.assertEqual([{
                'name': 'my-volume-name',
                'mountPath': '/mnt/volume-mount-path'
            }], containers[0]['container']['volumeMounts'])

            self.assertEqual([{
                'name': 'my-volume-name',
                'mountPath': '/mnt/volume-mount-path'
            }], containers[1]['container']['volumeMounts'])

            # Check that the PVC is specified.
            self.assertEqual([{
                'name': 'my-volume-name',
                'persistentVolumeClaim': {
                    'claimName': 'my-persistent-volume-claim'
                }
            }], pipeline['spec']['volumes'])
def main(unused_argv):
    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        # Specify custom docker image to use.
        tfx_image=tfx_image)

    if FLAGS.distributed_training:
        _ai_platform_training_args.update({
            # You can specify the machine types, the number of replicas for workers
            # and parameter servers.
            # https://cloud.google.com/ml-engine/reference/rest/v1/projects.jobs#ScaleTier
            'scaleTier': 'CUSTOM',
            'masterType': 'large_model',
            'workerType': 'standard',
            'parameterServerType': 'standard',
            'workerCount': 2,
            'parameterServerCount': 1
        })

    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        _create_pipeline(
            pipeline_name=_pipeline_name,
            pipeline_root=_pipeline_root,
            query=_query,
            module_file=_module_file,
            beam_pipeline_args=_beam_pipeline_args,
            ai_platform_training_args=_ai_platform_training_args,
            ai_platform_serving_args=_ai_platform_serving_args,
        ))
def main(unused_argv):
    serving_model_dir = os.path.join(FLAGS.project_root, 'serving_model',
                                     FLAGS.pipeline_name)

    module_file = os.path.join(FLAGS.project_root, 'titanic_keras_utils.py')
    # Root directory to store pipeline artifacts.
    pipeline_root = os.path.join(FLAGS.project_root, 'pipeline')
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        # Specify custom docker image to use.
        tfx_image=tfx_image,
        pipeline_operator_funcs=(
            # If running on K8s Engine (GKE) on Google Cloud Platform (GCP),
            # kubeflow_dag_runner.get_default_pipeline_operator_funcs() provides
            # default configurations specifically for GKE on GCP, such as secrets.
            [
                onprem.mount_pvc(_persistent_volume_claim, _persistent_volume,
                                 _persistent_volume_mount)
            ]))

    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        create_tfx_pipeline(
            pipeline_name=FLAGS.pipeline_name,
            pipeline_root=pipeline_root,
            data_root=FLAGS.data_root,
            module_file=module_file,
            serving_model_dir=serving_model_dir,
            # 0 means auto-detect based on the number of CPUs available during
            # execution time.
            direct_num_workers=0))
    def testMountGcpServiceAccount(self):
        kubeflow_dag_runner.KubeflowDagRunner(
            config=kubeflow_dag_runner.KubeflowDagRunnerConfig(
                pipeline_operator_funcs=kubeflow_dag_runner.
                get_default_pipeline_operator_funcs(use_gcp_sa=True))).run(
                    _two_step_pipeline())
        file_path = 'two_step_pipeline.tar.gz'
        self.assertTrue(fileio.exists(file_path))

        with tarfile.TarFile.open(file_path).extractfile(
                'pipeline.yaml') as pipeline_file:
            self.assertIsNotNone(pipeline_file)
            pipeline = yaml.safe_load(pipeline_file)

            containers = [
                c for c in pipeline['spec']['templates'] if 'container' in c
            ]
            self.assertEqual(2, len(containers))

            # Check that each container has default GCP credentials.

            container_0 = containers[0]
            env = [
                env for env in container_0['container']['env']
                if env['name'] == 'GOOGLE_APPLICATION_CREDENTIALS'
            ]
            self.assertEqual(1, len(env))
            self.assertEqual('/secret/gcp-credentials/user-gcp-sa.json',
                             env[0]['value'])

            container_1 = containers[1]
            env = [
                env for env in container_1['container']['env']
                if env['name'] == 'GOOGLE_APPLICATION_CREDENTIALS'
            ]
            self.assertEqual(1, len(env))
            self.assertEqual('/secret/gcp-credentials/user-gcp-sa.json',
                             env[0]['value'])
Example #24
def main(_):
    pipeline = generate_pipeline(flags.FLAGS.pipeline_name,
                                 flags.FLAGS.pipeline_root,
                                 flags.FLAGS.train_data, flags.FLAGS.test_data,
                                 flags.FLAGS.train_steps,
                                 flags.FLAGS.eval_steps,
                                 flags.FLAGS.pusher_target, flags.FLAGS.runner)

    if flags.FLAGS.runner == 'local':
        BeamDagRunner().run(pipeline)
    #elif flags.FLAGS.runner == 'flink':
    # need to slightly change TFX codes to support other Beam-runners
    # BeamDagRunner(pipelineOptions).run(pipeline)
    elif flags.FLAGS.runner == 'kubeflow':
        metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
        )
        tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
        runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
            kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
        kubeflow_dag_runner.KubeflowDagRunner(
            config=runner_config).run(pipeline)
    else:
        exit(1)
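The flags used in main() are not declared in this snippet. A hedged sketch of how they might be defined with absl; the names follow the usage above, while types and defaults are placeholders:

from absl import flags

flags.DEFINE_string('pipeline_name', 'my_pipeline', 'Name of the TFX pipeline.')
flags.DEFINE_string('pipeline_root', '/tmp/pipeline_root', 'Root directory for pipeline artifacts.')
flags.DEFINE_string('train_data', '/tmp/data/train', 'Path to training data.')
flags.DEFINE_string('test_data', '/tmp/data/test', 'Path to test data.')
flags.DEFINE_integer('train_steps', 1000, 'Number of training steps.')
flags.DEFINE_integer('eval_steps', 100, 'Number of evaluation steps.')
flags.DEFINE_string('pusher_target', '/tmp/serving_model', 'Destination for the pushed model.')
flags.DEFINE_enum('runner', 'local', ['local', 'kubeflow'], 'Orchestrator to use.')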
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to MySQL database inside
    # Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            query=configs.BIG_QUERY_QUERY,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            serving_model_dir=SERVING_MODEL_DIR,
            beam_pipeline_args=configs.
            BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
            ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
Example #26
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to MySQL database inside
    # Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        tfx_image=configs.PIPELINE_IMAGE)
    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels
    ).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # NOTE: Use `query` instead of `data_path` to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            run_fn=configs.RUN_FN,
            train_args=trainer_pb2.TrainArgs(
                num_steps=configs.TRAIN_NUM_STEPS),
            eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
            eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
            serving_model_dir=SERVING_MODEL_DIR,
            # NOTE: Provide GCP configs to use BigQuery with Beam DirectRunner.
            # beam_pipeline_args=configs.
            # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
        ))
Example #27
    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        # Specify custom docker image to use.
        tfx_image=tfx_image,
        pipeline_operator_funcs=(
            # If running on K8s Engine (GKE) on Google Cloud Platform (GCP),
            # kubeflow_dag_runner.get_default_pipeline_operator_funcs() provides
            # default configurations specifically for GKE on GCP, such as secrets.
            [
                onprem.mount_pvc(_persistent_volume_claim, _persistent_volume,
                                 _persistent_volume_mount)
            ]))

    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        _create_pipeline(
            pipeline_name=_pipeline_name,
            pipeline_root=_pipeline_root,
            data_root=_data_root,
            module_file=_module_file,
            serving_model_dir=_serving_model_dir,
            # 0 means auto-detect based on the number of CPUs available during
            # execution time.
            direct_num_workers=0))
Example #28
                                              ptype=int)

    eval_steps = data_types.RuntimeParameter(name='eval-steps',
                                             default=500,
                                             ptype=int)

    pipeline_root = '{}/{}/{}'.format(Config.ARTIFACT_STORE_URI,
                                      Config.PIPELINE_NAME,
                                      kfp.dsl.RUN_ID_PLACEHOLDER)

    # Set KubeflowDagRunner settings
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        pipeline_operator_funcs=kubeflow_dag_runner.
        get_default_pipeline_operator_funcs(Config.USE_KFP_SA == 'True'),
        tfx_image=Config.TFX_IMAGE)

    # Compile the pipeline
    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        create_pipeline(pipeline_name=Config.PIPELINE_NAME,
                        pipeline_root=pipeline_root,
                        data_root_uri=data_root_uri,
                        train_steps=train_steps,
                        eval_steps=eval_steps,
                        ai_platform_training_args=ai_platform_training_args,
                        ai_platform_serving_args=ai_platform_serving_args,
                        beam_pipeline_args=beam_pipeline_args))
  return pipeline.Pipeline(
      pipeline_name=pipeline_name,
      pipeline_root=pipeline_root,
      components=[
          example_gen, statistics_gen, infer_schema, validate_stats, transform,
          trainer, model_analyzer, model_validator, pusher
      ],
      enable_cache=enable_cache,
      # TODO(b/141578059): The multi-processing API might change.
      beam_pipeline_args=['--direct_num_workers=%d' % direct_num_workers],
  )


if __name__ == '__main__':
  _enable_cache = True
  pipeline = _create_parameterized_pipeline(
      _pipeline_name, _pipeline_root, enable_cache=_enable_cache)

  # This pipeline automatically injects the Kubeflow TFX image if the
  # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
  # cli tool exports the environment variable to pass to the pipelines.
  tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

  config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
      kubeflow_metadata_config=kubeflow_dag_runner
      .get_default_kubeflow_metadata_config(),
      tfx_image=tfx_image)
  kfp_runner = kubeflow_dag_runner.KubeflowDagRunner(config=config)

  kfp_runner.run(pipeline)
        "docker.io/doctorai/ml-pipelines-tfx-custom:0.22.0",
    )

    from pipelines.base_pipeline import init_components

    components = init_components(data_dir,
                                 module_file,
                                 50000,
                                 10000,
                                 serving_model_dir=serving_model_dir)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        # Specify custom docker image to use.
        tfx_image=tfx_image,
        pipeline_operator_funcs=(
            # If running on K8s Engine (GKE) on Google Cloud Platform (GCP),
            # kubeflow_dag_runner.get_default_pipeline_operator_funcs()
            # provides default configurations specifically for GKE on GCP,
            # such as secrets.
            kubeflow_dag_runner.get_default_pipeline_operator_funcs()),
    )

    p = init_kubeflow_pipeline(components, output_base, direct_num_workers=0)
    output_filename = f"{pipeline_name}.yaml"
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config,
        output_dir=output_dir,
        output_filename=output_filename,
    ).run(p)
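Once compiled, the package can be submitted with the KFP client, much as the test helper earlier in this listing does. A hedged sketch (the endpoint is a placeholder):

import os
import kfp

client = kfp.Client(host='https://<your-kfp-endpoint>')  # placeholder endpoint
client.create_run_from_pipeline_package(
    pipeline_file=os.path.join(output_dir, output_filename),
    arguments={})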