def run():
  """Build the TFX pipeline and execute it with the Kubeflow V2 runner."""
  config = kubeflow_v2_dag_runner.KubeflowV2DagRunnerConfig(
      default_image=configs.PIPELINE_IMAGE)

  tfx_pipeline = pipeline.create_pipeline(
      pipeline_name=configs.PIPELINE_NAME,
      pipeline_root=_PIPELINE_ROOT,
      data_path=_DATA_PATH,
      # TODO(step 7): (Optional) Uncomment here to use BigQueryExampleGen.
      # query=configs.BIG_QUERY_QUERY,
      preprocessing_fn=configs.PREPROCESSING_FN,
      run_fn=configs.RUN_FN,
      train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
      eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
      eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
      serving_model_dir=_SERVING_MODEL_DIR,
      # TODO(step 7): (Optional) Uncomment here to use provide GCP related
      # config for BigQuery with Beam DirectRunner.
      # beam_pipeline_args=configs.
      # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
      # TODO(step 8): (Optional) Uncomment below to use Dataflow.
      # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
      # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
      # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
      # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
      # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
  )

  # Construct the runner inline and kick off the pipeline immediately.
  kubeflow_v2_dag_runner.KubeflowV2DagRunner(config=config).run(
      pipeline=tfx_pipeline)
def run():
  """Define a kubeflow pipeline."""
  # Metadata config. The defaults work with the installation of
  # KF Pipelines using Kubeflow. If installing KF Pipelines using the
  # lightweight deployment option, you may need to override the defaults.
  # If you use Kubeflow, metadata will be written to MySQL database inside
  # Kubeflow cluster.
  metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

  # This pipeline automatically injects the Kubeflow TFX image if the
  # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
  # cli tool exports the environment variable to pass to the pipelines.
  # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
  # pipeline DSL file, instead of using environment vars.
  tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)
  runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
      kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

  # BUG FIX: dict.update() returns None, so the original expression
  # `get_default_pod_labels().update({...})` bound None to `pod_labels` and
  # the telemetry label was silently dropped. Mutate the dict first, then
  # pass it to the runner.
  pod_labels = kubeflow_dag_runner.get_default_pod_labels()
  pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'tfx-template'})

  kubeflow_dag_runner.KubeflowDagRunner(
      config=runner_config, pod_labels_to_attach=pod_labels
  ).run(
      pipeline.create_pipeline(
          pipeline_name=configs.PIPELINE_NAME,
          pipeline_root=PIPELINE_ROOT,
          data_path=DATA_PATH,
          # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
          # query=configs.BIG_QUERY_QUERY,
          preprocessing_fn=configs.PREPROCESSING_FN,
          run_fn=configs.RUN_FN,
          train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
          eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
          eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
          serving_model_dir=SERVING_MODEL_DIR,
          # TODO(step 7): (Optional) Uncomment below to use provide GCP related
          # config for BigQuery with Beam DirectRunner.
          # beam_pipeline_args=configs
          # .BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
          # TODO(step 8): (Optional) Uncomment below to use Dataflow.
          # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
          # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
          # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
          # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
          # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
      ))
def run():
  """Define a pipeline to be executed using Kubeflow V2 runner."""
  # TODO(b/157598477) Find a better way to pass parameters from CLI handler to
  # pipeline DSL file, instead of using environment vars.
  tfx_image = os.environ.get(labels.TFX_IMAGE_ENV)
  project_id = os.environ.get(labels.GCP_PROJECT_ID_ENV)
  api_key = os.environ.get(labels.API_KEY_ENV)

  runner_config = kubeflow_v2_dag_runner.KubeflowV2DagRunnerConfig(
      project_id=project_id,
      display_name='tfx-kubeflow-v2-pipeline-{}'.format(configs.PIPELINE_NAME),
      default_image=tfx_image)

  created_pipeline = pipeline.create_pipeline(
      pipeline_name=configs.PIPELINE_NAME,
      pipeline_root=_PIPELINE_ROOT,
      data_path=_DATA_PATH,
      # TODO(step 7): (Optional) Uncomment here to use BigQueryExampleGen.
      # query=configs.BIG_QUERY_QUERY,
      preprocessing_fn=configs.PREPROCESSING_FN,
      run_fn=configs.RUN_FN,
      train_args=trainer_pb2.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
      eval_args=trainer_pb2.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
      eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
      serving_model_dir=_SERVING_MODEL_DIR,
      # TODO(step 7): (Optional) Uncomment here to use provide GCP related
      # config for BigQuery with Beam DirectRunner.
      # beam_pipeline_args=configs.
      # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
      # TODO(step 8): (Optional) Uncomment below to use Dataflow.
      # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
      # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
      # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
      # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
      # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
  )

  runner = kubeflow_v2_dag_runner.KubeflowV2DagRunner(config=runner_config)

  # The 'run' CLI command sets RUN_FLAG_ENV; without it we only compile the
  # pipeline definition and write it out.
  if not os.environ.get(labels.RUN_FLAG_ENV, False):
    runner.compile(pipeline=created_pipeline, write_out=True)
  else:
    runner.run(pipeline=created_pipeline, api_key=api_key)
def run():
  """Define a kubeflow pipeline."""
  # Metadata config. The defaults work with the installation of
  # KF Pipelines using Kubeflow. If installing KF Pipelines using the
  # lightweight deployment option, you may need to override the defaults.
  # If you use Kubeflow, metadata will be written to MySQL database inside
  # Kubeflow cluster.
  metadata_config = (
      tfx.orchestration.experimental.get_default_kubeflow_metadata_config())
  runner_config = tfx.orchestration.experimental.KubeflowDagRunnerConfig(
      kubeflow_metadata_config=metadata_config,
      tfx_image=configs.PIPELINE_IMAGE)

  # Attach the telemetry label on top of the pod-env label.
  pod_labels = {'add-pod-env': 'true'}
  pod_labels[tfx.orchestration.experimental.LABEL_KFP_SDK_ENV] = 'tfx-template'

  runner = tfx.orchestration.experimental.KubeflowDagRunner(
      config=runner_config, pod_labels_to_attach=pod_labels)

  runner.run(
      pipeline.create_pipeline(
          pipeline_name=configs.PIPELINE_NAME,
          pipeline_root=PIPELINE_ROOT,
          data_path=DATA_PATH,
          # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
          # query=configs.BIG_QUERY_QUERY,
          # TODO(step 5): (Optional) Set the path of the customized schema.
          # schema_path=generated_schema_path,
          preprocessing_fn=configs.PREPROCESSING_FN,
          run_fn=configs.RUN_FN,
          train_args=tfx.proto.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
          eval_args=tfx.proto.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
          eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
          serving_model_dir=SERVING_MODEL_DIR,
          # TODO(step 7): (Optional) Uncomment below to use provide GCP related
          # config for BigQuery with Beam DirectRunner.
          # beam_pipeline_args=configs
          # .BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
          # TODO(step 8): (Optional) Uncomment below to use Dataflow.
          # beam_pipeline_args=configs.DATAFLOW_BEAM_PIPELINE_ARGS,
          # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
          # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
          # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
          # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
      ))
def run():
  """Define a local pipeline."""
  # Local runs record ML Metadata in a SQLite database at METADATA_PATH.
  metadata_connection = (
      tfx.orchestration.metadata.sqlite_metadata_connection_config(
          METADATA_PATH))

  local_pipeline = pipeline.create_pipeline(
      pipeline_name=configs.PIPELINE_NAME,
      pipeline_root=PIPELINE_ROOT,
      data_path=DATA_PATH,
      # TODO(step 7): (Optional) Uncomment here to use BigQueryExampleGen.
      # query=configs.BIG_QUERY_QUERY,
      preprocessing_fn=configs.PREPROCESSING_FN,
      run_fn=configs.RUN_FN,
      train_args=tfx.proto.TrainArgs(num_steps=configs.TRAIN_NUM_STEPS),
      eval_args=tfx.proto.EvalArgs(num_steps=configs.EVAL_NUM_STEPS),
      eval_accuracy_threshold=configs.EVAL_ACCURACY_THRESHOLD,
      serving_model_dir=SERVING_MODEL_DIR,
      # TODO(step 7): (Optional) Uncomment here to use provide GCP related
      # config for BigQuery with Beam DirectRunner.
      # beam_pipeline_args=configs.
      # BIG_QUERY_WITH_DIRECT_RUNNER_BEAM_PIPELINE_ARGS,
      metadata_connection_config=metadata_connection)

  tfx.orchestration.LocalDagRunner().run(local_pipeline)