        vertex_training_custom_config=vertex_training_custom_config,
        serving_model_dir=serving_model_dir,
        # ai_platform_serving_args=ai_platform_serving_args
    )

    p = pipeline.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root,
        components=components,
        beam_pipeline_args=beam_pipeline_args,
    )

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
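    # If the defaults need overriding (e.g. for a lightweight deployment),
    # individual fields on the returned proto can be set directly, as in
    # Example #4 below (the database name here is hypothetical):
    # metadata_config.mysql_db_name.value = 'my-pipelines-db'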

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    tfx_image = os.environ.get(
        "KUBEFLOW_TFX_IMAGE",
        "gcr.io/oreilly-book/ml-pipelines-tfx-custom:latest",
    )

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        # Specify custom docker image to use.
        tfx_image=tfx_image,
    )
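
The excerpt above ends after building the runner config. Following the pattern in Example #2 below, a minimal sketch of the remaining step would be (the output filename is an assumption):

    runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config,
        output_filename='pipeline.yaml')
    runner.run(p)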
Example #2
# Assumed imports for this excerpt: `pipeline` is the source project's own
# pipeline-definition module, and the FLAGS used below are defined elsewhere
# in that project.
from absl import logging
from tfx.orchestration import data_types
from tfx.orchestration.kubeflow import kubeflow_dag_runner


def main(argv):
    del argv  # unused

    beam_pipeline_args = [
        '--direct_running_mode=multi_processing',
        # 0 means auto-detect based on the number of CPUs available
        # during execution time.
        '--direct_num_workers=0',
    ]

    
    metadata_connection_config = None
    data_root_uri = data_types.RuntimeParameter( 
        name='data-root-uri',
        ptype=str,
        default=FLAGS.data_root_uri)
    
    eval_split_name = data_types.RuntimeParameter(
        name='eval-split-name',
        ptype=str,
        default='eval'
    )
     
    #output_config = example_gen_pb2.Output(
    #    split_config=example_gen_pb2.SplitConfig(splits=[
    #        example_gen_pb2.SplitConfig.Split(name=eval_split_name, hash_buckets=4),
    #        example_gen_pb2.SplitConfig.Split(name='test', hash_buckets=1)]))
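    # The dict below is the JSON-friendly equivalent of the proto config
    # commented out above; unlike a pre-built proto, it lets the
    # `eval_split_name` RuntimeParameter be resolved when the pipeline runs.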
    
    output_config = {
        "split_config": {
            "splits": [
                {
                    "name": "train",
                    "hash_buckets": 4
                },
                {
                    "name": eval_split_name,
                    "hash_buckets": 1
                }
            ]
        }
    }
  

    # Create the pipeline
    pipeline_def = pipeline.create_pipeline(
        pipeline_name=FLAGS.pipeline_name,
        pipeline_root=FLAGS.pipeline_root,
        data_root_uri=data_root_uri,
        output_config=output_config,
        beam_pipeline_args=beam_pipeline_args,
        metadata_connection_config=metadata_connection_config)

    logging.info(f'Compiling pipeline to: {FLAGS.pipeline_spec_path}')
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    
    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        # Specify custom docker image to use.
        # tfx_image=tfx_image
    )
    
    runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config,
        output_filename=FLAGS.pipeline_spec_path)

    runner.run(pipeline_def)
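
Once compiled, the file written to FLAGS.pipeline_spec_path can be submitted to a Kubeflow Pipelines cluster. A minimal sketch using the kfp SDK (the host URL, file name, and argument values are assumptions):

    import kfp

    client = kfp.Client(host='http://localhost:8080')
    client.create_run_from_pipeline_package(
        'pipeline.yaml',
        arguments={
            'data-root-uri': 'gs://my-bucket/data',
            'eval-split-name': 'eval',
        })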
Example #3
  def _get_kubeflow_metadata_config(
      self) -> kubeflow_pb2.KubeflowMetadataConfig:
    config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    return config
Example #4
  def _get_kubeflow_metadata_config(
      self, pipeline_name: Text) -> kubeflow_pb2.KubeflowMetadataConfig:
    config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    # Overwrite the DB name so each pipeline gets its own MLMD database.
    config.mysql_db_name.value = self._get_mlmd_db_name(pipeline_name)
    return config