def run():
    """Define a kubeflow pipeline."""

    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    pod_labels = kubeflow_dag_runner.get_default_pod_labels()
    pod_labels.update({telemetry_utils.LABEL_KFP_SDK_ENV: 'advert-pred'})
    kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config, pod_labels_to_attach=pod_labels).run(
            pipeline.create_pipeline(
                pipeline_name=PIPELINE_NAME,
                pipeline_root=PIPELINE_ROOT,
                data_path=DATA_PATH,
                preprocessing_fn=PREPROCESSING_FN,
                run_fn=RUN_FN,
                train_args=trainer_pb2.TrainArgs(num_steps=TRAIN_NUM_STEPS),
                eval_args=trainer_pb2.EvalArgs(num_steps=EVAL_NUM_STEPS),
                eval_accuracy_threshold=EVAL_ACCURACY_THRESHOLD,
                serving_model_dir=SERVING_MODEL_DIR,
            ))
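
The snippet above relies on module-level constants (PIPELINE_NAME, PIPELINE_ROOT, DATA_PATH, and so on) defined elsewhere in the TFX template. A minimal sketch of what those definitions could look like; every value below is illustrative, not taken from the source:

import os

# Illustrative values only; a real template derives these from its own configs.
PIPELINE_NAME = 'advert-pred'
GCS_BUCKET = 'gs://my-bucket'  # hypothetical bucket
PIPELINE_ROOT = os.path.join(GCS_BUCKET, 'tfx_pipeline_output', PIPELINE_NAME)
DATA_PATH = os.path.join(GCS_BUCKET, 'data')
PREPROCESSING_FN = 'models.preprocessing.preprocessing_fn'
RUN_FN = 'models.model.run_fn'
TRAIN_NUM_STEPS = 1000
EVAL_NUM_STEPS = 150
EVAL_ACCURACY_THRESHOLD = 0.6
SERVING_MODEL_DIR = os.path.join(PIPELINE_ROOT, 'serving_model')
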
Example No. 2
def general_dist_pipeline(conn, query, dist_label):
    return pipe.create_pipeline(
        pipe.create_fetch_data(query, conn),
        pipe.create_set_index("how_out"),
        pipe.create_rename_label("counts", dist_label, axis=1),
        clean_data_pipeline,
        pipe.create_normalise_values(dist_label),
    )
Example No. 3
def bowler_pipeline(conn):
    return pipe.create_pipeline(
        pipe.create_fetch_data(bowler_query, conn),
        pipe.transpose_df,
        pipe.create_rename_axis("how_out"),
        pipe.create_rename_label(0, "bowler_dist", axis=1),
        clean_data_pipeline,
        pipe.create_normalise_values("bowler_dist"),
    )
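
Examples No. 2 and No. 3 build their pipelines by composing small single-argument steps with pipe.create_pipeline. The helper itself is not shown in this listing; a minimal sketch of how such a composer could work, assuming each step accepts a DataFrame (or None, for the initial fetch step) and returns a DataFrame:

def create_pipeline(*steps):
    """Compose steps left to right into a single callable (illustrative sketch)."""
    def run(df=None):
        for step in steps:
            df = step(df)
        return df
    return run

Because the composed result is itself a single-argument callable, one pipeline can be reused as a step inside another, which is how clean_data_pipeline is used in Example No. 16 below.
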
Example No. 4
def main():

    try:
        SERVICE_HOME = sys.argv[1]

        # init spark
        spark = get_spark(app_name="sample")

        # get logger
        logger = get_logger(spark, "app")

        # load data
        df = spark.read.schema(get_train_schema()).option(
            'header',
            True).csv(SERVICE_HOME +
                      '/dataset/WA_Fn-UseC_-HR-Employee-Attrition.csv')

        # label preprocessing (only in training part)
        df = df.withColumn('label', str2num(
            F.col('Attrition'), {'No': 0, 'Yes': 1})) \
            .drop('Attrition')

        # separate train and valid
        (train_data, valid_data) = df.randomSplit([0.8, 0.2])

        # preprocess(pipeline / non-pipeline) / training
        logger.info('preprocessing & training')
        stages = get_stages(train_data)
        rf = RandomForestRegressor(labelCol="label",
                                   featuresCol="features",
                                   numTrees=10)
        stages.append(rf)
        mypipeline = create_pipeline(stages)
        mymodel = mypipeline.fit(train_data)

        # get validation metric
        predictions = mymodel.transform(valid_data)
        evaluator = RegressionEvaluator(labelCol="label",
                                        predictionCol="prediction",
                                        metricName="rmse")
        rmse = evaluator.evaluate(predictions)
        logger.info('valid rmse: {}'.format(rmse))

        model_path = SERVICE_HOME + '/model'
        if os.path.exists(model_path):
            shutil.rmtree(model_path)
            logger.info('model exists, removed old model')
        mymodel.save(model_path)
        logger.info('save model to {}'.format(model_path))

    except Exception:
        logger.error(traceback.format_exc())

    finally:
        # stop spark
        spark.stop()
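
The create_pipeline(stages) helper used above is not shown. Given how it is used (fit on the training DataFrame, then transform and save the fitted model), it is presumably a thin wrapper around pyspark.ml.Pipeline; a plausible sketch under that assumption:

from pyspark.ml import Pipeline


def create_pipeline(stages):
    # Assumed implementation: chain the preprocessing stages and the final
    # estimator into a single Spark ML Pipeline.
    return Pipeline(stages=stages)
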
Example No. 5
def main():
    parser = argparse.ArgumentParser()
    parser.add_argument('-auth',
                        type=str,
                        required=False,
                        default='credentials.json')
    parser.add_argument('-config',
                        type=str,
                        required=False,
                        default='trackerConfig.json')
    args = parser.parse_args()

    try:
        test_config = Configuration(args.config)
    except ValueError as err:
        print(f'{type(err).__name__}: {err}')
        return

    db = get_db_connection(args.auth)
    update_collections(db, test_config.refresh)

    collection = db[test_config.collection]
    pipeline = create_pipeline(test_config)

    with open('pipeline.json', 'w') as f:
        print(pipeline, file=f)

    result = collection.aggregate(pipeline).next()

    page = get_header()
    for n, t in enumerate(result):
        if test_config.analysis[n]['task'].get('stats') is not None:
            test_config.analysis[n]['task'].update(
                {"aggregation": test_config.aggregation})
            q = Query(task=test_config.analysis[n]['task'],
                      output=test_config.analysis[n]['output'],
                      data={"data": result[t]})
            page += create_table(q)
        else:
            q = Query(task=test_config.analysis[n]['task'],
                      output=test_config.analysis[n]['output'],
                      data=result[t][0])

        for key in q.output:
            if ('track' in q.task or 'ratio' in q.task) and key == 'table':
                page += get_table(q)
            if ('track' in q.task or 'ratio' in q.task) and key == 'graph':
                create_graph(q, n)
                page += f'<img src="graph{n}.png"></img>'

    with open(test_config.output_file, 'w') as f:
        f.write(page)

    print("done")
Example No. 6
def node_tagged_with(context, node_name, tags):
    """
    Check tagging in `pipeline_template.py` is consistent with tagging
    descriptions in background steps
    """
    sys.path.append(
        str(context.root_project_dir / "src" / context.project_name.replace("-", "_"))
    )
    import pipeline  # pylint: disable=import-error

    context.project_pipeline = pipeline.create_pipeline()
    node_objs = [n for n in context.project_pipeline.nodes if n.name == node_name]
    assert node_objs
    assert set(tags) == node_objs[0].tags
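
This is a behave step from a Kedro starter's test suite; pipeline.create_pipeline() is expected to return a Kedro Pipeline whose nodes carry the tags described in the background steps. A minimal sketch of a matching pipeline.py, with the function, node name, and tags chosen purely for illustration:

from kedro.pipeline import Pipeline, node


def split_data(raw_data):
    # Placeholder node function for illustration.
    return raw_data


def create_pipeline(**kwargs):
    return Pipeline([
        node(
            split_data,
            inputs="raw_data",
            outputs="model_input",
            name="split_data_node",      # compared against node_name in the step
            tags=["training", "split"],  # compared against tags in the step
        ),
    ])
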
Example No. 7
def run():
    # Assumed: default metadata config (omitted in the original snippet).
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)
    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            bucket=BUCKET,
            csv_file=CSV_FILE,
            preprocessing_fn=configs.PREPROCESSING_FN,
            trainer_fn=configs.TRAINER_FN,
            train_args=configs.TRAIN_ARGS,
            eval_args=configs.EVAL_ARGS,
            serving_model_dir=SERVING_MODEL_DIR,
        ))
Example No. 8
def run():
    """Define a beam pipeline."""

    BeamDagRunner().run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            data_path=DATA_PATH,
            # TODO(step 7): (Optional) Uncomment here to use BigQueryExampleGen.
            # query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            trainer_fn=configs.TRAINER_FN,
            train_args=configs.TRAIN_ARGS,
            eval_args=configs.EVAL_ARGS,
            serving_model_dir=SERVING_MODEL_DIR,
            # TODO(step 7): (Optional) Uncomment here to provide GCP-related
            #               config for BigQuery.
            # beam_pipeline_args=configs.BIG_QUERY_BEAM_PIPELINE_ARGS,
            metadata_connection_config=metadata.sqlite_metadata_connection_config(
                METADATA_PATH)))
Example No. 9
def run():
    """Define a kubeflow pipeline."""

    # Metadata config. The defaults work with the installation of
    # KF Pipelines using Kubeflow. If installing KF Pipelines using the
    # lightweight deployment option, you may need to override the defaults.
    # If you use Kubeflow, metadata will be written to a MySQL database inside
    # the Kubeflow cluster.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    # This pipeline automatically injects the Kubeflow TFX image if the
    # environment variable 'KUBEFLOW_TFX_IMAGE' is defined. Currently, the tfx
    # cli tool exports the environment variable to pass to the pipelines.
    tfx_image = os.environ.get('KUBEFLOW_TFX_IMAGE', None)

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config, tfx_image=tfx_image)

    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        pipeline.create_pipeline(
            pipeline_name=configs.PIPELINE_NAME,
            pipeline_root=PIPELINE_ROOT,
            # data_path=DATA_PATH,
            # TODO(step 7): (Optional) Uncomment below to use BigQueryExampleGen.
            query=configs.BIG_QUERY_QUERY,
            preprocessing_fn=configs.PREPROCESSING_FN,
            trainer_fn=configs.TRAINER_FN,
            train_args=configs.TRAIN_ARGS,
            eval_args=configs.EVAL_ARGS,
            serving_model_dir=SERVING_MODEL_DIR,
            # TODO(step 7): (Optional) Uncomment below to provide GCP-related
            #               config for BigQuery.
            beam_pipeline_args=configs.BIG_QUERY_BEAM_PIPELINE_ARGS,
            # TODO(step 8): (Optional) Uncomment below to use Dataflow.
            # beam_pipeline_args=configs.BEAM_PIPELINE_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_training_args=configs.GCP_AI_PLATFORM_TRAINING_ARGS,
            # TODO(step 9): (Optional) Uncomment below to use Cloud AI Platform.
            # ai_platform_serving_args=configs.GCP_AI_PLATFORM_SERVING_ARGS,
        ))
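
With BigQueryExampleGen enabled as above, the Beam jobs that read from BigQuery need a GCP project, temp location, and region. The contents of configs.BIG_QUERY_BEAM_PIPELINE_ARGS are not shown; a plausible definition, mirroring the Beam arguments used in the later examples (all values are placeholders):

GOOGLE_CLOUD_PROJECT = 'my-gcp-project'  # placeholder
GCS_BUCKET_NAME = 'my-gcs-bucket'        # placeholder
GOOGLE_CLOUD_REGION = 'us-central1'      # placeholder

BIG_QUERY_BEAM_PIPELINE_ARGS = [
    '--project=' + GOOGLE_CLOUD_PROJECT,
    '--temp_location=gs://{}/tmp'.format(GCS_BUCKET_NAME),
    '--region=' + GOOGLE_CLOUD_REGION,
]
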
Example No. 10
import os
from pathlib import Path

from tfx.orchestration import metadata
from tfx.orchestration import pipeline as pipeline_module
from tfx.orchestration.beam.beam_dag_runner import BeamDagRunner

from pipeline import create_pipeline

if __name__ == "__main__":
    pipeline_name = 'tfx-container-pipeline'

    tfx_root = Path(__file__).parent / 'tfx_root'
    pipeline_root = tfx_root / 'pipelines' / pipeline_name
    # Sqlite ML-metadata db path.
    metadata_path = tfx_root / 'metadata' / pipeline_name / 'metadata.db'

    components = create_pipeline()
    pipeline = pipeline_module.Pipeline(
        pipeline_name=pipeline_name,
        pipeline_root=pipeline_root.as_posix(),
        components=components,
        metadata_connection_config=metadata.sqlite_metadata_connection_config(
            metadata_path.as_posix()))

    BeamDagRunner().run(pipeline)
Example No. 11
def create_get_dist_pipeline(conn):
    return pipe.create_pipeline(create_get_all_dists(conn), pipe.join_dists,
                                pipe.create_average_df(1))
Example No. 12
def main(argv):
    del argv

    # Override the use_cloud_pipelines flag if the compile_only flag is set
    if FLAGS.compile_only:
        FLAGS.use_cloud_pipelines = True

    # Config executors
    if FLAGS.use_cloud_executors:
        ai_platform_training_args = {
            'project': FLAGS.project_id,
            'region': FLAGS.region,
            'masterConfig': {
                'imageUri': FLAGS.pipeline_image,
            }
        }
        trainer_custom_config = {
            ai_platform_trainer_executor.TRAINING_ARGS_KEY:
            ai_platform_training_args
        }
        trainer_custom_executor_spec = executor_spec.ExecutorClassSpec(
            ai_platform_trainer_executor.GenericExecutor)

        beam_pipeline_args = [
            '--runner=DataflowRunner', '--experiments=shuffle_mode=auto',
            '--project=' + FLAGS.project_id,
            '--temp_location=' + FLAGS.dataflow_temp_location,
            '--disk_size_gb=' + str(FLAGS.dataflow_disk_size),
            '--machine_type=' + FLAGS.dataflow_machine_type,
            '--region=' + FLAGS.region
        ]
    else:
        trainer_custom_config = None
        trainer_custom_executor_spec = executor_spec.ExecutorClassSpec(
            trainer_executor.GenericExecutor)

        beam_pipeline_args = [
            '--direct_running_mode=multi_processing',
            # 0 means auto-detect based on the number of CPUs available
            # during execution time.
            '--direct_num_workers=0'
        ]

    # Config pipeline orchestrator
    if FLAGS.use_cloud_pipelines:
        metadata_connection_config = None
        data_root_uri = data_types.RuntimeParameter(
            name='data-root-uri', ptype=str, default=FLAGS.data_root_uri)
        schema_folder_uri = data_types.RuntimeParameter(
            name='schema-folder-uri',
            ptype=str,
            default=FLAGS.schema_folder_uri)
    else:
        metadata_connection_config = (sqlite_metadata_connection_config(
            FLAGS.sql_lite_path))
        data_root_uri = FLAGS.data_root_uri
        schema_folder_uri = FLAGS.schema_folder_uri

    # Create the pipeline
    pipeline_def = pipeline.create_pipeline(
        pipeline_name=FLAGS.pipeline_name,
        pipeline_root=FLAGS.pipeline_root,
        serving_model_uri=FLAGS.serving_model_uri,
        data_root_uri=data_root_uri,
        schema_folder_uri=schema_folder_uri,
        eval_steps=FLAGS.eval_steps,
        train_steps=FLAGS.train_steps,
        trainer_custom_executor_spec=trainer_custom_executor_spec,
        trainer_custom_config=trainer_custom_config,
        beam_pipeline_args=beam_pipeline_args,
        metadata_connection_config=metadata_connection_config)

    # Run or compile the pipeline
    if FLAGS.use_cloud_pipelines:
        logging.info(f'Compiling pipeline to: {FLAGS.pipeline_spec_path}')
        _compile_pipeline(pipeline_def=pipeline_def,
                          project_id=FLAGS.project_id,
                          pipeline_name=FLAGS.pipeline_name,
                          pipeline_image=FLAGS.pipeline_image,
                          pipeline_spec_path=FLAGS.pipeline_spec_path)
        if FLAGS.compile_only:
            return

        # Set runtime parameters
        parameter_values = {
            'data-root-uri': FLAGS.data_root_uri,
            'schema-folder-uri': FLAGS.schema_folder_uri,
        }

        # Submit the run
        logging.info('Submitting AI Platform Pipelines job ...')
        _submit_pipeline_run(project_id=FLAGS.project_id,
                             region=FLAGS.region,
                             api_key=FLAGS.api_key,
                             pipeline_spec_path=FLAGS.pipeline_spec_path,
                             pipeline_root=FLAGS.pipeline_root,
                             parameter_values=parameter_values)
    else:
        logging.info('Using local dag runner')
        LocalDagRunner().run(pipeline_def)
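
This runner reads every setting from absl flags (FLAGS.*). The flag declarations are not part of the snippet; an illustrative subset of how they might be defined with absl.flags (names follow the usages above, defaults are placeholders):

from absl import flags

FLAGS = flags.FLAGS

flags.DEFINE_string('pipeline_name', 'my-pipeline', 'TFX pipeline name.')
flags.DEFINE_string('pipeline_root', None, 'Root URI for pipeline artifacts.')
flags.DEFINE_string('project_id', None, 'GCP project ID.')
flags.DEFINE_string('region', 'us-central1', 'GCP region.')
flags.DEFINE_bool('use_cloud_executors', False,
                  'Run Trainer on AI Platform and Beam on Dataflow.')
flags.DEFINE_bool('use_cloud_pipelines', False,
                  'Compile and submit to AI Platform Pipelines instead of running locally.')
flags.DEFINE_bool('compile_only', False, 'Only compile the pipeline spec.')
flags.DEFINE_integer('train_steps', 1000, 'Number of training steps.')
flags.DEFINE_integer('eval_steps', 500, 'Number of evaluation steps.')
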
Example No. 13
def main(argv):
    del argv

    beam_pipeline_args = [
        '--direct_running_mode=multi_processing',
        # 0 means auto-detect based on the number of CPUs available
        # during execution time.
        '--direct_num_workers=0'
    ]

    
    metadata_connection_config = None
    data_root_uri = data_types.RuntimeParameter(
        name='data-root-uri',
        ptype=str,
        default=FLAGS.data_root_uri)

    eval_split_name = data_types.RuntimeParameter(
        name='eval-split-name',
        ptype=str,
        default='eval'
    )
     
    #output_config = example_gen_pb2.Output(
    #    split_config=example_gen_pb2.SplitConfig(splits=[
    #        example_gen_pb2.SplitConfig.Split(name=eval_split_name, hash_buckets=4),
    #        example_gen_pb2.SplitConfig.Split(name='test', hash_buckets=1)]))
    
    output_config = {
        "split_config": {
            "splits": [
                {
                    "name": "train",
                    "hash_buckets": 4
                },
                {
                    "name": eval_split_name,
                    "hash_buckets": 1
                }
            ]
        }
    }
  

    # Create the pipeline
    pipeline_def = pipeline.create_pipeline(
        pipeline_name=FLAGS.pipeline_name,
        pipeline_root=FLAGS.pipeline_root,
        data_root_uri=data_root_uri,
        output_config=output_config,
        beam_pipeline_args=beam_pipeline_args,
        metadata_connection_config=metadata_connection_config)

    logging.info(f'Compiling pipeline to: {FLAGS.pipeline_spec_path}')
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()
    
    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        # Specify custom docker image to use.
        # tfx_image=tfx_image
    )
    
    runner = kubeflow_dag_runner.KubeflowDagRunner(
        config=runner_config,
        output_filename=FLAGS.pipeline_spec_path)

    runner.run(pipeline_def)
Example No. 14
    eval_steps = data_types.RuntimeParameter(name='eval-steps',
                                             default=500,
                                             ptype=int)

    pipeline_root = '{}/{}/{}'.format(Config.ARTIFACT_STORE_URI,
                                      Config.PIPELINE_NAME,
                                      kfp.dsl.RUN_ID_PLACEHOLDER)

    # Set KubeflowDagRunner settings.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        pipeline_operator_funcs=kubeflow_dag_runner.
        get_default_pipeline_operator_funcs(strtobool(Config.USE_KFP_SA)),
        tfx_image=Config.TFX_IMAGE)

    # Compile the pipeline.
    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        create_pipeline(pipeline_name=Config.PIPELINE_NAME,
                        pipeline_root=pipeline_root,
                        data_root_uri=data_root_uri,
                        train_steps=train_steps,
                        eval_steps=eval_steps,
                        enable_tuning=strtobool(Config.ENABLE_TUNING),
                        ai_platform_training_args=ai_platform_training_args,
                        ai_platform_serving_args=ai_platform_serving_args,
                        beam_pipeline_args=beam_pipeline_args))
Example No. 15
_beam_tmp_folder = '{}/beam/tmp'.format(_artifact_store_uri)

_beam_pipeline_args = [
    '--runner=DataflowRunner',
    '--project=' + _project_id,
    '--temp_location=' + _beam_tmp_folder,
    '--region=' + _region,
]

# To run this pipeline from the python CLI:
#   $python taxi_pipeline_hello.py
if __name__ == '__main__':
    absl.logging.set_verbosity(absl.logging.INFO)

    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )
    pipeline_operator_funcs = kubeflow_dag_runner.get_default_pipeline_operator_funcs(
    )

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        pipeline_operator_funcs=pipeline_operator_funcs,
        tfx_image=_tfx_image)

    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        create_pipeline(pipeline_name=_pipeline_name,
                        pipeline_root=_pipeline_root,
                        data_root=_data_root,
                        beam_pipeline_args=_beam_pipeline_args))
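
The underscore-prefixed names used above (_pipeline_name, _pipeline_root, _data_root, _tfx_image, and the GCP settings behind the Dataflow arguments) are module-level constants defined earlier in the same file, which this listing does not include. An illustrative sketch of the kind of definitions the snippet expects; every value is a placeholder:

_pipeline_name = 'taxi_pipeline_hello'
_project_id = 'my-gcp-project'                  # placeholder
_region = 'us-central1'                         # placeholder
_artifact_store_uri = 'gs://my-artifact-store'  # placeholder
_tfx_image = 'gcr.io/my-gcp-project/tfx-image'  # placeholder
_data_root = 'gs://my-bucket/taxi/data'         # placeholder
_pipeline_root = '{}/{}'.format(_artifact_store_uri, _pipeline_name)
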
Example No. 16
    inner join core_competition on core_match.competition_id=core_competition.id
where
    core_batperformance.bat_how_out not in ('not out', 'no', 'DNB')
and
    core_competition.league_id = {obj_id}
group by bat_how_out;
"""

#
# Pipelines
#

clean_data_pipeline = pipe.create_pipeline(
    pipe.create_add_missing_row("lbw", 0),
    pipe.create_add_missing_row("ct", 0),
    pipe.create_add_missing_row("b", 0),
    pipe.create_add_missing_row("run out", 0),
    pipe.create_add_missing_row("st", 0),
    pipe.create_rename_label("run out", "ro", axis=0),
)


def bowler_pipeline(conn):
    return pipe.create_pipeline(
        pipe.create_fetch_data(bowler_query, conn),
        pipe.transpose_df,
        pipe.create_rename_axis("how_out"),
        pipe.create_rename_label(0, "bowler_dist", axis=1),
        clean_data_pipeline,
        pipe.create_normalise_values("bowler_dist"),
    )
Example No. 17
    eval_steps = data_types.RuntimeParameter(name='eval-steps',
                                             default=350,
                                             ptype=int)

    PIPELINE_ROOT = '{}/{}/{}'.format(Config.ARTIFACT_STORE_URI,
                                      Config.PIPELINE_NAME,
                                      kfp.dsl.RUN_ID_PLACEHOLDER)

    # Set KubeflowDagRunner settings.
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        pipeline_operator_funcs=kubeflow_dag_runner.
        get_default_pipeline_operator_funcs(strtobool(Config.USE_KFP_SA)),
        tfx_image=Config.TFX_IMAGE)

    # Compile the pipeline.
    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        create_pipeline(pipeline_name=Config.PIPELINE_NAME,
                        pipeline_root=PIPELINE_ROOT,
                        data_root_uri=data_root_uri,
                        train_steps=train_steps,
                        eval_steps=eval_steps,
                        enable_tuning=strtobool(Config.ENABLE_TUNING),
                        ai_platform_training_args=AI_PLATFORM_TRAINING_ARGS,
                        ai_platform_serving_args=AI_PLATFORM_SERVING_ARGS,
                        beam_pipeline_args=BEAM_PIPELINE_ARGS))
Example No. 18
  )

  pipeline_root = f'{config.ARTIFACT_STORE_URI}/{config.PIPELINE_NAME}/{kfp.dsl.RUN_ID_PLACEHOLDER}'

  # Set KubeflowDagRunner settings
  metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config()

  runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
    kubeflow_metadata_config = metadata_config,
    pipeline_operator_funcs = kubeflow_dag_runner.get_default_pipeline_operator_funcs(
      config.USE_KFP_SA == 'True'),
    tfx_image=config.ML_IMAGE_URI
  )

  # Compile the pipeline
  kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
    pipeline.create_pipeline(
      pipeline_name=config.PIPELINE_NAME,
      pipeline_root=pipeline_root,
      project_id=config.PROJECT_ID,
      bq_dataset_name=config.BQ_DATASET_NAME,
      min_item_frequency=min_item_frequency,
      max_group_size=max_group_size,
      dimensions=dimensions,
      num_leaves=num_leaves,
      eval_min_recall=eval_min_recall,
      eval_max_latency=eval_max_latency,
      ai_platform_training_args=ai_platform_training_args,
      beam_pipeline_args=beam_pipeline_args,
      model_regisrty_uri=config.MODEL_REGISTRY_URI)
  )
Example No. 19
                                              ptype=int)

    eval_steps = data_types.RuntimeParameter(name='eval-steps',
                                             default=500,
                                             ptype=int)

    pipeline_root = '{}/{}/{}'.format(Config.ARTIFACT_STORE_URI,
                                      Config.PIPELINE_NAME,
                                      kfp.dsl.RUN_ID_PLACEHOLDER)

    # Set KubeflowDagRunner settings
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        pipeline_operator_funcs=kubeflow_dag_runner.
        get_default_pipeline_operator_funcs(Config.USE_KFP_SA == 'True'),
        tfx_image=Config.TFX_IMAGE)

    # Compile the pipeline
    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        create_pipeline(pipeline_name=Config.PIPELINE_NAME,
                        pipeline_root=pipeline_root,
                        data_root_uri=data_root_uri,
                        train_steps=train_steps,
                        eval_steps=eval_steps,
                        ai_platform_training_args=ai_platform_training_args,
                        ai_platform_serving_args=ai_platform_serving_args,
                        beam_pipeline_args=beam_pipeline_args))
Example No. 20
    accuracy_threshold = data_types.RuntimeParameter(name='accuracy-threshold',
                                                     default=0.75,
                                                     ptype=float)

    pipeline_root = '{}/{}/{}'.format(config.ARTIFACT_STORE_URI,
                                      config.PIPELINE_NAME,
                                      kfp.dsl.RUN_ID_PLACEHOLDER)

    # Set KubeflowDagRunner settings
    metadata_config = kubeflow_dag_runner.get_default_kubeflow_metadata_config(
    )

    runner_config = kubeflow_dag_runner.KubeflowDagRunnerConfig(
        kubeflow_metadata_config=metadata_config,
        pipeline_operator_funcs=kubeflow_dag_runner.
        get_default_pipeline_operator_funcs(config.USE_KFP_SA == 'True'),
        tfx_image=config.TFX_IMAGE)

    # Compile the pipeline
    kubeflow_dag_runner.KubeflowDagRunner(config=runner_config).run(
        pipeline.create_pipeline(
            pipeline_name=config.PIPELINE_NAME,
            pipeline_root=pipeline_root,
            dataset_name=config.DATASET_NAME,
            train_steps=train_steps,
            eval_steps=eval_steps,
            accuracy_threshold=accuracy_threshold,
            ai_platform_training_args=ai_platform_training_args,
            ai_platform_serving_args=ai_platform_serving_args,
            beam_pipeline_args=beam_pipeline_args,
            model_regisrty_uri=config.MODEL_REGISTRY_URI))