Example #1
def resnet_train(
        project_id,
        output,
        region='us-central1',
        model='bolts',
        version='beta1',
        tf_version='1.9',
        train_csv='gs://bolts_image_dataset/bolt_images_train.csv',
        validation_csv='gs://bolts_image_dataset/bolt_images_validate.csv',
        labels='gs://bolts_image_dataset/labels.txt',
        depth=50,
        train_batch_size=1024,
        eval_batch_size=1024,
        steps_per_eval=250,
        train_steps=10000,
        num_train_images=218593,
        num_eval_images=54648,
        num_label_classes=10):

    preprocess = resnet_preprocess_op(project_id, output, train_csv,
                                      validation_csv,
                                      labels).apply(gcp.use_gcp_secret())
    train = resnet_train_op(preprocess.output, output, region, depth,
                            train_batch_size, eval_batch_size, steps_per_eval,
                            train_steps, num_train_images, num_eval_images,
                            num_label_classes,
                            tf_version).apply(gcp.use_gcp_secret())
    deploy = resnet_deploy_op(train.output, model, version, project_id, region,
                              tf_version).apply(gcp.use_gcp_secret())
Example #2
def kubeflow_training(
        output,
        project,
        evaluation='gs://ml-pipeline-playground/flower/eval100.csv',
        train='gs://ml-pipeline-playground/flower/train200.csv',
        schema='gs://ml-pipeline-playground/flower/schema.json',
        learning_rate=0.1,
        hidden_layer_size='100,50',
        steps=2000,
        target='label',
        workers=0,
        pss=0,
        preprocess_mode='local',
        predict_mode='local'):
    # TODO: use the argo job name as the workflow
    workflow = '{{workflow.name}}'

    preprocess = dataflow_tf_transform_op(
        train, evaluation, schema, project, preprocess_mode, '',
        '%s/%s/transformed' % (output, workflow)).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
    training = kubeflow_tf_training_op(
        preprocess.output, schema, learning_rate, hidden_layer_size, steps,
        target, '', '%s/%s/train' % (output, workflow)).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
    prediction = dataflow_tf_predict_op(
        evaluation, schema, target, training.output, predict_mode, project,
        '%s/%s/predict' % (output, workflow)).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
    confusion_matrix = confusion_matrix_op(
        prediction.output, '%s/%s/confusionmatrix' % (output, workflow)).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
def kubeflow_training(
    data_file='gs://images_pama/txt/class_ch.txt',
    image_data_pack='gs://images_pama/center_housing.tar.gz',
    parser='simple',
    skip=False,
    num_epochs='2000',
    gcs_weight_path='gs://images_pama/model/class_ch_model_frcnn.hdf5',
    number_of_rois='32',
    network='resnet50',
    prediction_dir='gs://images_pama/config.pickle',
):
    # set the flag to use GPU trainer
    use_gpu = True

    training = kubeflow_tf_training_op(
        training_data_file=data_file,
        training_image_pack=image_data_pack,
        parser=parser,
        skip=skip,
        num_epochs=num_epochs,
        gcs_weight_path=gcs_weight_path
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    validation = kubeflow_tf_validation_op(
        validation_data_file=data_file,
        number_of_rois=number_of_rois,
        network=network,
        prediction_dir=prediction_dir,
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
Example #4
def kaggle_houseprice(bucket_name: str, commit_sha: str):
    import os
    stepDownloadData = dsl.ContainerOp(
        name='download dataset',
        image=os.path.join(args.gcr_address, 'kaggle_download:latest'),
        command=['python', 'download_data.py'],
        arguments=["--bucket_name", bucket_name],
        file_outputs={
            'train_dataset': '/train.txt',
            'test_dataset': '/test.txt'
        }).apply(use_gcp_secret('user-gcp-sa'))

    stepVisualizeTable = dsl.ContainerOp(
        name='visualize dataset in table',
        image=os.path.join(args.gcr_address, 'kaggle_visualize_table:latest'),
        command=['python', 'visualize.py'],
        arguments=[
            '--train_file_path',
            '%s' % stepDownloadData.outputs['train_dataset']
        ],
        output_artifact_paths={
            'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
        }).apply(use_gcp_secret('user-gcp-sa'))

    stepVisualizeHTML = dsl.ContainerOp(
        name='visualize dataset in html',
        image=os.path.join(args.gcr_address, 'kaggle_visualize_html:latest'),
        command=['python', 'visualize.py'],
        arguments=[
            '--train_file_path',
            '%s' % stepDownloadData.outputs['train_dataset'], '--commit_sha',
            commit_sha, '--bucket_name', bucket_name
        ],
        output_artifact_paths={
            'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
        }).apply(use_gcp_secret('user-gcp-sa'))

    stepTrainModel = dsl.ContainerOp(
        name='train model',
        image=os.path.join(args.gcr_address, 'kaggle_train:latest'),
        command=['python', 'train.py'],
        arguments=[
            '--train_file',
            '%s' % stepDownloadData.outputs['train_dataset'], '--test_file',
            '%s' % stepDownloadData.outputs['test_dataset'], '--output_bucket',
            bucket_name
        ],
        file_outputs={
            'result': '/result_path.txt'
        }).apply(use_gcp_secret('user-gcp-sa'))

    stepSubmitResult = dsl.ContainerOp(
        name='submit result to kaggle competition',
        image=os.path.join(args.gcr_address, 'kaggle_submit:latest'),
        command=['python', 'submit_result.py'],
        arguments=[
            '--result_file',
            '%s' % stepTrainModel.outputs['result'], '--submit_message',
            'submit'
        ]).apply(use_gcp_secret('user-gcp-sa'))
def train_and_deploy_helper(preprocess, hparam_train):
    """Helper function called from the two pipeline functions"""

    # Step 3: Train the model some more, but on the pipelines cluster itself
    train_tuned = dsl.ContainerOp(
        name='traintuned',
        # image needs to be a compile-time string
        image=
        'gcr.io/ai-analytics-solutions/babyweight-pipeline-traintuned:latest',
        arguments=[
            hparam_train.outputs['jobname'], preprocess.outputs['bucket']
        ],
        file_outputs={
            'train': '/output.txt'
        }).apply(use_gcp_secret('user-gcp-sa'))
    train_tuned.set_memory_request('2G')
    train_tuned.set_cpu_request('1')

    # Step 4: Deploy the trained model to Cloud ML Engine
    deploy_cmle = dsl.ContainerOp(
        name='deploycmle',
        # image needs to be a compile-time string
        image=
        'gcr.io/ai-analytics-solutions/babyweight-pipeline-deploycmle:latest',
        arguments=[
            train_tuned.outputs['train'],  # modeldir
            'babyweight',
            'mlp'
        ],
        file_outputs={
            'model': '/model.txt',
            'version': '/version.txt'
        }).apply(use_gcp_secret('user-gcp-sa'))

    return deploy_cmle
def resnet_train(
        project_id,
        output,
        region='us-central1',
        model='bolts',
        version='beta1',
        tf_version='1.12',
        train_csv='gs://bolts_image_dataset/bolt_images_train.csv',
        validation_csv='gs://bolts_image_dataset/bolt_images_validate.csv',
        labels='gs://bolts_image_dataset/labels.txt',
        depth=50,
        train_batch_size=1024,
        eval_batch_size=1024,
        steps_per_eval=250,
        train_steps=10000,
        num_train_images=218593,
        num_eval_images=54648,
        num_label_classes=10):
    output_dir = os.path.join(str(output), '{{workflow.name}}')
    preprocess_staging = os.path.join(output_dir, 'staging')
    preprocess_output = os.path.join(output_dir, 'preprocessed_output')
    train_output = os.path.join(output_dir, 'model')
    preprocess = resnet_preprocess_op(
        project_id, preprocess_output, preprocess_staging, train_csv,
        validation_csv, labels, train_batch_size,
        eval_batch_size).apply(gcp.use_gcp_secret())
    train = resnet_train_op(project_id, preprocess_output, train_output,
                            region, depth, train_batch_size, eval_batch_size,
                            steps_per_eval, train_steps, num_train_images,
                            num_eval_images, num_label_classes,
                            tf_version).apply(gcp.use_gcp_secret())
    train.after(preprocess)
    export_output = os.path.join(str(train.outputs['job_dir']), 'export')
    deploy = resnet_deploy_op(export_output, model, version, project_id,
                              region, tf_version).apply(gcp.use_gcp_secret())
def kubeflow_training(
        output,
        project,
        evaluation='gs://ml-pipeline-playground/flower/eval100.csv',
        train='gs://ml-pipeline-playground/flower/train200.csv',
        schema='gs://ml-pipeline-playground/flower/schema.json',
        learning_rate=0.1,
        hidden_layer_size='100,50',
        steps=2000,
        target='label',
        workers=0,
        pss=0,
        preprocess_mode='local',
        predict_mode='local',
        optimizer=''):
    output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data'

    # set the flag to use GPU trainer
    use_gpu = False

    preprocess = dataflow_tf_transform_op(
        training_data_file_pattern=train,
        evaluation_data_file_pattern=evaluation,
        schema=schema,
        gcp_project=project,
        run_mode=preprocess_mode,
        preprocessing_module='',
        transformed_data_dir=output_template).apply(
            gcp.use_gcp_secret('user-gcp-sa'))

    training = kubeflow_tf_training_op(
        transformed_data_dir=preprocess.output,
        schema=schema,
        learning_rate=learning_rate,
        hidden_layer_size=hidden_layer_size,
        steps=steps,
        target=target,
        preprocessing_module='',
        optimizer='',
        training_output_dir=output_template).apply(
            gcp.use_gcp_secret('user-gcp-sa'))

    if use_gpu:
        training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:fe639f41661d8e17fcda64ff8242127620b80ba0'
        training.set_gpu_limit(1)

    prediction = dataflow_tf_predict_op(data_file_pattern=evaluation,
                                        schema=schema,
                                        target_column=target,
                                        model=training.output,
                                        run_mode=predict_mode,
                                        gcp_project=project,
                                        batch_size='',
                                        predictions_dir=output_template).apply(
                                            gcp.use_gcp_secret('user-gcp-sa'))

    confusion_matrix = confusion_matrix_op(
        predictions=prediction.output,
        target_lambda='',
        output_dir=output_template).apply(gcp.use_gcp_secret('user-gcp-sa'))
def xgb_train_pipeline(
    output,
    project,
    region='us-central1',
    train_data='gs://ml-pipeline-playground/sfpd/train.csv',
    eval_data='gs://ml-pipeline-playground/sfpd/eval.csv',
    schema='gs://ml-pipeline-playground/sfpd/schema.json',
    target='resolution',
    rounds=200,
    workers=2,
    true_label='ACTION',
):
  delete_cluster_op = DeleteClusterOp('delete-cluster', project, region).apply(gcp.use_gcp_secret('user-gcp-sa'))
  with dsl.ExitHandler(exit_op=delete_cluster_op):
    create_cluster_op = CreateClusterOp('create-cluster', project, region, output).apply(gcp.use_gcp_secret('user-gcp-sa'))

    analyze_op = AnalyzeOp('analyze', project, region, create_cluster_op.output, schema,
                           train_data, '%s/{{workflow.name}}/analysis' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

    transform_op = TransformOp('transform', project, region, create_cluster_op.output,
                               train_data, eval_data, target, analyze_op.output,
                               '%s/{{workflow.name}}/transform' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

    train_op = TrainerOp('train', project, region, create_cluster_op.output, transform_op.outputs['train'],
                         transform_op.outputs['eval'], target, analyze_op.output, workers,
                         rounds, '%s/{{workflow.name}}/model' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

    predict_op = PredictOp('predict', project, region, create_cluster_op.output, transform_op.outputs['eval'],
                           train_op.output, target, analyze_op.output, '%s/{{workflow.name}}/predict' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

    confusion_matrix_op = ConfusionMatrixOp('confusion-matrix', predict_op.output,
                                            '%s/{{workflow.name}}/confusionmatrix' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))

    roc_op = RocOp('roc', predict_op.output, true_label, '%s/{{workflow.name}}/roc' % output).apply(gcp.use_gcp_secret('user-gcp-sa'))
def deploy(modeldir):
    deploy_cmle = dsl.ContainerOp(
        name='deploycmle',
        # image needs to be a compile-time string
        image=
        'gcr.io/tenacious-camp-267214/babyweight-pipeline-deploycmle:latest',
        arguments=[
            modeldir,  # modeldir
            'babyweight',
            'mlp'
        ],
        file_outputs={
            'model': '/model.txt',
            'version': '/version.txt'
        }).apply(use_gcp_secret('user-gcp-sa'))
    deploy_cmle.execution_options.caching_strategy.max_cache_staleness = "P0D"

    deploy_app = dsl.ContainerOp(
        name='deployapp',
        # image needs to be a compile-time string
        image=
        'gcr.io/tenacious-camp-267214/babyweight-pipeline-deployapp:latest',
        arguments=[
            deploy_cmle.outputs['model'], deploy_cmle.outputs['version']
        ],
        file_outputs={
            'appurl': '/appurl.txt'
        }).apply(use_gcp_secret('user-gcp-sa'))
    deploy_app.execution_options.caching_strategy.max_cache_staleness = "P0D"
Example #10
def automl1(  #pylint: disable=unused-argument
  # There's now a more succinct way to define the pipeline params
  project_id: dsl.PipelineParam = dsl.PipelineParam(name='project-id', value='YOUR_PROJECT_HERE'),
  compute_region: dsl.PipelineParam = dsl.PipelineParam(name='compute-region', value='YOUR_REGION_HERE'),
  dataset_name: dsl.PipelineParam = dsl.PipelineParam(name='dataset-name', value='YOUR_DATASETNAME_HERE'),
  model_name: dsl.PipelineParam = dsl.PipelineParam(name='model-name', value='YOUR_MODELNAME_HERE'),
  csv_path: dsl.PipelineParam = dsl.PipelineParam(name='csv-path', value='YOUR_DATASET_PATH')
  ):


  dataset = dsl.ContainerOp(
      name='dataset',
      image='gcr.io/google-samples/automl-pipeline',
      arguments=["--project_id", project_id, "--operation", DATASET_OP,
          "--compute_region", compute_region,
          "--dataset_name", dataset_name,
          "--csv_path", csv_path],
      file_outputs={'dataset_id': '/dataset_id.txt', 'csv_path': '/csv_path.txt'}

      ).apply(gcp.use_gcp_secret('user-gcp-sa'))

  model = dsl.ContainerOp(
      name='model',
      image='gcr.io/google-samples/automl-pipeline',
      arguments=["--project_id", project_id, "--operation", MODEL_OP,
          "--compute_region", compute_region,
          "--model_name", model_name,
          "--csv_path", dataset.outputs['csv_path'],
          "--dataset_id", dataset.outputs['dataset_id']]
      ).apply(gcp.use_gcp_secret('user-gcp-sa'))

  model.after(dataset)
Example #11
def gh_summ(  #pylint: disable=unused-argument
    train_steps: 'Integer' = 2019300,
    project: String = 'YOUR_PROJECT_HERE',
    github_token: String = 'YOUR_GITHUB_TOKEN_HERE',
    working_dir: GCSPath = 'gs://YOUR_GCS_DIR_HERE',
    checkpoint_dir:
    GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000/',
    deploy_webapp: String = 'true',
    data_dir: GCSPath = 'gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/'
):

    copydata = copydata_op(
        data_dir=data_dir,
        checkpoint_dir=checkpoint_dir,
        model_dir='%s/%s/model_output' % (working_dir, dsl.RUN_ID_PLACEHOLDER),
        action=COPY_ACTION,
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

    log_dataset = metadata_log_op(log_type=DATASET,
                                  workspace_name=WORKSPACE_NAME,
                                  run_name=dsl.RUN_ID_PLACEHOLDER,
                                  data_uri=data_dir)

    train = train_op(data_dir=data_dir,
                     model_dir=copydata.outputs['copy_output_path'],
                     action=TRAIN_ACTION,
                     train_steps=train_steps,
                     deploy_webapp=deploy_webapp).apply(
                         gcp.use_gcp_secret('user-gcp-sa'))

    log_model = metadata_log_op(log_type=MODEL,
                                workspace_name=WORKSPACE_NAME,
                                run_name=dsl.RUN_ID_PLACEHOLDER,
                                model_uri=train.outputs['train_output_path'])

    serve = dsl.ContainerOp(
        name='serve',
        image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve:v2',
        arguments=[
            "--model_name",
            'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER, ), "--model_path",
            train.outputs['train_output_path']
        ]).apply(gcp.use_gcp_secret('user-gcp-sa'))

    log_dataset.after(copydata)
    log_model.after(train)
    train.set_gpu_limit(1)
    train.set_memory_limit('48G')

    with dsl.Condition(train.outputs['launch_server'] == 'true'):
        webapp = dsl.ContainerOp(
            name='webapp',
            image='gcr.io/google-samples/ml-pipeline-webapp-launcher:v3ap',
            arguments=[
                "--model_name",
                'ghsumm-%s' % (dsl.RUN_ID_PLACEHOLDER, ), "--github_token",
                github_token
            ])
        webapp.after(serve)
Example #12
def kubeflow_training(
        output,
        project,
        evaluation='gs://ml-pipeline-playground/flower/eval100.csv',
        train='gs://ml-pipeline-playground/flower/train200.csv',
        schema='gs://ml-pipeline-playground/flower/schema.json',
        learning_rate=0.1,
        hidden_layer_size='100,50',
        steps=2000,
        target='label',
        workers=0,
        pss=0,
        preprocess_mode='local',
        predict_mode='local'):
    # TODO: use the argo job name as the workflow
    workflow = '{{workflow.name}}'
    # set the flag to use GPU trainer
    use_gpu = False

    preprocess = dataflow_tf_transform_op(
        training_data_file_pattern=train,
        evaluation_data_file_pattern=evaluation,
        schema=schema,
        gcp_project=project,
        run_mode=preprocess_mode,
        preprocessing_module='',
        transformed_data_dir='%s/%s/transformed' % (output, workflow)).apply(
            gcp.use_gcp_secret('user-gcp-sa'))

    training = kubeflow_tf_training_op(transformed_data_dir=preprocess.output,
                                       schema=schema,
                                       learning_rate=learning_rate,
                                       hidden_layer_size=hidden_layer_size,
                                       steps=steps,
                                       target=target,
                                       preprocessing_module='',
                                       training_output_dir='%s/%s/train' %
                                       (output, workflow)).apply(
                                           gcp.use_gcp_secret('user-gcp-sa'))

    if use_gpu:
        training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:a277f87ea1d4707bf860d080d06639b7caf9a1cf'
        training.set_gpu_limit(1)

    prediction = dataflow_tf_predict_op(data_file_pattern=evaluation,
                                        schema=schema,
                                        target_column=target,
                                        model=training.output,
                                        run_mode=predict_mode,
                                        gcp_project=project,
                                        predictions_dir='%s/%s/predict' %
                                        (output, workflow)).apply(
                                            gcp.use_gcp_secret('user-gcp-sa'))

    confusion_matrix = confusion_matrix_op(
        predictions=prediction.output,
        output_dir='%s/%s/confusionmatrix' % (output, workflow)).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
Example #13
def chicago_taxi_pipeline():
    gs_download_training_data_in_csv = chicago_taxi_dataset_op(
        gcs_path='gs://kf-demo-data-bucket/taxi_data.csv'
    ).apply(use_gcp_secret('user-gcp-sa')).output

    training_data_for_regression_in_csv = pandas_transform_csv_op(
        table=gs_download_training_data_in_csv,
        transform_code='''df.insert(0, "was_tipped", df["tips"] > 0); del df["tips"]''',
    ).output

    regression_data_visualization = visualization_op(
        train_file_path=training_data_for_regression_in_csv
    ).apply(use_gcp_secret('user-gcp-sa'))
Example #14
def taxi_cab_classification(
    output,
    project,
    column_names='gs://ml-pipeline-playground/tfx/taxi-cab-classification/column-names.json',
    key_columns='trip_start_timestamp',
    train='gs://ml-pipeline-playground/tfx/taxi-cab-classification/train.csv',
    evaluation='gs://ml-pipeline-playground/tfx/taxi-cab-classification/eval.csv',
    mode='local',
    preprocess_module='gs://ml-pipeline-playground/tfx/taxi-cab-classification/preprocessing.py',
    learning_rate=0.1,
    hidden_layer_size='1500',
    steps=3000,
    analyze_slice_column='trip_start_hour'):

  tf_server_name = 'taxi-cab-classification-model-{{workflow.uid}}'
  validation = dataflow_tf_data_validation_op(train, evaluation, column_names, 
      key_columns, project, mode, output
  ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  preprocess = dataflow_tf_transform_op(train, evaluation, validation.outputs['schema'],
      project, mode, preprocess_module, output
  ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  training = tf_train_op(preprocess.output, validation.outputs['schema'], learning_rate,
      hidden_layer_size, steps, 'tips', preprocess_module, output
  ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  analysis = dataflow_tf_model_analyze_op(training.output, evaluation,
      validation.outputs['schema'], project, mode, analyze_slice_column, output
  ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  prediction = dataflow_tf_predict_op(evaluation, validation.outputs['schema'], 'tips',
      training.output, mode, project, output
  ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  cm = confusion_matrix_op(prediction.output, output).apply(gcp.use_gcp_secret('user-gcp-sa'))
  roc = roc_op(prediction.output, output).apply(gcp.use_gcp_secret('user-gcp-sa'))
  deploy = kubeflow_deploy_op(training.output, tf_server_name).apply(gcp.use_gcp_secret('user-gcp-sa'))
Example #15
def preprocess_train_deploy(
        bucket: str = '<bucket>',
        cutoff_year: str = '2010',
        tag: str = '4',
        model: str = 'DeepModel'
):
  """Pipeline to train financial time series model"""
  preprocess_op = Preprocess('preprocess', bucket, cutoff_year).apply(
    gcp.use_gcp_secret('user-gcp-sa'))
  #pylint: disable=unused-variable
  train_op = Train('train', preprocess_op.output, tag,
                   bucket, model).apply(gcp.use_gcp_secret('user-gcp-sa'))
  with dsl.Condition(train_op.outputs['accuracy'] > 0.7):
    deploy_op = Deploy('deploy', tag, bucket).apply(gcp.use_gcp_secret('user-gcp-sa'))
def mnist_pipeline(
    storage_bucket: str,
    output_path: str,
):
    import os
    train_op = components.load_component_from_file('./train/component.yaml')
    train_step = train_op(storage_bucket=storage_bucket).apply(
        use_gcp_secret('user-gcp-sa'))

    visualize_op = components.load_component_from_file(
        './tensorboard/component.yaml')
    visualize_step = visualize_op(logdir='%s' % train_step.outputs['logdir'],
                                  output_path=output_path).apply(
                                      use_gcp_secret('user-gcp-sa'))
Example #17
def preprocess_train_and_deploy(
    project='ai-analytics-solutions',
    bucket='ai-analytics-solutions-kfpdemo',
    start_year='2000'
):
    """End-to-end Pipeline to train and deploy babyweight model"""
    # Step 1: create training dataset using Apache Beam on Cloud Dataflow
    preprocess = dsl.ContainerOp(
          name='preprocess',
          # image needs to be a compile-time string
          image='gcr.io/ai-analytics-solutions/babyweight-pipeline-bqtocsv:latest',
          arguments=[
            '--project', project,
            '--mode', 'cloud',
            '--bucket', bucket,
            '--start_year', start_year
          ],
          file_outputs={'bucket': '/output.txt'}
      ).apply(use_gcp_secret('user-gcp-sa'))
    

    # Step 2: Do hyperparameter tuning of the model on Cloud ML Engine
    hparam_train = dsl.ContainerOp(
        name='hypertrain',
        # image needs to be a compile-time string
        image='gcr.io/ai-analytics-solutions/babyweight-pipeline-hypertrain:latest',
        arguments=[
            preprocess.outputs['bucket']
        ],
        file_outputs={'jobname': '/output.txt'}
      ).apply(use_gcp_secret('user-gcp-sa'))
    
    # core ML part of pipeline
    deploy_cmle = train_and_deploy_helper(preprocess, hparam_train, True)
    
    # Step 5: Deploy web app
    deploy_app = dsl.ContainerOp(
          name='deployapp',
          # image needs to be a compile-time string
          image='gcr.io/ai-analytics-solutions/babyweight-pipeline-deployapp:latest',
          arguments=[
            deploy_cmle.outputs['model'],
            deploy_cmle.outputs['version']
          ],
          file_outputs={
            'appurl': '/appurl.txt'
          }
        ).apply(use_gcp_secret('user-gcp-sa'))
def my_pipeline(
    clusterproject='sparkpubsub',
    cluster='spark',
    region='europe-west4',
    staging='output-sparkpubsub-tweets',
    project='sparkpubsub',
    tableproject='sparkpubsub',
    dataset='wordcount',
    table='wordcount_output',
    output='gs://output-sparkpubsub-tweets/output/data.csv',
):

    spark_task = spark_run_op(
        clusterproject=clusterproject,
        cluster=cluster,
        region=region,
        staging=staging,
        project=project,  # project of output bucket
        tableproject=tableproject,  # table to read
        dataset=dataset,
        table=table,
        output=output,  # path of output data
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))


# compile the bad boy
# dsl-compile --py pipeline.py  --output ./pipeline.tar.gz
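The comment above shows the CLI route. A minimal programmatic alternative with the KFP v1 SDK, assuming the `my_pipeline` function above is decorated with `@dsl.pipeline` (the decorator is not shown in these snippets) and that the `kfp` package is installed:

import kfp.compiler as compiler

if __name__ == '__main__':
    # Equivalent to: dsl-compile --py pipeline.py --output ./pipeline.tar.gz
    compiler.Compiler().compile(my_pipeline, 'pipeline.tar.gz')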
Example #19
def evaluate_model_op(pitch_type, dummy1=None):
    return dsl.ContainerOp(name='Evaluate Models',
                           image='gcr.io/ross-kubeflow/evaluate-model:latest',
                           arguments=['--pitch_type', pitch_type],
                           file_outputs={
                               'data': '/root/dummy.txt',
                           }).apply(gcp.use_gcp_secret('user-gcp-sa'))
def tf_train_op(transformed_data_dir,
                schema: 'GcsUri[text/json]',
                learning_rate: float,
                hidden_layer_size: int,
                steps: int,
                target: str,
                preprocess_module: 'GcsUri[text/code/python]',
                training_output: 'GcsUri[Directory]',
                step_name='training'):
    return dsl.ContainerOp(
        name=step_name,
        image=
        'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:0.1.3-rc.2',  #TODO-release: update the release tag for the next release
        arguments=[
            '--transformed-data-dir',
            transformed_data_dir,
            '--schema',
            schema,
            '--learning-rate',
            learning_rate,
            '--hidden-layer-size',
            hidden_layer_size,
            '--steps',
            steps,
            '--target',
            target,
            '--preprocessing-module',
            preprocess_module,
            '--job-dir',
            training_output,
        ],
        file_outputs={
            'train': '/output.txt'
        }).apply(gcp.use_gcp_secret('user-gcp-sa'))
def dataflow_tf_transform_op(train_data: 'GcsUri',
                             evaluation_data: 'GcsUri',
                             schema: 'GcsUri[text/json]',
                             project: 'GcpProject',
                             preprocess_mode,
                             preprocess_module: 'GcsUri[text/code/python]',
                             transform_output: 'GcsUri[Directory]',
                             step_name='preprocess'):
    return dsl.ContainerOp(
        name=step_name,
        image=
        'gcr.io/ml-pipeline/ml-pipeline-dataflow-tft:0.1.3-rc.2',  #TODO-release: update the release tag for the next release
        arguments=[
            '--train',
            train_data,
            '--eval',
            evaluation_data,
            '--schema',
            schema,
            '--project',
            project,
            '--mode',
            preprocess_mode,
            '--preprocessing-module',
            preprocess_module,
            '--output',
            transform_output,
        ],
        file_outputs={
            'transformed': '/output.txt'
        }).apply(gcp.use_gcp_secret('user-gcp-sa'))
def dataflow_tf_data_validation_op(inference_data: 'GcsUri',
                                   validation_data: 'GcsUri',
                                   column_names: 'GcsUri[text/json]',
                                   key_columns,
                                   project: 'GcpProject',
                                   mode,
                                   validation_output: 'GcsUri[Directory]',
                                   step_name='validation'):
    return dsl.ContainerOp(
        name=step_name,
        image=
        'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfdv:0.1.3-rc.2',  #TODO-release: update the release tag for the next release
        arguments=[
            '--csv-data-for-inference',
            inference_data,
            '--csv-data-to-validate',
            validation_data,
            '--column-names',
            column_names,
            '--key-columns',
            key_columns,
            '--project',
            project,
            '--mode',
            mode,
            '--output',
            validation_output,
        ],
        file_outputs={
            'output': '/output.txt',
            'schema': '/output_schema.json',
        }).apply(gcp.use_gcp_secret('user-gcp-sa'))
Example #23
def nyc_taxi_pipeline(project='kubeflow-xyz',
                      dataset='yellow_taxi',
                      bucket='gs://yellow-taxi-nyc',
                      start_date='2015-01-01',
                      end_date='2015-01-05'):
    extract = extract_op(project=project,
                         dataset=dataset,
                         bucket=bucket,
                         start_date=start_date,
                         end_date=end_date).apply(
                             gcp.use_gcp_secret('user-gcp-sa'))

    preprocessing = preprocessing_op(
        project=project,
        staging_bucket=extract.outputs['staging_bucket']).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
Example #24
def wbc_pipline(model_export_dir='export/wbc',
                data_root='data/segmentation_WBC-master',
                metadata_file_name='Class_Labels_of_{}.csv',
                subset='Dataset1',
                project='graphic-option-220202',
                bucket_name='kf-test1234',
                n_class="5",
                resume_model='export/wbc/NFCM_model.pth',
                epochs='50',
                batch_size='32',
                pvc_name=''):
    train = _train(data_root, metadata_file_name, subset, project, bucket_name,
                   n_class, epochs, batch_size,
                   model_export_dir)  # .set_gpu_limit(1)
    # train.add_node_selector_constraint('cloud.google.com/gke-nodepool', 'gpu-pool')
    # out = train.outputs['output']

    test = _test(data_root, metadata_file_name, subset, project, bucket_name,
                 n_class, resume_model, model_export_dir)
    test.after(train)

    steps = [train, test]
    for step in steps:
        if platform == 'GCP':
            step.apply(gcp.use_gcp_secret('user-gcp-sa'))
def dataflow_function_embedding_op(
    cluster_name: str,
    function_embeddings_bq_table: str,
    function_embeddings_dir: str,
    namespace: str,
    num_workers: int,
    project: 'GcpProject',
    saved_model_dir: 'GcsUri',
    worker_machine_type: str,
    workflow_id: str,
    working_dir: str,
):
    return dsl.ContainerOp(
        name='dataflow_function_embedding',
        image=
        'gcr.io/kubeflow-examples/code-search/ks:v20181210-d7487dd-dirty-eb371e',
        command=['/usr/local/src/submit_code_embeddings_job.sh'],
        arguments=[
            "--cluster=%s" % cluster_name,
            "--dataDir=%s" % 'gs://code-search-demo/20181104/data',
            "--functionEmbeddingsDir=%s" % function_embeddings_dir,
            "--functionEmbeddingsBQTable=%s" % function_embeddings_bq_table,
            "--modelDir=%s" % saved_model_dir,
            "--namespace=%s" % namespace,
            "--numWorkers=%s" % num_workers,
            "--project=%s" % project,
            "--workerMachineType=%s" % worker_machine_type,
            "--workflowId=%s" % workflow_id,
            "--workingDir=%s" % working_dir,
        ]).apply(gcp.use_gcp_secret('user-gcp-sa'))
def kubeflow_training(
    output,
    project,
    test='gs://dataset-image-train/TFRecords/images/test_labels.csv',
    train='gs://dataset-image-train/TFRecords/images/train_labels.csv',
    # schema='gs://ml-pipeline-playground/flower/schema.json',
    learning_rate=0.1,
    hidden_layer_size='100,50',
    steps=2000,
    target='label',
    workers=0,
    pss=0,
    preprocess_mode='local',
    predict_mode='local',
):
    output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data'

    # set the flag to use GPU trainer
    use_gpu = False

    preprocess = dataflow_tf_transform_op(
        training_data_file_pattern=train,
        evaluation_data_file_pattern=test,
        #schema=schema,
        gcp_project=project,
        run_mode=preprocess_mode,
        preprocessing_module='',
        transformed_data_dir=output_template).apply(
            gcp.use_gcp_secret('user-gcp-sa'))
def dataflow_tf_predict_op(evaluation_data: 'GcsUri',
                           schema: 'GcsUri[text/json]',
                           target: str,
                           model: 'TensorFlow model',
                           predict_mode,
                           project: 'GcpProject',
                           prediction_output: 'GcsUri',
                           step_name='prediction'):
    return dsl.ContainerOp(
        name=step_name,
        image=
        'gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:0.1.3-rc.2',  #TODO-release: update the release tag for the next release
        arguments=[
            '--data',
            evaluation_data,
            '--schema',
            schema,
            '--target',
            target,
            '--model',
            model,
            '--mode',
            predict_mode,
            '--project',
            project,
            '--output',
            prediction_output,
        ],
        file_outputs={
            'prediction': '/output.txt'
        }).apply(gcp.use_gcp_secret('user-gcp-sa'))
def dataflow_tf_model_analyze_op(model: 'TensorFlow model',
                                 evaluation_data: 'GcsUri',
                                 schema: 'GcsUri[text/json]',
                                 project: 'GcpProject',
                                 analyze_mode,
                                 analyze_slice_column,
                                 analysis_output: 'GcsUri',
                                 step_name='analysis'):
    return dsl.ContainerOp(
        name=step_name,
        image=
        'gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:0.1.3-rc.2',  #TODO-release: update the release tag for the next release
        arguments=[
            '--model',
            model,
            '--eval',
            evaluation_data,
            '--schema',
            schema,
            '--project',
            project,
            '--mode',
            analyze_mode,
            '--slice-columns',
            analyze_slice_column,
            '--output',
            analysis_output,
        ],
        file_outputs={
            'analysis': '/output.txt'
        }).apply(gcp.use_gcp_secret('user-gcp-sa'))
def prepare_component(text_path: dsl.PipelineParam, out_pkl_path: dsl.PipelineParam):
    return kfp.dsl.ContainerOp(
        name='Prepare data component',
        image=f'{REGISTRY}/kf_prepare:1.0.0',
        arguments=['--text-path', text_path,
                   '--pkl-path', out_pkl_path]
    ).apply(use_gcp_secret('user-gcp-sa'))
Example #30
def dp_inf_pipe(
    model_name: dsl.PipelineParam = dsl.PipelineParam(name='model-name',
                                                      value='MODEL_NAME'),
    model_path: dsl.PipelineParam = dsl.PipelineParam(name='model-path',
                                                      value='MODEL_PATH'),
    num_gpus: dsl.PipelineParam = dsl.PipelineParam(name='num-gpus', value=0),

    #   pred_inp_dir: dsl.PipelineParam = dsl.PipelineParam(name='pred_inp_dir', value='INPUT DIRECTORY FOR PREDICTION'),
    #   model_location: dsl.PipelineParam = dsl.PipelineParam(name='model_location', value='TRAINED_MODEL_LOCATION'),
    #   inf_batch_size: dsl.PipelineParam = dsl.PipelineParam(name='inf_batch_size', value=10)
):

    tfserve = dsl.ContainerOp(
        name='tfserve',
        image='gcr.io/speedy-aurora-193605/retina-tfserve:latest',
        arguments=[
            "--model_name",
            model_name,
            "--model_path",
            model_path,
            "--num_gpus",
            num_gpus,
        ],
        # file_outputs={'output': '/tmp/output'}
    ).apply(gcp.use_gcp_secret('admin-gcp-sa'))
Example #31
def gh_summ(  #pylint: disable=unused-argument
  train_steps: dsl.PipelineParam = dsl.PipelineParam(name='train-steps', value=2019300),
  project: dsl.PipelineParam = dsl.PipelineParam(name='project', value='YOUR_PROJECT_HERE'),
  github_token: dsl.PipelineParam = dsl.PipelineParam(
      name='github-token', value='YOUR_GITHUB_TOKEN_HERE'),
  working_dir: dsl.PipelineParam = dsl.PipelineParam(name='working-dir', value='YOUR_GCS_DIR_HERE'),
  checkpoint_dir: dsl.PipelineParam = dsl.PipelineParam(
      name='checkpoint-dir',
      value='gs://aju-dev-demos-codelabs/kubecon/model_output_tbase.bak2019000'),
  deploy_webapp: dsl.PipelineParam = dsl.PipelineParam(name='deploy-webapp', value='true'),
  data_dir: dsl.PipelineParam = dsl.PipelineParam(
      name='data-dir', value='gs://aju-dev-demos-codelabs/kubecon/t2t_data_gh_all/')):


  train = dsl.ContainerOp(
      name='train',
      image='gcr.io/google-samples/ml-pipeline-t2ttrain',
      arguments=["--data-dir", data_dir,
          "--checkpoint-dir", checkpoint_dir,
          "--model-dir", '%s/%s/model_output' % (working_dir, '{{workflow.name}}'),
          "--train-steps", train_steps, "--deploy-webapp", deploy_webapp],
      file_outputs={'output': '/tmp/output'}

      ).apply(gcp.use_gcp_secret('user-gcp-sa'))

  serve = dsl.ContainerOp(
      name='serve',
      image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve',
      arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',),
          "--model_path", '%s/%s/model_output/export' % (working_dir, '{{workflow.name}}')
          ]
      )
  serve.after(train)
  train.set_gpu_limit(4)

  with dsl.Condition(train.output == 'true'):
    webapp = dsl.ContainerOp(
        name='webapp',
        image='gcr.io/google-samples/ml-pipeline-webapp-launcher',
        arguments=["--model_name", 'ghsumm-%s' % ('{{workflow.name}}',),
            "--github_token", github_token]

        )
    webapp.after(serve)
Example #32
  def __new__(cls, component_name, input_dict,
              output_dict,
              exec_properties):
    """Creates a new component.

    Args:
      component_name: TFX component name.
      input_dict: Dictionary of input names to TFX types, or
        kfp.dsl.PipelineParam representing input parameters.
      output_dict: Dictionary of output names to List of TFX types.
      exec_properties: Execution properties.

    Returns:
      Newly constructed TFX Kubeflow component instance.
    """
    outputs = output_dict.keys()
    file_outputs = {
        output: '/output/ml_metadata/{}'.format(output) for output in outputs
    }

    for k, v in ExecutionProperties.exec_properties.items():
      exec_properties[k] = v

    arguments = [
        '--exec_properties',
        json.dumps(exec_properties),
        '--outputs',
        types.jsonify_tfx_type_dict(output_dict),
        component_name,
    ]

    for k, v in input_dict.items():
      if isinstance(v, float) or isinstance(v, int):
        v = str(v)
      arguments.append('--{}'.format(k))
      arguments.append(v)

    container_op = dsl.ContainerOp(
        name=component_name,
        command=_COMMAND,
        image=_KUBEFLOW_TFX_IMAGE,
        arguments=arguments,
        file_outputs=file_outputs,
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))  # Adds GCP authentication.

    # Add the Argo workflow ID to the container's environment variable so it
    # can be used to uniquely place pipeline outputs under the pipeline_root.
    field_path = "metadata.labels['workflows.argoproj.io/workflow']"
    container_op.add_env_variable(
        k8s_client.V1EnvVar(
            name='WORKFLOW_ID',
            value_from=k8s_client.V1EnvVarSource(
                field_ref=k8s_client.V1ObjectFieldSelector(
                    field_path=field_path))))

    named_outputs = {output: container_op.outputs[output] for output in outputs}

    # This allows user code to refer to the ContainerOp 'op' output named 'x'
    # as op.outputs.x
    component_outputs = type('Output', (), named_outputs)

    return type(component_name, (BaseComponent,), {
        'container_op': container_op,
        'outputs': component_outputs
    })
def workflow1(
  input_handle_eval: dsl.PipelineParam=dsl.PipelineParam(name='input-handle-eval', value='gs://aju-dev-demos-codelabs/KF/taxidata/eval/data.csv'),
  input_handle_train: dsl.PipelineParam=dsl.PipelineParam(name='input-handle-train', value='gs://aju-dev-demos-codelabs/KF/taxidata/train/data.csv'),
  outfile_prefix_eval: dsl.PipelineParam=dsl.PipelineParam(name='outfile-prefix-eval', value='eval_transformed'),
  outfile_prefix_train: dsl.PipelineParam=dsl.PipelineParam(name='outfile-prefix-train', value='train_transformed'),
  train_steps: dsl.PipelineParam=dsl.PipelineParam(name='train-steps', value=10000),
  project: dsl.PipelineParam=dsl.PipelineParam(name='project', value='YOUR_PROJECT_HERE'),
  working_dir: dsl.PipelineParam=dsl.PipelineParam(name='working-dir', value='YOUR_GCS_DIR_HERE'),
  tft_setup_file: dsl.PipelineParam=dsl.PipelineParam(name='tft-setup-file', value='/ml/transform/setup.py'),
  tfma_setup_file: dsl.PipelineParam=dsl.PipelineParam(name='tfma-setup-file', value='/ml/analysis/setup.py'),
  workers: dsl.PipelineParam=dsl.PipelineParam(name='workers', value=1),
  pss: dsl.PipelineParam=dsl.PipelineParam(name='pss', value=1),
  max_rows: dsl.PipelineParam=dsl.PipelineParam(name='max-rows', value=10000),
  ts1: dsl.PipelineParam=dsl.PipelineParam(name='ts1', value=''),
  ts2: dsl.PipelineParam=dsl.PipelineParam(name='ts2', value=''),
  preprocessing_module1: dsl.PipelineParam=dsl.PipelineParam(name='preprocessing-module1', value='gs://aju-dev-demos-codelabs/KF/taxi-preproc/preprocessing.py'),
  preprocessing_module2: dsl.PipelineParam=dsl.PipelineParam(name='preprocessing-module2', value='gs://aju-dev-demos-codelabs/KF/taxi-preproc/preprocessing2.py'),
  preprocess_mode: dsl.PipelineParam=dsl.PipelineParam(name='preprocess-mode', value='local'),
  tfma_mode: dsl.PipelineParam=dsl.PipelineParam(name='tfma-mode', value='local')):

  tfteval = dsl.ContainerOp(
      name = 'tft-eval',
      image = 'gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
      arguments = [ "--input_handle", input_handle_eval, "--outfile_prefix", outfile_prefix_eval,
          "--working_dir", '%s/%s/tft-eval' % (working_dir, '{{workflow.name}}'),
          "--project", project,
          "--mode", preprocess_mode,
          "--setup_file", tft_setup_file,
          "--max-rows", '5000',
          "--ts1", ts1,
          "--ts2", ts2,
          "--stage", "eval",
          "--preprocessing-module", preprocessing_module1]
      # file_outputs = {'transformed': '/output.txt'}
      ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  tfttrain = dsl.ContainerOp(
      name = 'tft-train',
      image = 'gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
      arguments = [ "--input_handle", input_handle_train, "--outfile_prefix", outfile_prefix_train,
          "--working_dir", '%s/%s/tft-train' % (working_dir, '{{workflow.name}}'),
          "--project", project,
          "--mode", preprocess_mode,
          "--setup_file", tft_setup_file,
          "--max_rows", max_rows,
          "--ts1", ts1,
          "--ts2", ts2,
          "--stage", "train",
          "--preprocessing_module", preprocessing_module1]
      ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  tfteval2 = dsl.ContainerOp(
      name = 'tft-eval2',
      image = 'gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
      arguments = [ "--input_handle", input_handle_eval, "--outfile_prefix", outfile_prefix_eval,
          "--working_dir", '%s/%s/tft-eval2' % (working_dir, '{{workflow.name}}'),
          "--project", project,
          "--mode", preprocess_mode,
          "--setup_file", tft_setup_file,
          "--max_rows", '5000',
          "--ts1", ts1,
          "--ts2", ts2,
          "--stage", "eval",
          "--preprocessing_module", preprocessing_module2]
      ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  tfttrain2 = dsl.ContainerOp(
      name = 'tft-train2',
      image = 'gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
      arguments = [ "--input_handle", input_handle_train, "--outfile_prefix", outfile_prefix_train,
          "--working_dir", '%s/%s/tft-train2' % (working_dir, '{{workflow.name}}'),
          "--project", project,
          "--mode", preprocess_mode,
          "--setup_file", tft_setup_file,
          "--max_rows", max_rows,
          "--ts1", ts1,
          "--ts2", ts2,
          "--stage", "train",
          "--preprocessing_module", preprocessing_module2]
      ).apply(gcp.use_gcp_secret('user-gcp-sa'))

  train = dsl.ContainerOp(
      name = 'train',
      image = 'gcr.io/google-samples/ml-pipeline-kubeflow-tf-taxi',
      arguments = [ "--tf-transform-dir", '%s/%s/tft-train' % (working_dir, '{{workflow.name}}'),
          "--output-dir", '%s/%s/tf' % (working_dir, '{{workflow.name}}'),
          "--working-dir", '%s/%s/tf/serving_model_dir' % (working_dir, '{{workflow.name}}'),
          "--job-dir", '%s/%s/tf' % (working_dir, '{{workflow.name}}'),
          "--train-files-dir", '%s/%s/tft-train' % (working_dir, '{{workflow.name}}'),
          "--eval-files-dir", '%s/%s/tft-eval' % (working_dir, '{{workflow.name}}'),
          "--train-files-prefix", outfile_prefix_train,
          "--eval-files-prefix", outfile_prefix_eval,
          "--train-steps", train_steps,
          "--workers", workers,
          "--pss", pss]
      )
  train.after(tfteval)
  train.after(tfttrain)

  train2 = dsl.ContainerOp(
      name = 'train2',
      image = 'gcr.io/google-samples/ml-pipeline-kubeflow-tf-taxi',
      arguments = [ "--tf-transform-dir", '%s/%s/tft-train2' % (working_dir, '{{workflow.name}}'),
          "--output-dir", '%s/%s/tf2' % (working_dir, '{{workflow.name}}'),
          "--working-dir", '%s/%s/tf2/serving_model_dir' % (working_dir, '{{workflow.name}}'),
          "--job-dir", '%s/%s/tf2' % (working_dir, '{{workflow.name}}'),
          "--train-files-dir", '%s/%s/tft-train2' % (working_dir, '{{workflow.name}}'),
          "--eval-files-dir", '%s/%s/tft-eval2' % (working_dir, '{{workflow.name}}'),
          "--train-files-prefix", outfile_prefix_train,
          "--eval-files-prefix", outfile_prefix_eval,
          "--train-steps", train_steps,
          "--workers", '1',
          "--pss", '1']
      )
  train2.after(tfteval2)
  train2.after(tfttrain2)

  analyze = dsl.ContainerOp(
      name = 'analyze',
      image = 'gcr.io/google-samples/ml-pipeline-dataflow-tfma-taxi',
      arguments = ["--input_csv", input_handle_eval,
          "--tfma_run_dir", '%s/%s/tfma/output' % (working_dir, '{{workflow.name}}'),
          "--eval_model_dir", '%s/%s/tf/eval_model_dir' % (working_dir, '{{workflow.name}}'),
          "--mode", tfma_mode,
          "--setup_file", tfma_setup_file,
          "--project", project]
      ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  analyze2 = dsl.ContainerOp(
      name = 'analyze2',
      image = 'gcr.io/google-samples/ml-pipeline-dataflow-tfma-taxi',
      arguments = ["--input_csv", input_handle_eval,
          "--tfma_run_dir", '%s/%s/tfma2/output' % (working_dir, '{{workflow.name}}'),
          "--eval_model_dir", '%s/%s/tf2/eval_model_dir' % (working_dir, '{{workflow.name}}'),
          "--mode", tfma_mode,
          "--setup_file", tfma_setup_file,
          "--project", project]
      ).apply(gcp.use_gcp_secret('user-gcp-sa'))

  cmleop = dsl.ContainerOp(
      name = 'cmleop',
      image = 'gcr.io/google-samples/ml-pipeline-cmle-op',
      arguments = ["--gcs-path", '%s/%s/tf/serving_model_dir/export/chicago-taxi' % (working_dir, '{{workflow.name}}'),
          "--version-name", '{{workflow.name}}',
          "--project", project]
      ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  cmleop2 = dsl.ContainerOp(
      name = 'cmleop2',
      image = 'gcr.io/google-samples/ml-pipeline-cmle-op',
      arguments = ["--gcs-path", '%s/%s/tf2/serving_model_dir/export/chicago-taxi' % (working_dir, '{{workflow.name}}'),
          "--version-name", '{{workflow.name}}_2',
          "--project", project]
      ).apply(gcp.use_gcp_secret('user-gcp-sa'))

  tfserving = dsl.ContainerOp(
      name = 'tfserving',
      image = 'gcr.io/google-samples/ml-pipeline-kubeflow-tfserve-taxi',
      arguments = ["--model_name", '{{workflow.name}}',
          "--model_path", '%s/%s/tf/serving_model_dir/export/chicago-taxi' % (working_dir, '{{workflow.name}}')]
      )
  tfserving2 = dsl.ContainerOp(
      name = 'tfserving2',
      image = 'gcr.io/google-samples/ml-pipeline-kubeflow-tfserve-taxi',
      arguments = ["--model_name", '{{workflow.name}}-2',
          "--model_path", '%s/%s/tf2/serving_model_dir/export/chicago-taxi' % (working_dir, '{{workflow.name}}')]
      )

  analyze.after(train)
  analyze2.after(train2)
  cmleop.after(train)
  cmleop2.after(train2)
  tfserving.after(train)
  tfserving2.after(train2)