Example #1
def mnist_pipeline(model_export_dir='gs://your-bucket/export',
                   train_steps='200',
                   learning_rate='0.01',
                   batch_size='100',
                   pvc_name=''):
  """
  Pipeline with three stages:
    1. train an MNIST classifier
    2. deploy a tf-serving instance to the cluster
    3. deploy a web-ui to interact with it
  """
  train = dsl.ContainerOp(
      name='train',
      image='gcr.io/kubeflow-examples/mnist/model:v20190304-v0.2-176-g15d997b',
      arguments=[
          "/opt/model.py",
          "--tf-export-dir", model_export_dir,
          "--tf-train-steps", train_steps,
          "--tf-batch-size", batch_size,
          "--tf-learning-rate", learning_rate
          ]
  )


  serve_args = [
      '--model-export-path', model_export_dir,
      '--server-name', "mnist-service"
  ]
  if platform != 'GCP':
    serve_args.extend([
        '--cluster-name', "mnist-pipeline",
        '--pvc-name', pvc_name
    ])

  serve = dsl.ContainerOp(
      name='serve',
      image='gcr.io/ml-pipeline/ml-pipeline-kubeflow-deployer:'
            '7775692adf28d6f79098e76e839986c9ee55dd61',
      arguments=serve_args
  )
  serve.after(train)


  webui_args = [
          '--image', 'gcr.io/kubeflow-examples/mnist/web-ui:'
                     'v20190304-v0.2-176-g15d997b-pipelines',
          '--name', 'web-ui',
          '--container-port', '5000',
          '--service-port', '80',
          '--service-type', "LoadBalancer"
  ]
  if platform != 'GCP':
    webui_args.extend([
      '--cluster-name', "mnist-pipeline"
    ])

  web_ui = dsl.ContainerOp(
      name='web-ui',
      image='gcr.io/kubeflow-examples/mnist/deploy-service:latest',
      arguments=webui_args
  )
  web_ui.after(serve)

  steps = [train, serve, web_ui]
  for step in steps:
    if platform == 'GCP':
      step.apply(gcp.use_gcp_secret('user-gcp-sa'))
    else:
      step.apply(onprem.mount_pvc(pvc_name, 'local-storage', '/mnt'))
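
The excerpt above assumes a module-level platform flag and KFP SDK v1 imports that are not shown. A minimal sketch of that surrounding context and of compiling the pipeline, under those assumptions (the flag value and output filename are placeholders):

# Assumed context for the excerpt above (KFP SDK v1); normally placed at the top of the module.
import kfp
from kfp import dsl, gcp, onprem

platform = 'GCP'  # module-level flag the excerpt branches on ('GCP' or anything else)

# The pipeline function normally carries a @dsl.pipeline(...) decorator; applying it
# explicitly is equivalent, and the result compiles to an Argo package for upload:
pipeline_func = dsl.pipeline(name='MNIST', description='Train and serve MNIST.')(mnist_pipeline)
kfp.compiler.Compiler().compile(pipeline_func, 'mnist_pipeline.tar.gz')
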
Example #2
  def __new__(
      cls,
      component_name: Text,
      input_dict: Dict[Text, Any],
      output_dict: Dict[Text, List[types.Artifact]],
      exec_properties: Dict[Text, Any],
      executor_class_path: Text,
      pipeline_properties: PipelineProperties,
  ):
    """Creates a new component.

    Args:
      component_name: TFX component name.
      input_dict: Dictionary of input names to TFX types, or
        kfp.dsl.PipelineParam representing input parameters.
      output_dict: Dictionary of output names to List of TFX types.
      exec_properties: Execution properties.
      executor_class_path: <module>.<class> for Python class of executor.
      pipeline_properties: Pipeline level properties shared by all components.

    Returns:
      Newly constructed TFX Kubeflow component instance.
    """
    outputs = output_dict.keys()
    file_outputs = {
        output: '/output/ml_metadata/{}'.format(output) for output in outputs
    }

    for k, v in pipeline_properties.exec_properties.items():
      exec_properties[k] = v

    arguments = [
        '--exec_properties',
        json.dumps(exec_properties),
        '--outputs',
        artifact_utils.jsonify_artifact_dict(output_dict),
        '--executor_class_path',
        executor_class_path,
        component_name,
    ]

    for k, v in input_dict.items():
      if isinstance(v, float) or isinstance(v, int):
        v = str(v)
      arguments.append('--{}'.format(k))
      arguments.append(v)

    container_op = dsl.ContainerOp(
        name=component_name,
        command=_COMMAND,
        image=pipeline_properties.tfx_image,
        arguments=arguments,
        file_outputs=file_outputs,
    )

    # Add the Argo workflow ID to the container's environment variable so it
    # can be used to uniquely place pipeline outputs under the pipeline_root.
    field_path = "metadata.labels['workflows.argoproj.io/workflow']"
    container_op.add_env_variable(
        k8s_client.V1EnvVar(
            name='WORKFLOW_ID',
            value_from=k8s_client.V1EnvVarSource(
                field_ref=k8s_client.V1ObjectFieldSelector(
                    field_path=field_path))))

    named_outputs = {output: container_op.outputs[output] for output in outputs}

    # This allows user code to refer to the ContainerOp 'op' output named 'x'
    # as op.outputs.x
    component_outputs = type('Output', (), named_outputs)

    return type(component_name, (BaseComponent,), {
        'container_op': container_op,
        'outputs': component_outputs
    })
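
The two type(...) calls above build classes on the fly so that callers can write op.outputs.some_name instead of indexing a dict. A small self-contained illustration of that pattern (the values here are placeholders, not real pipeline outputs):

# Stand-alone illustration of the dynamic-class trick used above.
named_outputs = {'model': '/output/ml_metadata/model',
                 'schema': '/output/ml_metadata/schema'}

# type(name, bases, namespace) creates a class whose attributes are the dict entries.
Output = type('Output', (), named_outputs)

assert Output.model == '/output/ml_metadata/model'
assert Output.schema == '/output/ml_metadata/schema'
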
Example #3
def some_op():
    return dsl.ContainerOp(
        name='sleep',
        image='busybox',
        command=['sleep 1'],
    )
def op():
    return dsl.ContainerOp(name='Some component name', image='image')
def init_container_pipeline():
    dsl.ContainerOp(name='hello',
                    image='alpine:latest',
                    command=['echo', 'hello'],
                    init_containers=[echo])
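
The init_container_pipeline snippet refers to an echo object that is not shown. In the KFP v1 DSL, init containers are passed as dsl.UserContainer instances, so the missing definition presumably looks roughly like this (a sketch, not the original code):

# Hypothetical definition of the `echo` init container referenced above (KFP SDK v1).
echo = dsl.UserContainer(name='echo',
                         image='alpine:latest',
                         command=['echo', 'bye'])
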
Example #6
def mlflow_pipeline():
    ml = dsl.ContainerOp(
        name="training pipeline",
        image="lego0142/pytorch_classifier:1.1",
    )
def taxi_cab_classification(
        output,
        project,
        column_names='gs://ml-pipeline-playground/tfx/taxi-cab-classification/column-names.json',
        key_columns='trip_start_timestamp',
        train='gs://ml-pipeline-playground/tfx/taxi-cab-classification/train.csv',
        evaluation='gs://ml-pipeline-playground/tfx/taxi-cab-classification/eval.csv',
        mode='local',
        preprocess_module='gs://ml-pipeline-playground/tfx/taxi-cab-classification/preprocessing.py',
        learning_rate=0.1,
        hidden_layer_size='1500',
        steps=3000,
        analyze_slice_column='trip_start_hour'):
    output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data'
    target_lambda = """lambda x: (x['target'] > x['fare'] * 0.2)"""
    target_class_lambda = """lambda x: 1 if (x['target'] > x['fare'] * 0.2) else 0"""

    tf_server_name = 'taxi-cab-classification-model-{{workflow.uid}}'

    if platform != 'GCP':
        vop = dsl.VolumeOp(name="create_pvc",
                           resource_name="pipeline-pvc",
                           modes=dsl.VOLUME_MODE_RWM,
                           size="1Gi")

        checkout = dsl.ContainerOp(
            name="checkout",
            image="alpine/git:latest",
            command=[
                "git", "clone", "https://github.com/kubeflow/pipelines.git",
                str(output) + "/pipelines"
            ],
        ).apply(onprem.mount_pvc(vop.outputs["name"], 'local-storage', output))
        checkout.after(vop)

    validation = dataflow_tf_data_validation_op(
        inference_data=train,
        validation_data=evaluation,
        column_names=column_names,
        key_columns=key_columns,
        gcp_project=project,
        run_mode=mode,
        validation_output=output_template,
    )
    if platform != 'GCP':
        validation.after(checkout)

    preprocess = dataflow_tf_transform_op(
        training_data_file_pattern=train,
        evaluation_data_file_pattern=evaluation,
        schema=validation.outputs['schema'],
        gcp_project=project,
        run_mode=mode,
        preprocessing_module=preprocess_module,
        transformed_data_dir=output_template)

    training = tf_train_op(transformed_data_dir=preprocess.output,
                           schema=validation.outputs['schema'],
                           learning_rate=learning_rate,
                           hidden_layer_size=hidden_layer_size,
                           steps=steps,
                           target='tips',
                           preprocessing_module=preprocess_module,
                           training_output_dir=output_template)

    analysis = dataflow_tf_model_analyze_op(
        model=training.output,
        evaluation_data=evaluation,
        schema=validation.outputs['schema'],
        gcp_project=project,
        run_mode=mode,
        slice_columns=analyze_slice_column,
        analysis_results_dir=output_template)

    prediction = dataflow_tf_predict_op(data_file_pattern=evaluation,
                                        schema=validation.outputs['schema'],
                                        target_column='tips',
                                        model=training.output,
                                        run_mode=mode,
                                        gcp_project=project,
                                        predictions_dir=output_template)

    cm = confusion_matrix_op(predictions=prediction.output,
                             target_lambda=target_lambda,
                             output_dir=output_template)

    roc = roc_op(predictions_dir=prediction.output,
                 target_lambda=target_class_lambda,
                 output_dir=output_template)

    if platform == 'GCP':
        deploy = kubeflow_deploy_op(model_dir=str(training.output) +
                                    '/export/export',
                                    server_name=tf_server_name)
    else:
        deploy = kubeflow_deploy_op(cluster_name=project,
                                    model_dir=str(training.output) +
                                    '/export/export',
                                    pvc_name=vop.outputs["name"],
                                    server_name=tf_server_name)

    steps = [
        validation, preprocess, training, analysis, prediction, cm, roc, deploy
    ]
    for step in steps:
        if platform == 'GCP':
            step.apply(gcp.use_gcp_secret('user-gcp-sa'))
        else:
            step.apply(
                onprem.mount_pvc(vop.outputs["name"], 'local-storage', output))
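
The dataflow_tf_*_op, tf_train_op, confusion_matrix_op, roc_op and kubeflow_deploy_op factories used above are not defined in this excerpt; in the original taxi sample they are typically created by loading reusable component definitions. A hedged sketch of that loading step (the file paths are placeholders; substitute the component.yaml files that ship with your KFP release):

# Placeholder component.yaml locations -- adjust to your checkout / KFP release.
import kfp.components as comp

dataflow_tf_data_validation_op = comp.load_component_from_file(
    'components/dataflow/tfdv/component.yaml')
dataflow_tf_transform_op = comp.load_component_from_file(
    'components/dataflow/tft/component.yaml')
tf_train_op = comp.load_component_from_file(
    'components/kubeflow/dnntrainer/component.yaml')
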
Example #8
def petcharts_pipeline_reptile(
    ACCESSKEY,
    SECRETKEY,
    BUCKET: str = "petcharts",
    CORPUSDATA: str = "corpus.txt",
    TRAINDATA: str = "unlabel_train1.csv",
    TESTDATA: str = "unlabel_test1.csv",
    TOKENIZER: str = "tokenizer.zip",
    PRETRAINED: str = "roberta.zip",
    TRANSFER: str = "roberta.transfer.zip",
    DOWNSTREAM: str = "reptile.zip",
    VOCABSIZE: int = 32000,
    CLASSES: int = 20,
    EPOCHS0: int = 24,
    EPOCHS1: int = 24,
    CONTRA_EPOCHS: int = 40,
    BATCHSIZE: int = 32,
    LOGDIR: str = "s3://petcharts/logs",
    LOGSTEPS: int = 500,
    SAVESTEPS: int = 10000,
    WEIGHTDECAY0: float = 0.1,
    WEIGHTDECAY1: float = 0.01,
    SCHEDULER0: str = "linear",
    SCHEDULER1: str = "linear",
    REGISTRYURL: str = "192.168.6.32:5000",
    HOSTURL: str = "http://minio-service.default.svc.cluster.local:9000",
):
    downstream = dsl.ContainerOp(
        name="training transfer learning",
        image="{}/petclassify:reptile".format(REGISTRYURL),
        arguments=[
            "--host",
            HOSTURL,
            "--accesskey",
            ACCESSKEY,
            "--secretkey",
            SECRETKEY,
            "--bucket",
            BUCKET,
            "--pretrained",
            PRETRAINED,
            "--transfer",
            TRANSFER,
            "--downstream",
            DOWNSTREAM,
            "--classes",
            CLASSES,
            "--epochs",
            EPOCHS1,
            "--batchsize",
            BATCHSIZE,
            "--weightdecay",
            WEIGHTDECAY1,
            "--scheduler",
            SCHEDULER1,
            "--logdir",
            "{}.{}".format(LOGDIR, "downstream"),
        ],
        output_artifact_paths={
            "mlpipeline-ui-metadata": "/opt/mlpipeline-ui-metadata.json"
        },
    )
    downstream.add_env_variable(V1EnvVar(name="S3_ENDPOINT", value=HOSTURL))
    downstream.add_env_variable(V1EnvVar(name="S3_USE_HTTPS", value="0"))
    downstream.add_env_variable(V1EnvVar(name="S3_VERIFY_SSL", value="0"))
    downstream.add_env_variable(
        V1EnvVar(name="AWS_ACCESS_KEY_ID", value=ACCESSKEY))
    downstream.add_env_variable(
        V1EnvVar(name="AWS_SECRET_ACCESS_KEY", value=SECRETKEY))
    downstream.set_gpu_limit(1)
    downstream.add_node_selector_constraint("gpu-accelerator",
                                            "nvidia-highend")
    downstream.container.set_image_pull_policy("Always")
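
The V1EnvVar objects attached to the op above come from the Kubernetes Python client; the import, not shown in the excerpt, is assumed to be:

# Assumed import for the environment-variable objects used above.
from kubernetes.client import V1EnvVar
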
def workflow1(
        input_handle_eval: dsl.PipelineParam = dsl.PipelineParam(
            name='input-handle-eval',
            value='gs://aju-dev-demos-codelabs/KF/taxidata/eval/data.csv'),
        input_handle_train: dsl.PipelineParam = dsl.PipelineParam(
            name='input-handle-train',
            value='gs://aju-dev-demos-codelabs/KF/taxidata/train/data.csv'),
        outfile_prefix_eval: dsl.PipelineParam = dsl.PipelineParam(
            name='outfile-prefix-eval', value='eval_transformed'),
        outfile_prefix_train: dsl.PipelineParam = dsl.PipelineParam(
            name='outfile-prefix-train', value='train_transformed'),
        train_steps: dsl.PipelineParam = dsl.PipelineParam(name='train-steps',
                                                           value=10000),
        project: dsl.PipelineParam = dsl.PipelineParam(
            name='project', value='YOUR_PROJECT_HERE'),
        working_dir: dsl.PipelineParam = dsl.PipelineParam(
            name='working-dir', value='YOUR_GCS_DIR_HERE'),
        tft_setup_file: dsl.PipelineParam = dsl.PipelineParam(
            name='tft-setup-file', value='/ml/transform/setup.py'),
        tfma_setup_file: dsl.PipelineParam = dsl.PipelineParam(
            name='tfma-setup-file', value='/ml/analysis/setup.py'),
        workers: dsl.PipelineParam = dsl.PipelineParam(name='workers',
                                                       value=1),
        pss: dsl.PipelineParam = dsl.PipelineParam(name='pss', value=1),
        max_rows: dsl.PipelineParam = dsl.PipelineParam(name='max-rows',
                                                        value=10000),
        ts1: dsl.PipelineParam = dsl.PipelineParam(name='ts1', value=''),
        ts2: dsl.PipelineParam = dsl.PipelineParam(name='ts2', value=''),
        preprocessing_module1: dsl.PipelineParam = dsl.PipelineParam(
            name='preprocessing-module1',
            value='gs://aju-dev-demos-codelabs/KF/taxi-preproc/preprocessing.py'),
        preprocessing_module2: dsl.PipelineParam = dsl.PipelineParam(
            name='preprocessing-module2',
            value='gs://aju-dev-demos-codelabs/KF/taxi-preproc/preprocessing2.py'),
        preprocess_mode: dsl.PipelineParam = dsl.PipelineParam(
            name='preprocess-mode', value='local'),
        tfma_mode: dsl.PipelineParam = dsl.PipelineParam(name='tfma-mode',
                                                         value='local')):

    tfteval = dsl.ContainerOp(
        name='tft-eval',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
        arguments=[
            "--input_handle", input_handle_eval, "--outfile_prefix",
            outfile_prefix_eval, "--working_dir",
            '%s/%s/tft-eval' % (working_dir, '{{workflow.name}}'), "--project",
            project, "--mode", preprocess_mode, "--setup_file", tft_setup_file,
            "--max-rows", '5000', "--ts1", ts1, "--ts2", ts2, "--stage",
            "eval", "--preprocessing-module", preprocessing_module1
        ]
        # file_outputs = {'transformed': '/output.txt'}
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
    tfttrain = dsl.ContainerOp(
        name='tft-train',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
        arguments=[
            "--input_handle", input_handle_train, "--outfile_prefix",
            outfile_prefix_train, "--working_dir",
            '%s/%s/tft-train' % (working_dir, '{{workflow.name}}'),
            "--project", project, "--mode", preprocess_mode, "--setup_file",
            tft_setup_file, "--max_rows", max_rows, "--ts1", ts1, "--ts2", ts2,
            "--stage", "train", "--preprocessing_module", preprocessing_module1
        ]).apply(gcp.use_gcp_secret('user-gcp-sa'))
    tfteval2 = dsl.ContainerOp(
        name='tft-eval2',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
        arguments=[
            "--input_handle", input_handle_eval, "--outfile_prefix",
            outfile_prefix_eval, "--working_dir",
            '%s/%s/tft-eval2' % (working_dir, '{{workflow.name}}'),
            "--project", project, "--mode", preprocess_mode, "--setup_file",
            tft_setup_file, "--max_rows", '5000', "--ts1", ts1, "--ts2", ts2,
            "--stage", "eval", "--preprocessing_module", preprocessing_module2
        ]).apply(gcp.use_gcp_secret('user-gcp-sa'))
    tfttrain2 = dsl.ContainerOp(
        name='tft-train2',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tftbq-taxi',
        arguments=[
            "--input_handle", input_handle_train, "--outfile_prefix",
            outfile_prefix_train, "--working_dir",
            '%s/%s/tft-train2' % (working_dir, '{{workflow.name}}'),
            "--project", project, "--mode", preprocess_mode, "--setup_file",
            tft_setup_file, "--max_rows", max_rows, "--ts1", ts1, "--ts2", ts2,
            "--stage", "train", "--preprocessing_module", preprocessing_module2
        ]).apply(gcp.use_gcp_secret('user-gcp-sa'))

    train = dsl.ContainerOp(
        name='train',
        image='gcr.io/google-samples/ml-pipeline-kubeflow-tf-taxi',
        arguments=[
            "--tf-transform-dir",
            '%s/%s/tft-train' % (working_dir, '{{workflow.name}}'),
            "--output-dir",
            '%s/%s/tf' % (working_dir, '{{workflow.name}}'), "--working-dir",
            '%s/%s/tf/serving_model_dir' % (working_dir, '{{workflow.name}}'),
            "--job-dir",
            '%s/%s/tf' % (working_dir, '{{workflow.name}}'),
            "--train-files-dir",
            '%s/%s/tft-train' % (working_dir, '{{workflow.name}}'),
            "--eval-files-dir",
            '%s/%s/tft-eval' % (working_dir, '{{workflow.name}}'),
            "--train-files-prefix", outfile_prefix_train,
            "--eval-files-prefix", outfile_prefix_eval, "--train-steps",
            train_steps, "--workers", workers, "--pss", pss
        ])
    train.after(tfteval)
    train.after(tfttrain)

    train2 = dsl.ContainerOp(
        name='train2',
        image='gcr.io/google-samples/ml-pipeline-kubeflow-tf-taxi',
        arguments=[
            "--tf-transform-dir",
            '%s/%s/tft-train2' % (working_dir, '{{workflow.name}}'),
            "--output-dir",
            '%s/%s/tf2' % (working_dir, '{{workflow.name}}'), "--working-dir",
            '%s/%s/tf2/serving_model_dir' % (working_dir, '{{workflow.name}}'),
            "--job-dir",
            '%s/%s/tf2' % (working_dir, '{{workflow.name}}'),
            "--train-files-dir",
            '%s/%s/tft-train2' % (working_dir, '{{workflow.name}}'),
            "--eval-files-dir",
            '%s/%s/tft-eval2' % (working_dir, '{{workflow.name}}'),
            "--train-files-prefix", outfile_prefix_train,
            "--eval-files-prefix", outfile_prefix_eval, "--train-steps",
            train_steps, "--workers", '1', "--pss", '1'
        ])
    train2.after(tfteval2)
    train2.after(tfttrain2)

    analyze = dsl.ContainerOp(
        name='analyze',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tfma-taxi',
        arguments=[
            "--input_csv", input_handle_eval, "--tfma_run_dir",
            '%s/%s/tfma/output' % (working_dir, '{{workflow.name}}'),
            "--eval_model_dir",
            '%s/%s/tf/eval_model_dir' % (working_dir, '{{workflow.name}}'),
            "--mode", tfma_mode, "--setup_file", tfma_setup_file, "--project",
            project
        ]).apply(gcp.use_gcp_secret('user-gcp-sa'))
    analyze2 = dsl.ContainerOp(
        name='analyze2',
        image='gcr.io/google-samples/ml-pipeline-dataflow-tfma-taxi',
        arguments=[
            "--input_csv", input_handle_eval, "--tfma_run_dir",
            '%s/%s/tfma2/output' % (working_dir, '{{workflow.name}}'),
            "--eval_model_dir",
            '%s/%s/tf2/eval_model_dir' % (working_dir, '{{workflow.name}}'),
            "--mode", tfma_mode, "--setup_file", tfma_setup_file, "--project",
            project
        ]).apply(gcp.use_gcp_secret('user-gcp-sa'))

    cmleop = dsl.ContainerOp(
        name='cmleop',
        image='gcr.io/google-samples/ml-pipeline-cmle-op',
        arguments=[
            "--gcs-path",
            '%s/%s/tf/serving_model_dir/export/chicago-taxi' %
            (working_dir, '{{workflow.name}}'), "--version-name",
            '{{workflow.name}}', "--project", project
        ]).apply(gcp.use_gcp_secret('user-gcp-sa'))
    cmleop2 = dsl.ContainerOp(
        name='cmleop2',
        image='gcr.io/google-samples/ml-pipeline-cmle-op',
        arguments=[
            "--gcs-path",
            '%s/%s/tf2/serving_model_dir/export/chicago-taxi' %
            (working_dir, '{{workflow.name}}'), "--version-name",
            '{{workflow.name}}_2', "--project", project
        ]).apply(gcp.use_gcp_secret('user-gcp-sa'))

    tfserving = dsl.ContainerOp(
        name='tfserving',
        image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve-taxi',
        arguments=[
            "--model_name", '{{workflow.name}}', "--model_path",
            '%s/%s/tf/serving_model_dir/export/chicago-taxi' %
            (working_dir, '{{workflow.name}}')
        ])
    tfserving2 = dsl.ContainerOp(
        name='tfserving2',
        image='gcr.io/google-samples/ml-pipeline-kubeflow-tfserve-taxi',
        arguments=[
            "--model_name", '{{workflow.name}}-2', "--model_path",
            '%s/%s/tf2/serving_model_dir/export/chicago-taxi' %
            (working_dir, '{{workflow.name}}')
        ])

    analyze.after(train)
    analyze2.after(train2)
    cmleop.after(train)
    cmleop2.after(train2)
    tfserving.after(train)
    tfserving2.after(train2)
Example #10
    def test_operator_to_template(self):
        """Test converting operator to template"""

        from kubernetes import client as k8s_client

        with dsl.Pipeline('somename') as p:
            msg1 = dsl.PipelineParam('msg1')
            msg2 = dsl.PipelineParam('msg2', value='value2')
            op = dsl.ContainerOp(name='echo', image='image', command=['sh', '-c'],
                                 arguments=['echo %s %s | tee /tmp/message.txt' % (msg1, msg2)],
                                 file_outputs={'merged': '/tmp/message.txt'}) \
              .add_volume_mount(k8s_client.V1VolumeMount(
                mount_path='/secret/gcp-credentials',
                name='gcp-credentials')) \
              .add_env_variable(k8s_client.V1EnvVar(
                name='GOOGLE_APPLICATION_CREDENTIALS',
                value='/secret/gcp-credentials/user-gcp-sa.json'))
        golden_output = {
            'container': {
                'image':
                'image',
                'args': [
                    'echo {{inputs.parameters.msg1}} {{inputs.parameters.msg2}} | tee /tmp/message.txt'
                ],
                'command': ['sh', '-c'],
                'env': [{
                    'name': 'GOOGLE_APPLICATION_CREDENTIALS',
                    'value': '/secret/gcp-credentials/user-gcp-sa.json'
                }],
                'volumeMounts': [{
                    'mountPath': '/secret/gcp-credentials',
                    'name': 'gcp-credentials',
                }]
            },
            'inputs': {
                'parameters': [
                    {
                        'name': 'msg1'
                    },
                    {
                        'name': 'msg2',
                        'value': 'value2'
                    },
                ]
            },
            'name': 'echo',
            'outputs': {
                'parameters': [{
                    'name': 'echo-merged',
                    'valueFrom': {
                        'path': '/tmp/message.txt'
                    }
                }],
                'artifacts': [{
                    'name': 'mlpipeline-ui-metadata',
                    'path': '/mlpipeline-ui-metadata.json',
                    's3': {
                        'accessKeySecret': {
                            'key': 'accesskey',
                            'name': 'mlpipeline-minio-artifact',
                        },
                        'bucket': 'mlpipeline',
                        'endpoint': 'minio-service.kubeflow:9000',
                        'insecure': True,
                        'key':
                        'runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-ui-metadata.tgz',
                        'secretKeySecret': {
                            'key': 'secretkey',
                            'name': 'mlpipeline-minio-artifact',
                        }
                    }
                }, {
                    'name': 'mlpipeline-metrics',
                    'path': '/mlpipeline-metrics.json',
                    's3': {
                        'accessKeySecret': {
                            'key': 'accesskey',
                            'name': 'mlpipeline-minio-artifact',
                        },
                        'bucket': 'mlpipeline',
                        'endpoint': 'minio-service.kubeflow:9000',
                        'insecure': True,
                        'key':
                        'runs/{{workflow.uid}}/{{pod.name}}/mlpipeline-metrics.tgz',
                        'secretKeySecret': {
                            'key': 'secretkey',
                            'name': 'mlpipeline-minio-artifact',
                        }
                    }
                }]
            }
        }

        self.maxDiff = None
        self.assertEqual(golden_output,
                         compiler.Compiler()._op_to_template(op))
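
The mlpipeline-ui-metadata and mlpipeline-metrics artifacts in the golden output are not declared on the op; the KFP v1 compiler attaches these default artifact outputs to every template, which is what the test checks. A quick way to inspect the same structure for a whole pipeline is to compile one and read the generated workflow (pipeline and file names below are illustrative):

import kfp
from kfp import dsl

@dsl.pipeline(name='echo-demo', description='Tiny pipeline used only to inspect the compiled spec.')
def echo_demo(msg1: str = 'hello', msg2: str = 'value2'):
    dsl.ContainerOp(name='echo',
                    image='image',
                    command=['sh', '-c'],
                    arguments=['echo %s %s | tee /tmp/message.txt' % (msg1, msg2)],
                    file_outputs={'merged': '/tmp/message.txt'})

kfp.compiler.Compiler().compile(echo_demo, 'echo_demo.yaml')
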
def train_and_deploy(project=dsl.PipelineParam(name='project',
                                               value='cloud-training-demos'),
                     bucket=dsl.PipelineParam(name='bucket',
                                              value='cloud-training-demos-ml'),
                     startYear=dsl.PipelineParam(name='startYear',
                                                 value='2000')):
    """Pipeline to train babyweight model"""
    start_step = 3

    # Step 1: create training dataset using Apache Beam on Cloud Dataflow
    if start_step <= 1:
        preprocess = dsl.ContainerOp(
            name='preprocess',
            # image needs to be a compile-time string
            image=
            'gcr.io/cloud-training-demos/babyweight-pipeline-bqtocsv:latest',
            arguments=[
                '--project', project, '--mode', 'cloud', '--bucket', bucket,
                '--start_year', startYear
            ],
            file_outputs={'bucket': '/output.txt'})
    else:
        preprocess = ObjectDict({'outputs': {'bucket': bucket}})

    # Step 2: Do hyperparameter tuning of the model on Cloud ML Engine
    if start_step <= 2:
        hparam_train = dsl.ContainerOp(
            name='hypertrain',
            # image needs to be a compile-time string
            image=
            'gcr.io/cloud-training-demos/babyweight-pipeline-hypertrain:latest',
            arguments=[preprocess.outputs['bucket']],
            file_outputs={'jobname': '/output.txt'})
    else:
        hparam_train = ObjectDict(
            {'outputs': {
                'jobname': 'babyweight_181008_210829'
            }})

    # Step 3: Train the model some more, but on the pipelines cluster itself
    if start_step <= 3:
        # train: /output.txt is the model directory
        train_tuned = kubeflow_tfjob_launcher_op(
            container_image=
            'gcr.io/cloud-training-demos/babyweight-pipeline-traintuned-trainer:latest',
            command=[hparam_train.outputs['jobname'], bucket],
            number_of_workers=10,
            number_of_parameter_servers=3,
            tfjob_timeout_minutes=5,
            step_name='traintuned')
    else:
        train_tuned = ObjectDict({
            'outputs': {
                'train':
                'gs://cloud-training-demos-ml/babyweight/hyperparam/15'
            }
        })

    # Step 4: Deploy the trained model to Cloud ML Engine
    if start_step <= 4:
        deploy_cmle = dsl.ContainerOp(
            name='deploycmle',
            # image needs to be a compile-time string
            image=
            'gcr.io/cloud-training-demos/babyweight-pipeline-deploycmle:latest',
            arguments=[
                train_tuned.outputs['train'],  # modeldir
                'babyweight',
                'mlp'
            ],
            file_outputs={
                'model': '/model.txt',
                'version': '/version.txt'
            })
    else:
        deploy_cmle = ObjectDict(
            {'outputs': {
                'model': 'babyweight',
                'version': 'mlp'
            }})

    # Step 5: Deploy the trained model to AppEngine
    if start_step <= 5:
        deploy_cmle = dsl.ContainerOp(
            name='deployapp',
            # image needs to be a compile-time string
            image=
            'gcr.io/cloud-training-demos/babyweight-pipeline-deployapp:latest',
            arguments=[
                deploy_cmle.outputs['model'], deploy_cmle.outputs['version']
            ],
            file_outputs={'appurl': '/appurl.txt'})
    else:
        deploy_cmle = ObjectDict({
            'outputs': {
                'appurl': 'https://cloud-training-demos.appspot.com/'
            }
        })
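
ObjectDict, used in the skip branches above, is not part of the KFP SDK; in the original sample it is a small helper that lets a plain dict stand in for a skipped step while still exposing .outputs. A minimal sketch of such a helper (an assumption about the original, not a verbatim copy):

class ObjectDict(dict):
    """Dict whose keys are also readable as attributes, e.g. obj.outputs['bucket']."""

    def __getattr__(self, name):
        if name in self:
            return self[name]
        raise AttributeError('No such attribute: ' + name)
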
Example #12
def nlp_pipeline(
        csv_url="https://raw.githubusercontent.com/axsauze/reddit-classification-exploration/master/data/reddit_train.csv",
        csv_encoding="ISO-8859-1",
        features_column="BODY",
        labels_column="REMOVED",
        raw_text_path='/mnt/text.data',
        labels_path='/mnt/labels.data',
        clean_text_path='/mnt/clean.data',
        spacy_tokens_path='/mnt/tokens.data',
        tfidf_vectors_path='/mnt/tfidf.data',
        lr_prediction_path='/mnt/prediction.data',
        tfidf_model_path='/mnt/tfidf.model',
        lr_model_path='/mnt/lr.model',
        lr_c_param=0.1,
        tfidf_max_features=10000,
        tfidf_ngram_range=3,
        batch_size='100',
        github_branch='master'):
    """
    Pipeline 
    """

    pvc_metadata = V1ObjectMeta(name="{{workflow.name}}-my-pvc",
                                labels={
                                    "branch":
                                    "{{workflow.parameters.github-branch}}",
                                    "app": "nlp"
                                })
    requested_resources = V1ResourceRequirements(requests={"storage": "1Gi"})
    pvc_spec = V1PersistentVolumeClaimSpec(access_modes=["ReadWriteOnce"],
                                           resources=requested_resources)
    pvc = V1PersistentVolumeClaim(api_version="v1",
                                  kind="PersistentVolumeClaim",
                                  metadata=pvc_metadata,
                                  spec=pvc_spec)

    vop = dsl.VolumeOp(name="create-pvc", k8s_resource=pvc, modes=None)

    download_step = dsl.ContainerOp(
        name='data_downloader',
        image='maximmold/data_downloader:0.1',
        command="python",
        arguments=[
            "/microservice/pipeline_step.py", "--labels-path", labels_path,
            "--features-path", raw_text_path, "--csv-url", csv_url,
            "--csv-encoding", csv_encoding, "--features-column",
            features_column, "--labels-column", labels_column
        ],
        pvolumes={"/mnt": vop.volume})

    clean_step = dsl.ContainerOp(name='clean_text',
                                 image='maximmold/clean_text_transformer:0.1',
                                 command="python",
                                 arguments=[
                                     "/microservice/pipeline_step.py",
                                     "--in-path",
                                     raw_text_path,
                                     "--out-path",
                                     clean_text_path,
                                 ],
                                 pvolumes={"/mnt": download_step.pvolume})

    tokenize_step = dsl.ContainerOp(name='tokenize',
                                    image='maximmold/spacy_tokenizer:0.1',
                                    command="python",
                                    arguments=[
                                        "/microservice/pipeline_step.py",
                                        "--in-path",
                                        clean_text_path,
                                        "--out-path",
                                        spacy_tokens_path,
                                    ],
                                    pvolumes={"/mnt": clean_step.pvolume})

    vectorize_step = dsl.ContainerOp(name='vectorize',
                                     image='maximmold/tfidf_vectorizer:0.1',
                                     command="python",
                                     arguments=[
                                         "/microservice/pipeline_step.py",
                                         "--in-path",
                                         spacy_tokens_path,
                                         "--out-path",
                                         tfidf_vectors_path,
                                         "--max-features",
                                         tfidf_max_features,
                                         "--ngram-range",
                                         tfidf_ngram_range,
                                         "--action",
                                         "train",
                                         "--model-path",
                                         tfidf_model_path,
                                     ],
                                     pvolumes={"/mnt": tokenize_step.pvolume})

    predict_step = dsl.ContainerOp(name='predictor',
                                   image='maximmold/lr_text_classifier:0.1',
                                   command="python",
                                   arguments=[
                                       "/microservice/pipeline_step.py",
                                       "--in-path",
                                       tfidf_vectors_path,
                                       "--labels-path",
                                       labels_path,
                                       "--out-path",
                                       lr_prediction_path,
                                       "--c-param",
                                       lr_c_param,
                                       "--action",
                                       "train",
                                       "--model-path",
                                       lr_model_path,
                                   ],
                                   pvolumes={"/mnt": vectorize_step.pvolume})

    try:
        seldon_config = yaml.safe_load(
            open("../deploy_pipeline/seldon_production_pipeline.yaml"))
    except FileNotFoundError:
        # If this file is run from the project core directory
        seldon_config = yaml.safe_load(
            open("deploy_pipeline/seldon_production_pipeline.yaml"))

    deploy_step = dsl.ResourceOp(
        action="apply",
        name="seldondeploy",
        k8s_resource=seldon_config,
        attribute_outputs={"name": "{.metadata.name}"})

    deploy_step.after(predict_step)

    delete_previous_pvc = dsl.ContainerOp(
        name="deletepreviouspvc",
        image="bitnami/kubectl",
        command="kubectl",
        arguments=[
            "delete", "pvc", "-l",
            "app=nlp,branch={{workflow.parameters.github-branch}}",
            "--field-selector", "metadata.name!={{workflow.name}}-my-pvc",
            "--grace-period=0", "--force", "--wait=false"
        ])

    delete_previous_pvc.after(deploy_step)

    patch_pvc_finalizer = dsl.ContainerOp(
        name="patchpvcfinalizer",
        image="bitnami/kubectl",
        command=["bash"],
        arguments=[
            "-c",
            'for j in $(kubectl get pvc -o name -l app=nlp,branch={{workflow.parameters.github-branch}} --field-selector metadata.name!={{workflow.name}}-my-pvc -n kubeflow); do kubectl patch $j -p '
            "'"
            '{"metadata":{"finalizers": []}}'
            "'"
            ' -n kubeflow --type=merge; done'
        ])

    patch_pvc_finalizer.after(delete_previous_pvc)
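
This pipeline builds the PVC manifest from raw Kubernetes client model objects; the imports it relies on, not shown in the excerpt, presumably look like this:

# Assumed imports for the excerpt above.
import yaml
from kfp import dsl
from kubernetes.client import (V1ObjectMeta, V1PersistentVolumeClaim,
                               V1PersistentVolumeClaimSpec,
                               V1ResourceRequirements)
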
Example #13
    def _build_kfp_ops(
        self,
        node_dependencies: Dict[Node, Set[Node]],
        image,
        image_pull_policy,
    ) -> Dict[str, dsl.ContainerOp]:
        """Build kfp container graph from Kedro node dependencies. """
        kfp_ops = {}

        node_volumes = (
            self._setup_volumes(image, image_pull_policy)
            if self.run_config.volume is not None
            else {}
        )

        iap_env_var = k8s.V1EnvVar(
            name=IAP_CLIENT_ID, value=os.environ.get(IAP_CLIENT_ID, "")
        )
        nodes_env = [iap_env_var]

        if is_mlflow_enabled():
            kfp_ops["mlflow-start-run"] = self._customize_op(
                dsl.ContainerOp(
                    name="mlflow-start-run",
                    image=image,
                    command=["kedro"],
                    arguments=[
                        "kubeflow",
                        "mlflow-start",
                        dsl.RUN_ID_PLACEHOLDER,
                    ],
                    container_kwargs={"env": [iap_env_var]},
                    file_outputs={"mlflow_run_id": "/tmp/mlflow_run_id"},
                ),
                image_pull_policy,
            )

            nodes_env.append(
                k8s.V1EnvVar(
                    name="MLFLOW_RUN_ID",
                    value=kfp_ops["mlflow-start-run"].output,
                )
            )

        for node in node_dependencies:
            name = clean_name(node.name)
            params = ",".join(
                [
                    f"{param}:{dsl.PipelineParam(param)}"
                    for param in self.context.params.keys()
                ]
            )
            kwargs = {"env": nodes_env}
            if self.run_config.resources.is_set_for(node.name):
                kwargs["resources"] = k8s.V1ResourceRequirements(
                    limits=self.run_config.resources.get_for(node.name),
                    requests=self.run_config.resources.get_for(node.name),
                )

            kfp_ops[node.name] = self._customize_op(
                dsl.ContainerOp(
                    name=name,
                    image=image,
                    command=["kedro"],
                    arguments=[
                        "run",
                        "--params",
                        params,
                        "--node",
                        node.name,
                    ],
                    pvolumes=node_volumes,
                    container_kwargs=kwargs,
                    file_outputs={
                        output: "/home/kedro/"
                        + self.catalog[output]["filepath"]
                        for output in node.outputs
                        if output in self.catalog
                    },
                ),
                image_pull_policy,
            )

        return kfp_ops
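
The MLFLOW_RUN_ID wiring above relies on a KFP v1 convenience: a ContainerOp that declares exactly one entry in file_outputs exposes it as .output, which downstream ops can consume. A small sketch of the same mechanism in isolation (images and paths are placeholders):

from kfp import dsl
from kubernetes import client as k8s

@dsl.pipeline(name='single-output-demo')
def single_output_demo():
    # An op that declares exactly one file output...
    start_run = dsl.ContainerOp(
        name='mlflow-start-run',
        image='alpine',  # placeholder image
        command=['sh', '-c', 'echo my-run-id > /tmp/mlflow_run_id'],
        file_outputs={'mlflow_run_id': '/tmp/mlflow_run_id'},
    )
    # ...exposes it as `.output`; here it feeds an env var of a later step,
    # mirroring what the plugin code above does.
    dsl.ContainerOp(
        name='use-run-id',
        image='alpine',  # placeholder image
        command=['sh', '-c', 'echo "$MLFLOW_RUN_ID"'],
        container_kwargs={'env': [k8s.V1EnvVar(name='MLFLOW_RUN_ID',
                                               value=start_run.output)]},
    )
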
Example #14
def mlrun_op(name: str = '',
             project: str = '',
             image: str = 'v3io/mlrun',
             runtime: str = '',
             command: str = '',
             secrets: list = [],
             params: dict = {},
             hyperparams: dict = {},
             param_file: str = '',
             inputs: dict = {},
             outputs: dict = {},
             in_path: str = '',
             out_path: str = '',
             rundb: str = '',
             mode: str = ''):
    """mlrun KubeFlow pipelines operator, use to form pipeline steps

    when using kubeflow pipelines, each step is wrapped in an mlrun_op
    one step can pass state and data to the next step, see example below.

    :param name:    name used for the step
    :param project: optional, project name
    :param image:   optional, run container image (will be executing the step);
                    the container should host all required packages + code
                    for the run, alternatively the user can mount packages/code
                    via shared file volumes like v3io (see example below)
    :param runtime: optional, runtime specification
    :param command: exec command (or URL for functions)
    :param secrets: extra secrets specs, will be injected into the runtime
                    e.g. ['file=<filename>', 'env=ENV_KEY1,ENV_KEY2']
    :param params:  dictionary of run parameters and values
    :param hyperparams: dictionary of hyper parameters and list values, each
                        hyperparam holds a list of values, the run will be
                        executed for every parameter combination (GridSearch)
    :param param_file:  a csv file with parameter combinations; the first row
                        holds the parameter names, the following rows hold the
                        parameter values
    :param inputs:   dictionary of input objects + optional paths (if a path is
                     omitted it defaults to in_path/key)
    :param outputs:  dictionary of output objects + optional paths (if a path is
                     omitted it defaults to out_path/key)
    :param in_path:  default input path/url (prefix) for inputs
    :param out_path: default output path/url (prefix) for artifacts
    :param rundb:    path for rundb (or use 'MLRUN_META_DBPATH' env instead)
    :param mode:     run mode, e.g. 'noctx' for pushing params as args

    :return: KFP step operation

    Example:
    from kfp import dsl
    from mlrun import mlrun_op
    from mlrun.platforms import mount_v3io

    def mlrun_train(p1, p2):
        return mlrun_op('training',
                        command = '/User/kubeflow/training.py',
                        params = {'p1':p1, 'p2':p2},
                        outputs = {'model.txt':'', 'dataset.csv':''},
                        out_path ='v3io:///bigdata/mlrun/{{workflow.uid}}/',
                        rundb = '/User/kubeflow')

    # use data from the first step
    def mlrun_validate(modelfile):
        return mlrun_op('validation',
                    command = '/User/kubeflow/validation.py',
                    inputs = {'model.txt':modelfile},
                    out_path ='v3io:///bigdata/mlrun/{{workflow.uid}}/',
                    rundb = '/User/kubeflow')

    @dsl.pipeline(
        name='My MLRUN pipeline', description='Shows how to use mlrun.'
    )
    def mlrun_pipeline(
        p1 = 5 , p2 = '"text"'
    ):
        # run training, mount_v3io will mount "/User" into the pipeline step
        train = mlrun_train(p1, p2).apply(mount_v3io())

    # feed the 1st step results into the second step
        validate = mlrun_validate(train.outputs['model-txt']).apply(mount_v3io())

    """
    from kfp import dsl
    from os import environ

    rundb = rundb or environ.get('MLRUN_META_DBPATH')
    cmd = [
        'python', '-m', 'mlrun', 'run', '--kfp', '--workflow',
        '{{workflow.uid}}', '--name', name
    ]
    file_outputs = {}
    for s in secrets:
        cmd += ['-s', '{}'.format(s)]
    for p, val in params.items():
        cmd += ['-p', '{}={}'.format(p, val)]
    for x, val in hyperparams.items():
        cmd += ['-x', '{}={}'.format(x, val)]
    for i, val in inputs.items():
        cmd += ['-i', '{}={}'.format(i, val)]
    for o, val in outputs.items():
        cmd += ['-o', '{}={}'.format(o, val)]
        file_outputs[o.replace('.', '-')] = '/tmp/{}'.format(o)
    if project:
        cmd += ['--project', project]
    if runtime:
        cmd += ['--runtime', runtime]
    if in_path:
        cmd += ['--in-path', in_path]
    if out_path:
        cmd += ['--out-path', out_path]
    if rundb:
        cmd += ['--rundb', rundb]
    if param_file:
        cmd += ['--param-file', param_file]
    if mode:
        cmd += ['--mode', mode]

    if hyperparams or param_file:
        file_outputs['iterations'] = '/tmp/iteration_results.csv'

    cop = dsl.ContainerOp(
        name=name,
        image=image,
        command=cmd + [command],
        file_outputs=file_outputs,
    )
    return cop
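
The loop over outputs above replaces dots with dashes when registering file_outputs, which is why the docstring example reads train.outputs['model-txt'] for an output declared as 'model.txt'. The transformation in isolation:

# How output names map to file_outputs keys in mlrun_op above.
outputs = {'model.txt': '', 'dataset.csv': ''}
file_outputs = {o.replace('.', '-'): '/tmp/{}'.format(o) for o in outputs}
assert file_outputs == {'model-txt': '/tmp/model.txt', 'dataset-csv': '/tmp/dataset.csv'}
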
Example #15
    def __init__(self,
                 component: tfx_base_component.BaseComponent,
                 component_launcher_class: Type[
                     base_component_launcher.BaseComponentLauncher],
                 depends_on: Set[dsl.ContainerOp],
                 pipeline: tfx_pipeline.Pipeline,
                 pipeline_name: Text,
                 pipeline_root: dsl.PipelineParam,
                 tfx_image: Text,
                 kubeflow_metadata_config: Optional[
                     kubeflow_pb2.KubeflowMetadataConfig],
                 component_config: base_component_config.BaseComponentConfig,
                 pod_labels_to_attach: Optional[Dict[Text, Text]] = None):
        """Creates a new Kubeflow-based component.

    This class essentially wraps a dsl.ContainerOp construct in Kubeflow
    Pipelines.

    Args:
      component: The logical TFX component to wrap.
      component_launcher_class: the class of the launcher to launch the
        component.
      depends_on: The set of upstream KFP ContainerOp components that this
        component will depend on.
      pipeline: The logical TFX pipeline to which this component belongs.
      pipeline_name: The name of the TFX pipeline.
      pipeline_root: The pipeline root specified, as a dsl.PipelineParam
      tfx_image: The container image to use for this component.
      kubeflow_metadata_config: Configuration settings for connecting to the
        MLMD store in a Kubeflow cluster.
      component_config: Component config to launch the component.
      pod_labels_to_attach: Optional dict of pod labels to attach to the
        GKE pod.
    """
        component_launcher_class_path = '.'.join([
            component_launcher_class.__module__,
            component_launcher_class.__name__
        ])

        serialized_component = utils.replace_placeholder(
            json_utils.dumps(node_wrapper.NodeWrapper(component)))

        arguments = [
            '--pipeline_name',
            pipeline_name,
            '--pipeline_root',
            pipeline_root,
            '--kubeflow_metadata_config',
            json_format.MessageToJson(message=kubeflow_metadata_config,
                                      preserving_proto_field_name=True),
            '--beam_pipeline_args',
            json.dumps(pipeline.beam_pipeline_args),
            '--additional_pipeline_args',
            json.dumps(pipeline.additional_pipeline_args),
            '--component_launcher_class_path',
            component_launcher_class_path,
            '--serialized_component',
            serialized_component,
            '--component_config',
            json_utils.dumps(component_config),
        ]

        if component.enable_cache or (component.enable_cache is None
                                      and pipeline.enable_cache):
            arguments.append('--enable_cache')

        self.container_op = dsl.ContainerOp(
            name=component.id.replace('.', '_'),
            command=_COMMAND,
            image=tfx_image,
            arguments=arguments,
            output_artifact_paths={
                'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json',
            },
        )

        absl.logging.info(
            'Adding upstream dependencies for component {}'.format(
                self.container_op.name))
        for op in depends_on:
            absl.logging.info('   ->  Component: {}'.format(op.name))
            self.container_op.after(op)

        # TODO(b/140172100): Document the use of additional_pipeline_args.
        if _WORKFLOW_ID_KEY in pipeline.additional_pipeline_args:
            # Allow overriding pipeline's run_id externally, primarily for testing.
            self.container_op.container.add_env_variable(
                k8s_client.V1EnvVar(
                    name=_WORKFLOW_ID_KEY,
                    value=pipeline.additional_pipeline_args[_WORKFLOW_ID_KEY]))
        else:
            # Add the Argo workflow ID to the container's environment variable so it
            # can be used to uniquely place pipeline outputs under the pipeline_root.
            field_path = "metadata.labels['workflows.argoproj.io/workflow']"
            self.container_op.container.add_env_variable(
                k8s_client.V1EnvVar(
                    name=_WORKFLOW_ID_KEY,
                    value_from=k8s_client.V1EnvVarSource(
                        field_ref=k8s_client.V1ObjectFieldSelector(
                            field_path=field_path))))

        if pod_labels_to_attach:
            for k, v in pod_labels_to_attach.items():
                self.container_op.add_pod_label(k, v)
Example #16
def deploy_model_op(model):

    return dsl.ContainerOp(
        name='Deploy Model',
        image='gcr.io/kube01/kubeflow/presidentialelections/deploy:latest',
        arguments=['--model', model])
def nlp_pipeline(
    namespace="kubeflow",
    seldon_server="SKLEARN_SERVER",
    model_path="gs://seldon-models/v1.13.0-dev/sklearn/iris",
    gateway_endpoint="istio-ingressgateway.istio-system.svc.cluster.local",
    retries=3,
    replicas=10,
    workers=100,
    input_path="data/input-data.txt",
    output_path="data/output-data.txt",
):
    """
    Pipeline 
    """

    vop = dsl.VolumeOp(
        name="seldon-batch-pvc",
        resource_name="seldon-batch-pvc",
        modes=dsl.VOLUME_MODE_RWO,
        size="2Mi",
    )

    seldon_deployment_yaml = f"""
apiVersion: machinelearning.seldon.io/v1
kind: SeldonDeployment
metadata:
  name: "{{{{workflow.name}}}}"
  namespace: "{namespace}"
spec:
  name: "{{{{workflow.name}}}}"
  predictors:
  - graph:
      children: []
      implementation: "{seldon_server}"
      modelUri: "{model_path}"
      name: classifier
    name: default
    """

    deploy_step = dsl.ResourceOp(
        name="deploy_seldon",
        action="create",
        k8s_resource=yaml.safe_load(seldon_deployment_yaml),
    )

    scale_and_wait = dsl.ContainerOp(
        name="scale_and_wait_seldon",
        image="bitnami/kubectl:1.17",
        command="bash",
        arguments=[
            "-c",
            f"sleep 10 && kubectl scale --namespace {namespace} --replicas={replicas} sdep/{{{{workflow.name}}}} && sleep 2 && kubectl rollout status deploy/$(kubectl get deploy -l seldon-deployment-id={{{{workflow.name}}}} -o jsonpath='{{.items[0].metadata.name'}})",
        ],
    )

    download_from_object_store = dsl.ContainerOp(
        name="download-from-object-store",
        image="minio/mc:RELEASE.2020-04-17T08-55-48Z",
        command="sh",
        arguments=[
            "-c",
            f"mc config host add minio-local http://minio.default.svc.cluster.local:9000 minioadmin minioadmin && mc cp minio-local/{input_path} /assets/input-data.txt",
        ],
        pvolumes={"/assets": vop.volume},
    )

    batch_process_step = dsl.ContainerOp(
        name="data_downloader",
        image="seldonio/seldon-core-s2i-python37:1.1.1-rc",
        command="seldon-batch-processor",
        arguments=[
            "--deployment-name",
            "{{workflow.name}}",
            "--namespace",
            namespace,
            "--host",
            gateway_endpoint,
            "--retries",
            retries,
            "--input-data-path",
            "/assets/input-data.txt",
            "--output-data-path",
            "/assets/output-data.txt",
            "--benchmark",
        ],
        pvolumes={"/assets": vop.volume},
    )

    upload_to_object_store = dsl.ContainerOp(
        name="upload-to-object-store",
        image="minio/mc:RELEASE.2020-04-17T08-55-48Z",
        command="sh",
        arguments=[
            "-c",
            f"mc config host add minio-local http://minio.default.svc.cluster.local:9000 minioadmin minioadmin && mc cp /assets/output-data.txt minio-local/{output_path}",
        ],
        pvolumes={"/assets": vop.volume},
    )

    delete_step = dsl.ResourceOp(
        name="delete_seldon",
        action="delete",
        k8s_resource=yaml.safe_load(seldon_deployment_yaml),
    )

    scale_and_wait.after(deploy_step)
    download_from_object_store.after(scale_and_wait)
    batch_process_step.after(download_from_object_store)
    upload_to_object_store.after(batch_process_step)
    delete_step.after(upload_to_object_store)
def train_and_deploy(project='cloud-training-demos',
                     bucket='cloud-training-demos-ml',
                     startYear='2000'):
    """Pipeline to train babyweight model"""
    start_step = 1

    # Step 1: create training dataset using Apache Beam on Cloud Dataflow
    if start_step <= 1:
        preprocess = dsl.ContainerOp(
            name='preprocess',
            # image needs to be a compile-time string
            image=
            'gcr.io/cloud-training-demos/babyweight-pipeline-bqtocsv:latest',
            arguments=[
                '--project', project, '--mode', 'cloud', '--bucket', bucket,
                '--start_year', startYear
            ],
            file_outputs={'bucket': '/output.txt'})
    else:
        preprocess = ObjectDict({'outputs': {'bucket': bucket}})

    # Step 2: Do hyperparameter tuning of the model on Cloud ML Engine
    if start_step <= 2:
        hparam_train = dsl.ContainerOp(
            name='hypertrain',
            # image needs to be a compile-time string
            image=
            'gcr.io/cloud-training-demos/babyweight-pipeline-hypertrain:latest',
            arguments=[preprocess.outputs['bucket']],
            file_outputs={'jobname': '/output.txt'})
    else:
        hparam_train = ObjectDict(
            {'outputs': {
                'jobname': 'babyweight_181008_210829'
            }})

    # Step 3: Train the model some more, but on the pipelines cluster itself
    if start_step <= 3:
        train_tuned = dsl.ContainerOp(
            name='traintuned',
            # image needs to be a compile-time string
            image=
            'gcr.io/cloud-training-demos/babyweight-pipeline-traintuned-trainer:latest',
            #image='gcr.io/cloud-training-demos/babyweight-pipeline-traintuned-trainer@sha256:3d73c805430a16d0675aeafa9819d6d2cfbad0f0f34cff5fb9ed4e24493bc9a8',
            arguments=[hparam_train.outputs['jobname'], bucket],
            file_outputs={'train': '/output.txt'})
        train_tuned.set_memory_request('2G')
        train_tuned.set_cpu_request('1')
    else:
        train_tuned = ObjectDict({
            'outputs': {
                'train':
                'gs://cloud-training-demos-ml/babyweight/hyperparam/15'
            }
        })

    # Step 4: Deploy the trained model to Cloud ML Engine
    if start_step <= 4:
        deploy_cmle = dsl.ContainerOp(
            name='deploycmle',
            # image needs to be a compile-time string
            image=
            'gcr.io/cloud-training-demos/babyweight-pipeline-deploycmle:latest',
            arguments=[
                train_tuned.outputs['train'],  # modeldir
                'babyweight',
                'mlp'
            ],
            file_outputs={
                'model': '/model.txt',
                'version': '/version.txt'
            })
    else:
        deploy_cmle = ObjectDict(
            {'outputs': {
                'model': 'babyweight',
                'version': 'mlp'
            }})

    # Step 5: Deploy the trained model to AppEngine
    if start_step <= 5:
        deploy_cmle = dsl.ContainerOp(
            name='deployapp',
            # image needs to be a compile-time string
            image=
            'gcr.io/cloud-training-demos/babyweight-pipeline-deployapp:latest',
            arguments=[
                deploy_cmle.outputs['model'], deploy_cmle.outputs['version']
            ],
            file_outputs={'appurl': '/appurl.txt'})
    else:
        deploy_cmle = ObjectDict({
            'outputs': {
                'appurl': 'https://cloud-training-demos.appspot.com/'
            }
        })
def echo2_op(text2):
    return dsl.ContainerOp(name='echo2',
                           image='library/bash:4.4.23',
                           command=['sh', '-c'],
                           arguments=['echo "$0"', text2])
Example #20
def echo_op():
    return dsl.ContainerOp(name='echo',
                           image='busybox',
                           command=['sh', '-c'],
                           arguments=['echo "Found my node"'])
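The "Found my node" message suggests this op comes from a node-selection sample; if so, the task would typically be pinned to a node pool with ContainerOp.add_node_selector_constraint. A minimal sketch (the label key and value are placeholders):

import kfp.dsl as dsl

@dsl.pipeline(name='node-selector-demo', description='Run echo_op on a labelled node')
def node_selector_pipeline():
    # constrain the pod to nodes carrying a specific label (values are illustrative)
    echo_op().add_node_selector_constraint('cloud.google.com/gke-nodepool', 'cpu-pool')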
Exemple #21
0
def some_name():
    dsl.ContainerOp(
        name='some_name',
        image='alpine:latest',
    )
Exemple #22
0
def query_op(n):
    return dsl.ContainerOp(
        name = n,
        image = "hanjoo8821/jdbc-tibero:basic",
        container_kwargs = {'env': [V1EnvVar('id', 'hanjoo'), V1EnvVar('pw', '1010')]}
    )
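Passing credentials through container_kwargs={'env': [...]} bakes them into the pipeline definition; an equivalent and often clearer approach is to attach the variables with add_env_variable after constructing the op, ideally sourcing them from a Kubernetes Secret rather than literal strings. A minimal sketch of the same op in that style (values remain the placeholders from above):

import kfp.dsl as dsl
from kubernetes.client import V1EnvVar

def query_op_env(n):
    op = dsl.ContainerOp(name=n, image='hanjoo8821/jdbc-tibero:basic')
    # same environment as above, attached after construction
    op.add_env_variable(V1EnvVar(name='id', value='hanjoo'))
    op.add_env_variable(V1EnvVar(name='pw', value='1010'))
    return op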
Exemple #23
0
def no_outputs_pipeline():
    no_outputs_op = dsl.ContainerOp(name='dummy', image='dummy')
    dsl.ContainerOp(name='dummy',
                    image='dummy',
                    arguments=[no_outputs_op.output])
Exemple #24
0
def TrainingOp(name: str, input_dir: str, output_dir: str, epochs: int,
               model_name: str, model_version: int, batch_size: int,
               learning_rate: float, momentum: float, lr_patience: int,
               resize: int, scale_img: int, dropout: float, crop_pct: float,
               growth_rate: int, num_classes: int, input_size: int,
               prefetch_size: int, shuffle_buffer: int, volume: str):
    """Start model training within Kubeflow pipeline
    
    Arguments:
        name {str} -- operation name shown in the Kubeflow UI (eg. Training)
        input_dir {str} -- Input directory containing the training data (eg. "/directory/on/local/filesystem")
        output_dir {str} -- Output directory containing artifacts from training (eg. "/directory/on/local/filesystem")
        epochs {int} -- Number of epochs for model training (eg. 10)
        model_name {str} -- Name of the model (eg. "peleenet")
        model_version {int} -- Version of the model (eg. 1)
        batch_size {int} -- Batch size to use for mini-batch training (eg. 64)
        learning_rate {float} -- Learning rate for training the model (eg. 1e-3 or 0.0001)
        momentum {float} -- Momentum factor for use with the SGD optimizer
        lr_patience {int} -- Patience interval (in epochs) to wait before reducing the learning rate
        dropout {float} -- Fraction of dropout to add to the network (eg. 0.5 == 50%)
        resume_training {bool} -- Resume training of a saved model (eg. True or False)
        resize {int} -- Amount (in pixels) by which to enlarge training images (eg. 32 resizes a (224, 224) image to (256, 256))
        scale_img {int} -- Factor by which to scale the input image (eg. 7 scales a 32x32x3 (HWC) image to 224x224x3)
        crop_pct {float} -- Percentage to center crop training images (eg. 0.5 will center crop to the middle 50% of pixels in the image)
        dataset_split {list} -- Splits to use for Training, Validation, and Test sets (if applicable)
        growth_rate {int} -- Growth rate to use (see DenseNet and PeleeNet paper : https://arxiv.org/abs/1804.06882)
        bottle_neck_width {List[int]} -- Bottle beck widths to use for the Dense layers
        num_classes {int} -- Number of classes the model is being used for
        input_size {int} -- Input size of the images used for training
        prefetch_size {int} -- Number of batches to prefetch for training
        shuffle_buffer {int} -- Number of examples to store in buffer for shuffling datasets too large to fit in memory
        volume {str} -- Name of volume to map into container for access to data
    """

    return dsl.ContainerOp(
        name=name,
        # TODO enter container image name
        image='edhenry/peleenet-train:latest',
        arguments=[
            '--input_dir',
            input_dir,
            '--output_dir',
            output_dir,
            '--epochs',
            epochs,
            '--model_name',
            model_name,
            '--model_version',
            model_version,
            '--batch_size',
            batch_size,
            '--learning_rate',
            learning_rate,
            '--momentum',
            momentum,
            '--lr_patience',
            lr_patience,
            '--dropout',
            dropout,
            #'--resume_training', resume_training,
            '--resize',
            resize,
            '--scale_img',
            scale_img,
            '--crop_pct',
            crop_pct,
            #'--dataset_split', dataset_split,
            '--growth_rate',
            growth_rate,
            #'--bottle_neck_width', bottle_neck_width,
            '--num_classes',
            num_classes,
            '--input_size',
            input_size,
            '--prefetch_size',
            prefetch_size,
            '--shuffle_buffer',
            shuffle_buffer
        ],
        pvolumes=volume,
        file_outputs={}).set_gpu_limit(1)
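In the kfp DSL, pvolumes expects a mapping from mount path to volume, so in practice the volume argument is usually a dict built around a dsl.PipelineVolume rather than a bare name. A minimal sketch of wiring TrainingOp into a pipeline under that assumption (PVC name, mount path, and hyperparameter values are placeholders):

import kfp.dsl as dsl

@dsl.pipeline(name='peleenet-training', description='Single-step training pipeline')
def peleenet_pipeline():
    data_volume = dsl.PipelineVolume(pvc='training-data-pvc')  # existing PVC (hypothetical name)
    TrainingOp(
        name='training',
        input_dir='/mnt/data/train',
        output_dir='/mnt/data/artifacts',
        epochs=10, model_name='peleenet', model_version=1,
        batch_size=64, learning_rate=1e-3, momentum=0.9, lr_patience=5,
        resize=32, scale_img=7, dropout=0.5, crop_pct=0.875,
        growth_rate=32, num_classes=10, input_size=224,
        prefetch_size=4, shuffle_buffer=10000,
        volume={'/mnt/data': data_volume})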
Exemple #25
0
def query_op():
    return dsl.ContainerOp(
        name = "JDBC Agent",
        image = "hanjoo8821/jdbc-tibero:menu"
    )
Exemple #26
0
def mnist_hpo(name="mnist",
              namespace="anonymous",
              goal: float = 0.99,
              parallelTrialCount: int = 3,
              maxTrialCount: int = 12,
              experimentTimeoutMinutes: int = 60,
              deleteAfterDone: bool = True):
    objectiveConfig = {
        "type": "maximize",
        "goal": goal,
        "objectiveMetricName": "Validation-accuracy",
        "additionalMetricNames": ["accuracy"]
    }
    algorithmConfig = {"algorithmName": "random"}
    parameters = [{
        "name": "--lr",
        "parameterType": "double",
        "feasibleSpace": {
            "min": "0.01",
            "max": "0.03"
        }
    }, {
        "name": "--num-layers",
        "parameterType": "int",
        "feasibleSpace": {
            "min": "2",
            "max": "5"
        }
    }, {
        "name": "--optimizer",
        "parameterType": "categorical",
        "feasibleSpace": {
            "list": ["sgd", "adam", "ftrl"]
        }
    }]
    rawTemplate = {
        "apiVersion": "batch/v1",
        "kind": "Job",
        "metadata": {
            "name": "{{.Trial}}",
            "namespace": "{{.NameSpace}}"
        },
        "spec": {
            "template": {
                "spec": {
                    "restartPolicy":
                    "Never",
                    "containers": [{
                        "name":
                        "{{.Trial}}",
                        "image":
                        "docker.io/katib/mxnet-mnist-example",
                        "command": [
                            "python /mxnet/example/image-classification/train_mnist.py --batch-size=64 {{- with .HyperParameters}} {{- range .}} {{.Name}}={{.Value}} {{- end}} {{- end}}"  # noqa E501
                        ]
                    }]
                }
            }
        }
    }
    trialTemplate = {"goTemplate": {"rawTemplate": json.dumps(rawTemplate)}}
    op1 = katib_experiment_launcher_op(
        name,
        namespace,
        parallelTrialCount=parallelTrialCount,
        maxTrialCount=maxTrialCount,
        objectiveConfig=str(objectiveConfig),
        algorithmConfig=str(algorithmConfig),
        trialTemplate=str(trialTemplate),
        parameters=str(parameters),
        experimentTimeoutMinutes=experimentTimeoutMinutes,
        deleteAfterDone=deleteAfterDone)

    op_out = dsl.ContainerOp(
        name="my-out-cop",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo hyperparameter: %s" % op1.output],
    )
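katib_experiment_launcher_op is not defined in this snippet; it is normally a reusable component loaded from the Katib launcher's component.yaml. A minimal sketch of how it might be loaded (the path is an assumption and depends on the Kubeflow Pipelines checkout or release you use):

import kfp.components as comp

# hypothetical local path to the Katib launcher component definition
katib_experiment_launcher_op = comp.load_component_from_file(
    'components/kubeflow/katib-launcher/component.yaml')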
Exemple #27
0
  def __new__(cls, component_name, input_dict,
              output_dict,
              exec_properties):
    """Creates a new component.

    Args:
      component_name: TFX component name.
      input_dict: Dictionary of input names to TFX types, or
        kfp.dsl.PipelineParam representing input parameters.
      output_dict: Dictionary of output names to List of TFX types.
      exec_properties: Execution properties.

    Returns:
      Newly constructed TFX Kubeflow component instance.
    """
    outputs = output_dict.keys()
    file_outputs = {
        output: '/output/ml_metadata/{}'.format(output) for output in outputs
    }

    for k, v in ExecutionProperties.exec_properties.items():
      exec_properties[k] = v

    arguments = [
        '--exec_properties',
        json.dumps(exec_properties),
        '--outputs',
        types.jsonify_tfx_type_dict(output_dict),
        component_name,
    ]

    for k, v in input_dict.items():
      if isinstance(v, float) or isinstance(v, int):
        v = str(v)
      arguments.append('--{}'.format(k))
      arguments.append(v)

    container_op = dsl.ContainerOp(
        name=component_name,
        command=_COMMAND,
        image=_KUBEFLOW_TFX_IMAGE,
        arguments=arguments,
        file_outputs=file_outputs,
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))  # Adds GCP authentication.

    # Add the Argo workflow ID to the container's environment variable so it
    # can be used to uniquely place pipeline outputs under the pipeline_root.
    field_path = "metadata.labels['workflows.argoproj.io/workflow']"
    container_op.add_env_variable(
        k8s_client.V1EnvVar(
            name='WORKFLOW_ID',
            value_from=k8s_client.V1EnvVarSource(
                field_ref=k8s_client.V1ObjectFieldSelector(
                    field_path=field_path))))

    named_outputs = {output: container_op.outputs[output] for output in outputs}

    # This allows user code to refer to the ContainerOp 'op' output named 'x'
    # as op.outputs.x
    component_outputs = type('Output', (), named_outputs)

    return type(component_name, (BaseComponent,), {
        'container_op': container_op,
        'outputs': component_outputs
    })
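The type('Output', (), named_outputs) call builds a throwaway class so callers can write op.outputs.some_artifact instead of indexing a dict. A self-contained illustration of the same idiom (the paths are illustrative):

# minimal illustration of exposing dict entries as attributes via a dynamic type
named_outputs = {
    'examples': '/output/ml_metadata/examples',
    'schema': '/output/ml_metadata/schema',
}
Output = type('Output', (), named_outputs)
print(Output.examples)  # /output/ml_metadata/examples
print(Output.schema)    # /output/ml_metadata/schema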
Exemple #28
0
def kubeflow_training(
    output='', 
    project='',
    evaluation='gs://dataset-image-train/TFRecords/images/test_labels.csv',
    train='gs://dataset-image-train/TFRecords/images/train_labels.csv',
    schema='gs://ml-pipeline-playground/flower/schema.json',
    learning_rate=0.1,
    hidden_layer_size='100,50',
    steps=2000,
    target='label',
    workers=0,
    pss=0,
    preprocess_mode='local',
    predict_mode='local',
    optimizer_choice='SGD',
    batch_size_predict='',
    lambda_target=''
):
  output_template = str(output) + '/{{workflow.uid}}/{{pod.name}}/data'
  start_step = 1
  use_gpu = False

  
  if start_step <= 1:
    preprocess = dsl.ContainerOp(
      name='preprocess',
      image='gcr.io/celerates-playground/dock-img:latest',
      arguments=[
          '--training_data_file_pattern', train,
          '--evaluation_data_file_pattern', evaluation,
          '--schema', schema,
          '--gcp_project', project,
          '--run_mode', preprocess_mode,
          '--preprocessing_module', '',
          '--transformed_data_dir', output_template],
      file_outputs={'transformed_data_dir': '/output.txt'}
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
	
  else:
    preprocess = ObjectDict({
      'outputs': {
        'transformed_data_dir': output_template
      }
    })

  
  if start_step <= 2:
    training = dsl.ContainerOp(
      name='training',
      image='gcr.io/celerates-playground/ml-pipeline-kubeflow-tf-trainer:latest',
      arguments=[
          '--transformed_data_dir', preprocess.output,
          '--schema', schema,
          '--learning_rate', learning_rate,
          '--hidden_layer_size', hidden_layer_size,
          '--steps', steps,
          '--target', target,
          '--preprocessing_module', '',
          '--optimizer', optimizer_choice,
          '--training_output_dir', output_template],
      file_outputs={'training_output_dir': '/output.txt'}
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  else:
    training = ObjectDict({
      'outputs': {
        'training_output_dir': output_template
      }
    })
	
  if use_gpu:
    training.image = 'gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer-gpu:fe639f41661d8e17fcda64ff8242127620b80ba0'
    training.set_gpu_limit(1)

  if start_step <= 3:
    prediction = dsl.ContainerOp(
      name='prediction',
      image='gcr.io/celerates-playground/ml-pipeline-dataflow-tf-predict:latest',
      arguments=[
          '--data_file_pattern', evaluation,
          '--schema', schema,
          '--target_column', target,
          '--model', training.output,
          '--run_mode', predict_mode,
          '--gcp_project', project,
          '--batchsize', batch_size_predict,
          '--predictions_dir', output_template],
      file_outputs={'predictions_dir': '/output.txt'}
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))

  else:
    prediction = ObjectDict({
        'outputs': {
          'predictions_dir': output_template
        }
    })


  if start_step <= 4:
    confusion_matrix = dsl.ContainerOp(
      name='confusion_matrix',
      image='gcr.io/celerates-playground/ml-pipeline-local-confusion-matrix:latest',
      arguments=[
          '--predictions', prediction.output,
          '--target_lambda', lambda_target,
          '--output_dir', output_template],
      file_outputs={
        'output_dir': '/mlpipeline-metrics.json',
      }
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))
  else:
    confusion_matrix = ObjectDict({
      'outputs': {
        'output_dir': output_template
      }
    })
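Every step here authenticates by applying gcp.use_gcp_secret('user-gcp-sa'), which mounts the service-account secret and points GOOGLE_APPLICATION_CREDENTIALS at the key file, much like the explicit volume-mount/env-var pair in the next example. A minimal sketch of applying it to a standalone op (image and bucket are placeholders):

from kfp import dsl, gcp

@dsl.pipeline(name='gcs-list-demo')
def gcs_list_pipeline():
    # list a bucket using the mounted 'user-gcp-sa' credentials
    dsl.ContainerOp(
        name='list-bucket',
        image='google/cloud-sdk:alpine',
        command=['sh', '-c'],
        arguments=['gsutil ls gs://your-bucket'],
    ).apply(gcp.use_gcp_secret('user-gcp-sa'))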
Exemple #29
0
        def my_pipeline(msg1, json, kind, msg2='value2'):
            op = dsl.ContainerOp(name='echo', image='image', command=['sh', '-c'],
                                 arguments=['echo %s %s | tee /tmp/message.txt' % (msg1, msg2)],
                                 file_outputs={'merged': '/tmp/message.txt'}) \
              .add_volume_mount(k8s_client.V1VolumeMount(
                mount_path='/secret/gcp-credentials',
                name='gcp-credentials')) \
              .add_env_variable(k8s_client.V1EnvVar(
                name='GOOGLE_APPLICATION_CREDENTIALS',
                value='/secret/gcp-credentials/user-gcp-sa.json'))
            res = dsl.ResourceOp(
                name="test-resource",
                k8s_resource=k8s_client.V1PersistentVolumeClaim(
                    api_version="v1",
                    kind=kind,
                    metadata=k8s_client.V1ObjectMeta(name="resource")),
                attribute_outputs={"out": json})
            golden_output = {
                'container': {
                    'image':
                    'image',
                    'args': [
                        'echo {{inputs.parameters.msg1}} {{inputs.parameters.msg2}} | tee /tmp/message.txt'
                    ],
                    'command': ['sh', '-c'],
                    'env': [{
                        'name': 'GOOGLE_APPLICATION_CREDENTIALS',
                        'value': '/secret/gcp-credentials/user-gcp-sa.json'
                    }],
                    'volumeMounts': [{
                        'mountPath': '/secret/gcp-credentials',
                        'name': 'gcp-credentials',
                    }]
                },
                'inputs': {
                    'parameters': [
                        {
                            'name': 'msg1'
                        },
                        {
                            'name': 'msg2'
                        },
                    ]
                },
                'name': 'echo',
                'outputs': {
                    'artifacts': [
                        {
                            'name': 'echo-merged',
                            'path': '/tmp/message.txt',
                        },
                    ],
                    'parameters': [{
                        'name': 'echo-merged',
                        'valueFrom': {
                            'path': '/tmp/message.txt'
                        }
                    }],
                }
            }
            res_output = {
                'inputs': {
                    'parameters': [{
                        'name': 'json'
                    }, {
                        'name': 'kind'
                    }]
                },
                'name': 'test-resource',
                'outputs': {
                    'parameters': [{
                        'name': 'test-resource-manifest',
                        'valueFrom': {
                            'jsonPath': '{}'
                        }
                    }, {
                        'name': 'test-resource-name',
                        'valueFrom': {
                            'jsonPath': '{.metadata.name}'
                        }
                    }, {
                        'name': 'test-resource-out',
                        'valueFrom': {
                            'jsonPath': '{{inputs.parameters.json}}'
                        }
                    }]
                },
                'resource': {
                    'action':
                    'create',
                    'manifest': ("apiVersion: v1\n"
                                 "kind: '{{inputs.parameters.kind}}'\n"
                                 "metadata:\n"
                                 "  name: resource\n")
                }
            }

            self.maxDiff = None
            self.assertEqual(golden_output,
                             compiler._op_to_template._op_to_template(op))
            self.assertEqual(res_output,
                             compiler._op_to_template._op_to_template(res))
Exemple #30
0
def tacosandburritos_train(
        tenant_id,
        service_principal_id,
        service_principal_password,
        subscription_id,
        resource_group,
        workspace,
        persistent_volume_name='azure',
        persistent_volume_path='/mnt/azure',
        data_download='https://aiadvocate.blob.core.windows.net/public/tacodata.zip',
        epochs=5,
        batch=32,
        learning_rate=0.0001,
        imagetag='latest',
        model_name='tacosandburritos',
        profile_name='tacoprofile'):

    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'

    # preprocess data
    operations['preprocess'] = dsl.ContainerOp(
        name='preprocess',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/data.py', '--base_path', persistent_volume_path,
            '--data', training_folder, '--target', training_dataset,
            '--img_size', image_size, '--zipfile', data_download
        ])

    #train
    operations['training'] = dsl.ContainerOp(
        name='training',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/train.py', '--base_path', persistent_volume_path,
            '--data', training_folder, '--epochs', epochs, '--batch', batch,
            '--image_size', image_size, '--lr', learning_rate, '--outputs',
            model_folder, '--dataset', training_dataset
        ])
    operations['training'].after(operations['preprocess'])

    # register model
    operations['register'] = dsl.ContainerOp(
        name='register',
        image='insert your image here',
        command=['python'],
        arguments=[
            '/scripts/register.py', '--base_path', persistent_volume_path,
            '--model', 'latest.h5', '--model_name', model_name, '--tenant_id',
            tenant_id, '--service_principal_id', service_principal_id,
            '--service_principal_password', service_principal_password,
            '--subscription_id', subscription_id, '--resource_group',
            resource_group, '--workspace', workspace
        ])
    operations['register'].after(operations['training'])

    operations['profile'] = dsl.ContainerOp(
        name='profile',
        image='insert your image here',
        command=['sh'],
        arguments=[
            '/scripts/profile.sh', '-n', profile_name, '-m', model_name, '-i',
            '/scripts/inferenceconfig.json', '-d',
            '{"image":"https://www.exploreveg.org/files/2015/05/sofritas-burrito.jpeg"}',
            '-t', tenant_id, '-r', resource_group, '-w', workspace, '-s',
            service_principal_id, '-p', service_principal_password, '-u',
            subscription_id, '-b', persistent_volume_path
        ])
    operations['profile'].after(operations['register'])

    operations['deploy'] = dsl.ContainerOp(
        name='deploy',
        image='insert your image here',
        command=['sh'],
        arguments=[
            '/scripts/deploy.sh', '-n', model_name, '-m', model_name, '-i',
            '/scripts/inferenceconfig.json', '-d',
            '/scripts/deploymentconfig.json', '-t', tenant_id, '-r',
            resource_group, '-w', workspace, '-s', service_principal_id, '-p',
            service_principal_password, '-u', subscription_id, '-b',
            persistent_volume_path
        ])
    operations['deploy'].after(operations['profile'])

    for _, op in operations.items():
        op.container.set_image_pull_policy("Always")
        op.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.
                V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-disk'))).add_volume_mount(
                        k8s_client.V1VolumeMount(mount_path='/mnt/azure',
                                                 name='azure'))
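To run this on a cluster, the function is typically decorated as a pipeline and compiled into an Argo workflow package that can then be uploaded through the Kubeflow Pipelines UI or SDK client. A minimal sketch (pipeline name, description, and output filename are illustrative):

import kfp
import kfp.dsl as dsl

pipeline_func = dsl.pipeline(
    name='tacosandburritos',
    description='Preprocess, train, register, profile and deploy')(tacosandburritos_train)
kfp.compiler.Compiler().compile(pipeline_func, 'tacosandburritos.tar.gz')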