Example #1
    def test_init_container(self):
        echo = dsl.UserContainer(name='echo',
                                 image='alpine:latest',
                                 command=['echo', 'bye'])

        @dsl.pipeline(name='InitContainer',
                      description='A pipeline with init container.')
        def init_container_pipeline():
            dsl.ContainerOp(name='hello',
                            image='alpine:latest',
                            command=['echo', 'hello'],
                            init_containers=[echo])

        workflow_dict = compiler.Compiler()._compile(init_container_pipeline)
        for template in workflow_dict['spec']['templates']:
            init_containers = template.get('initContainers', None)
            if init_containers:
                self.assertEqual(len(init_containers), 1)
                init_container = init_containers[0]
                self.assertEqual(
                    init_container, {
                        'image': 'alpine:latest',
                        'command': ['echo', 'bye'],
                        'name': 'echo'
                    })
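The test above goes through the private compiler._compile helper so it can inspect the generated Argo workflow dict; outside of a test the same pipeline function would normally be compiled with the public API. A minimal sketch, with the output filename chosen purely for illustration:

from kfp import compiler

# Compile the pipeline defined above into an Argo workflow YAML file.
compiler.Compiler().compile(init_container_pipeline,
                            'init_container_pipeline.yaml')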
Example #2
def default_train(
    resource_group,
    workspace,
    dataset
):
    """Pipeline steps"""

    operations = {}
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'

    exit_op = dsl.ContainerOp(
        name='Exit Handler',
        image="curlimages/curl",
        command=['curl'],
        arguments=[
            '-d', get_callback_payload(TRAIN_FINISH_EVENT),
            callback_url
        ]
    )

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d',
                                    get_callback_payload(TRAIN_START_EVENT), callback_url])  # noqa: E501

        operations['start'] = dsl.ContainerOp(
            name='start',
            init_containers=[start_callback],
            image="busybox",
            command=['sh', '-c'],
            arguments=['echo "Pipeline starting"']
        )

        operations['end'] = dsl.ContainerOp(
            name='End',
            image="curlimages/curl",
            command=['curl'],
            arguments=[
                '-d', get_callback_payload("Model is registered"),
                callback_url
            ]
        )
        operations['end'].after(operations['start'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
              name='azure',
              persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(  # noqa: E501
                claim_name='azure-managed-file')
            )
        ).add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/mnt/azure', name='azure'))
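default_train relies on helpers such as get_callback_payload and the TRAIN_*_EVENT constants defined elsewhere in its module. To turn it into a submittable pipeline it is decorated and handed to the KFP client like any other v1 pipeline function; a hedged sketch, where the experiment name and argument values are placeholders:

import kfp
from kfp import dsl

# Apply the pipeline decorator to the function defined above.
default_train_pipeline = dsl.pipeline(
    name='default-train',
    description='Training pipeline with start/finish callback steps.')(default_train)

# Submit a one-off run from the SDK; all argument values here are illustrative.
kfp.Client().create_run_from_pipeline_func(
    default_train_pipeline,
    arguments={'resource_group': 'my-rg',
               'workspace': 'my-ws',
               'dataset': 'dataset.zip'},
    experiment_name='default-train')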
Example #3
def get_start_callback_container():
    return dsl.UserContainer(
        'callback',
        'curlimages/curl',
        command=['curl'],
        args=['-d',
              get_callback_payload(TRAIN_START_EVENT),
              callback_url])  # noqa: E501
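The returned UserContainer is meant to be attached to a pipeline step as an init container, as the surrounding examples do. A minimal usage sketch, assuming get_callback_payload, TRAIN_START_EVENT and callback_url are defined in the same module as above:

from kfp import dsl

start = dsl.ContainerOp(
    name='start',
    image='busybox',
    command=['sh', '-c'],
    arguments=['echo "Pipeline starting"'],
    # The callback container runs to completion before the main container starts.
    init_containers=[get_start_callback_container()])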
Example #4
def update_op(op: dsl.ContainerOp,
              pipeline_name: dsl.PipelineParam,
              pipeline_root: dsl.PipelineParam,
              launcher_image: Optional[str] = None) -> None:
    """Updates the passed in Op for running in v2-compatible mode.

    Args:
      op: The Op to update.
      pipeline_name: The name of the pipeline under which `op` runs.
      pipeline_root: The root output directory for pipeline artifacts.
      launcher_image: An optional launcher image. Useful for tests.
    """
    op.is_v2 = True
    # Inject the launcher binary and overwrite the entrypoint.
    image_name = launcher_image or _DEFAULT_LAUNCHER_IMAGE
    launcher_container = dsl.UserContainer(
        name="kfp-launcher",
        image=image_name,
        command=["launcher", "--copy", "/kfp-launcher/launch"],
        mirror_volume_mounts=True)

    op.add_init_container(launcher_container)
    op.add_volume(k8s_client.V1Volume(name='kfp-launcher'))
    op.add_volume_mount(
        k8s_client.V1VolumeMount(name='kfp-launcher',
                                 mount_path='/kfp-launcher'))

    # op.command + op.args will have the following sections:
    # 1. args passed to kfp-launcher
    # 2. a separator "--"
    # 3. parameters in format "key1=value1", "key2=value2", ...
    # 4. a separator "--" as end of arguments passed to launcher
    # 5. (start of op.args) arguments of the original user program command + args
    #
    # example:
    # - command:
    # - /kfp-launcher/launch
    # - '--mlmd_server_address'
    # - $(METADATA_GRPC_SERVICE_HOST)
    # - '--mlmd_server_port'
    # - $(METADATA_GRPC_SERVICE_PORT)
    # - ... # more launcher params
    # - '--pipeline_task_id'
    # - $(KFP_POD_NAME)
    # - '--pipeline_root'
    # - ''
    # - '--' # start of parameter values
    # - first=first
    # - second=second
    # - '--' # start of user command and args
    # args:
    # - sh
    # - '-ec'
    # - |
    #     program_path=$(mktemp)
    #     printf "%s" "$0" > "$program_path"
    #     python3 -u "$program_path" "$@"
    # - >
    #     import json
    #     import xxx
    #     ...
    op.command = [
        "/kfp-launcher/launch",
        "--mlmd_server_address",
        "$(METADATA_GRPC_SERVICE_HOST)",
        "--mlmd_server_port",
        "$(METADATA_GRPC_SERVICE_PORT)",
        "--runtime_info_json",
        "$(KFP_V2_RUNTIME_INFO)",
        "--container_image",
        "$(KFP_V2_IMAGE)",
        "--task_name",
        op.name,
        "--pipeline_name",
        pipeline_name,
        "--run_id",
        "$(KFP_RUN_ID)",
        "--run_resource",
        "workflows.argoproj.io/$(WORKFLOW_ID)",
        "--namespace",
        "$(KFP_NAMESPACE)",
        "--pod_name",
        "$(KFP_POD_NAME)",
        "--pod_uid",
        "$(KFP_POD_UID)",
        "--pipeline_root",
        pipeline_root,
        "--enable_caching",
        "$(ENABLE_CACHING)",
    ]

    # Mount necessary environment variables.
    op.apply(_default_transformers.add_kfp_pod_env)
    op.container.add_env_variable(
        k8s_client.V1EnvVar(name="KFP_V2_IMAGE", value=op.container.image))

    config_map_ref = k8s_client.V1ConfigMapEnvSource(
        name='metadata-grpc-configmap', optional=True)
    op.container.add_env_from(
        k8s_client.V1EnvFromSource(config_map_ref=config_map_ref))

    op.arguments = list(op.container_spec.command) + list(
        op.container_spec.args)

    runtime_info = {
        "inputParameters": collections.OrderedDict(),
        "inputArtifacts": collections.OrderedDict(),
        "outputParameters": collections.OrderedDict(),
        "outputArtifacts": collections.OrderedDict(),
    }

    op.command += ["--"]
    component_spec = op.component_spec
    for parameter, spec in sorted(
            component_spec.input_definitions.parameters.items()):
        parameter_info = {
            "type":
            pipeline_spec_pb2.PrimitiveType.PrimitiveTypeEnum.Name(spec.type),
        }
        op.command += [f"{parameter}={op._parameter_arguments[parameter]}"]
        runtime_info["inputParameters"][parameter] = parameter_info
    op.command += ["--"]

    for artifact_name, spec in sorted(
            component_spec.input_definitions.artifacts.items()):
        artifact_info = {
            "metadataPath": op.input_artifact_paths[artifact_name],
            "schemaTitle": spec.artifact_type.schema_title,
            "instanceSchema": spec.artifact_type.instance_schema,
        }
        runtime_info["inputArtifacts"][artifact_name] = artifact_info

    for parameter, spec in sorted(
            component_spec.output_definitions.parameters.items()):
        parameter_info = {
            "type":
            pipeline_spec_pb2.PrimitiveType.PrimitiveTypeEnum.Name(spec.type),
            "path":
            op.file_outputs[parameter],
        }
        runtime_info["outputParameters"][parameter] = parameter_info

    for artifact_name, spec in sorted(
            component_spec.output_definitions.artifacts.items()):
        # TODO: Assert instance_schema.
        artifact_info = {
            # Type used to register output artifacts.
            "schemaTitle": spec.artifact_type.schema_title,
            "instanceSchema": spec.artifact_type.instance_schema,
            # File used to write out the registered artifact ID.
            "metadataPath": op.file_outputs[artifact_name],
        }
        runtime_info["outputArtifacts"][artifact_name] = artifact_info

    op.container.add_env_variable(
        k8s_client.V1EnvVar(name="KFP_V2_RUNTIME_INFO",
                            value=json.dumps(runtime_info)))

    op.pod_annotations['pipelines.kubeflow.org/v2_component'] = "true"
    op.pod_labels['pipelines.kubeflow.org/v2_component'] = "true"
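update_op is normally not invoked by hand: when a pipeline is compiled or submitted in v2-compatible mode, the SDK applies this launcher injection to every ContainerOp for you. A hedged sketch, assuming a recent kfp 1.x release (1.6 or later) where the PipelineExecutionMode enum is available, and reusing the pipeline function from Example #1 purely as a placeholder:

from kfp import compiler, dsl

# Request a v2-compatible Argo workflow; internally the compiler rewrites each
# op along the lines of update_op above.
compiler.Compiler(
    mode=dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
        init_container_pipeline, 'pipeline.yaml')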
Example #5
def cnn_train(resource_group, workspace, dataset, token):
    """Pipeline steps"""

    persistent_volume_path = '/mnt/azure'
    data_download = dataset  # noqa: E501
    batch = 32
    model_name = 'cnnmodel'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'
    image_repo_name = "k8scc01covidmlopsacr.azurecr.io/mlops"
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'
    mlflow_url = 'http://mlflow.mlflow:5000'

    exit_op = dsl.ContainerOp(name='Exit Handler',
                              image="curlimages/curl",
                              command=['curl'],
                              arguments=[
                                  '-d',
                                  get_callback_payload(TRAIN_FINISH_EVENT),
                                  callback_url
                              ])

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d',
                                    get_callback_payload(TRAIN_START_EVENT), callback_url])  # noqa: E501

        operations['tensorflow preprocess'] = dsl.ContainerOp(
            name='tensorflow preprocess',
            init_containers=[start_callback],
            image=image_repo_name + '/tensorflow-preprocess:latest',
            command=['python'],
            arguments=[
                '/scripts/data.py', '--base_path', persistent_volume_path,
                '--data', training_folder, '--target', training_dataset,
                '--img_size', image_size, '--zipfile', data_download
            ])

        operations['tensorflow training'] = dsl.ContainerOp(
            name="tensorflow training",
            image=image_repo_name + '/tensorflow-training:latest',
            command=['python'],
            arguments=[
                '/scripts/train.py', '--base_path', persistent_volume_path,
                '--data', training_folder, '--epochs', 2, '--batch', batch,
                '--image_size', image_size, '--lr', 0.0001, '--outputs',
                model_folder, '--dataset', training_dataset
            ],
            output_artifact_paths={
                'mlpipeline-metrics': '/mlpipeline-metrics.json',
                'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
            }).apply(use_azstorage_secret()).add_env_variable(
                V1EnvVar(name="RUN_ID",
                         value=dsl.RUN_ID_PLACEHOLDER)).add_env_variable(
                             V1EnvVar(
                                 name="MLFLOW_TRACKING_TOKEN",
                                 value=token)).add_env_variable(
                                     V1EnvVar(
                                         name="MLFLOW_TRACKING_URI",
                                         value=mlflow_url)).add_env_variable(
                                             V1EnvVar(
                                                 name="GIT_PYTHON_REFRESH",
                                                 value='quiet'))  # noqa: E501

        operations['tensorflow training'].after(
            operations['tensorflow preprocess'])  # noqa: E501

        operations['evaluate'] = dsl.ContainerOp(
            name='evaluate',
            image="busybox",
            command=['sh', '-c'],
            arguments=['echo "Life is Good!"'])
        operations['evaluate'].after(operations['tensorflow training'])

        operations['register kubeflow'] = dsl.ContainerOp(
            name='register kubeflow',
            image=image_repo_name + '/register-kubeflow-artifacts:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py', '--base_path', persistent_volume_path,
                '--model', 'latest.h5', '--model_name', model_name, '--data',
                training_folder, '--dataset', training_dataset, '--run_id',
                dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret())
        operations['register kubeflow'].after(operations['evaluate'])

        operations['register AML'] = dsl.ContainerOp(
            name='register AML',
            image=image_repo_name + '/register-aml:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py', '--base_path', persistent_volume_path,
                '--model', 'latest.h5', '--model_name', model_name,
                '--tenant_id', "$(AZ_TENANT_ID)", '--service_principal_id',
                "$(AZ_CLIENT_ID)", '--service_principal_password',
                "$(AZ_CLIENT_SECRET)", '--subscription_id',
                "$(AZ_SUBSCRIPTION_ID)", '--resource_group', resource_group,
                '--workspace', workspace, '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret())
        operations['register AML'].after(operations['register kubeflow'])

        operations['register mlflow'] = dsl.ContainerOp(
            name='register mlflow',
            image=image_repo_name + '/register-mlflow:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py', '--model', 'model', '--model_name',
                model_name, '--experiment_name', 'kubeflow-mlops', '--run_id',
                dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret()).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_URI",
                         value=mlflow_url)).add_env_variable(
                             V1EnvVar(name="MLFLOW_TRACKING_TOKEN",
                                      value=token))  # noqa: E501
        operations['register mlflow'].after(operations['register AML'])

        operations['finalize'] = dsl.ContainerOp(
            name='Finalize',
            image="curlimages/curl",
            command=['curl'],
            arguments=[
                '-d',
                get_callback_payload("Model is registered"), callback_url
            ])
        operations['finalize'].after(operations['register mlflow'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.
                V1PersistentVolumeClaimVolumeSource(  # noqa: E501
                    claim_name='azure-managed-file'))).add_volume_mount(
                        k8s_client.V1VolumeMount(mount_path='/mnt/azure',
                                                 name='azure'))
Example #6
def update_op(op: dsl.ContainerOp,
              pipeline_name: dsl.PipelineParam,
              pipeline_root: dsl.PipelineParam,
              launcher_image: Optional[str] = None) -> None:
  """Updates the passed in Op for running in v2-compatible mode.

    Args:
      op: The Op to update.
      pipeline_name: The name of the pipeline under which `op` runs.
      pipeline_root: The root output directory for pipeline artifacts.
      launcher_image: An optional launcher image. Useful for tests.
    """
  # Inject the launcher binary and overwrite the entrypoint.
  image_name = launcher_image or _DEFAULT_LAUNCHER_IMAGE
  launcher_container = dsl.UserContainer(name="kfp-launcher",
                                         image=image_name,
                                         command="/bin/mount_launcher.sh",
                                         mirror_volume_mounts=True)

  op.add_init_container(launcher_container)
  op.add_volume(k8s_client.V1Volume(name='kfp-launcher'))
  op.add_volume_mount(
      k8s_client.V1VolumeMount(name='kfp-launcher', mount_path='/kfp-launcher'))

  op.command = [
      "/kfp-launcher/launch",
      "--mlmd_server_address",
      "$(METADATA_GRPC_SERVICE_HOST)",
      "--mlmd_server_port",
      "$(METADATA_GRPC_SERVICE_PORT)",
      "--runtime_info_json",
      "$(KFP_V2_RUNTIME_INFO)",
      "--container_image",
      "$(KFP_V2_IMAGE)",
      "--task_name",
      op.name,
      "--pipeline_name",
      pipeline_name,
      "--pipeline_run_id",
      "$(WORKFLOW_ID)",
      "--pipeline_task_id",
      "$(KFP_POD_NAME)",
      "--pipeline_root",
      pipeline_root,
  ]

  # Mount necessary environment variables.
  op.apply(_default_transformers.add_kfp_pod_env)
  op.container.add_env_variable(
      k8s_client.V1EnvVar(name="KFP_V2_IMAGE", value=op.container.image))

  config_map_ref = k8s_client.V1ConfigMapEnvSource(
      name='metadata-grpc-configmap', optional=True)
  op.container.add_env_from(
      k8s_client.V1EnvFromSource(config_map_ref=config_map_ref))

  op.arguments = list(op.container_spec.command) + list(op.container_spec.args)

  runtime_info = {
      "inputParameters": collections.OrderedDict(),
      "inputArtifacts": collections.OrderedDict(),
      "outputParameters": collections.OrderedDict(),
      "outputArtifacts": collections.OrderedDict(),
  }

  component_spec = op.component_spec
  for parameter, spec in sorted(
      component_spec.input_definitions.parameters.items()):
    parameter_info = {
        "parameterType":
            pipeline_spec_pb2.PrimitiveType.PrimitiveTypeEnum.Name(spec.type),
        "parameterValue":
            op._parameter_arguments[parameter],
    }
    runtime_info["inputParameters"][parameter] = parameter_info

  for artifact_name, spec in sorted(
      component_spec.input_definitions.artifacts.items()):
    artifact_info = {"fileInputPath": op.input_artifact_paths[artifact_name]}
    runtime_info["inputArtifacts"][artifact_name] = artifact_info

  for parameter, spec in sorted(
      component_spec.output_definitions.parameters.items()):
    parameter_info = {
        "parameterType":
            pipeline_spec_pb2.PrimitiveType.PrimitiveTypeEnum.Name(spec.type),
        "fileOutputPath":
            op.file_outputs[parameter],
    }
    runtime_info["outputParameters"][parameter] = parameter_info

  for artifact_name, spec in sorted(
      component_spec.output_definitions.artifacts.items()):
    # TODO: Assert instance_schema.
    artifact_info = {
        # Type used to register output artifacts.
        "artifactSchema": spec.artifact_type.instance_schema,
        # File used to write out the registered artifact ID.
        "fileOutputPath": op.file_outputs[artifact_name],
    }
    runtime_info["outputArtifacts"][artifact_name] = artifact_info

  op.container.add_env_variable(
      k8s_client.V1EnvVar(name="KFP_V2_RUNTIME_INFO",
                          value=json.dumps(runtime_info)))

  op.pod_annotations['pipelines.kubeflow.org/v2_component'] = "true"
Example #7
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utility functions for enabling v2-compatible pipelines in v1."""
import collections
import json

from kfp import dsl
from kfp.compiler import _default_transformers
from kfp.pipeline_spec import pipeline_spec_pb2
from kfp.v2 import compiler

from kubernetes import client as k8s_client

_LAUNCHER_CONTAINER = dsl.UserContainer(
    name="kfp-launcher",
    image="gcr.io/ml-pipeline/kfp-launcher",
    command="/bin/mount_launcher.sh",
    mirror_volume_mounts=True)


def update_op(op: dsl.ContainerOp, pipeline_name: dsl.PipelineParam,
              pipeline_root: dsl.PipelineParam) -> None:
    """Updates the passed in Op for running in v2-compatible mode.

    Args:
      op: The Op to update.
      pipeline_name: The name of the pipeline under which `op` runs.
      pipeline_root: The root output directory for pipeline artifacts.
    """
    # Inject the launcher binary and overwrite the entrypoint.
Example #8
def tacosandburritos_train(
    resource_group,
    workspace
):
    """Pipeline steps"""

    persistent_volume_path = '/mnt/azure'
    data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'  # noqa: E501
    epochs = 2
    batch = 32
    learning_rate = 0.0001
    model_name = 'tacosandburritos'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'
    image_repo_name = "kubeflowyoacr.azurecr.io/mexicanfood"
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'

    exit_op = dsl.ContainerOp(
        name='Exit Handler',
        image="curlimages/curl",
        command=['curl'],
        arguments=[
            '-d', get_callback_payload(TRAIN_FINISH_EVENT),
            callback_url
        ]
    )

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d',
                                    get_callback_payload(TRAIN_START_EVENT), callback_url])  # noqa: E501
        operations['preprocess'] = dsl.ContainerOp(
            name='preprocess',
            init_containers=[start_callback],
            image=image_repo_name + '/preprocess:latest',
            command=['python'],
            arguments=[
                '/scripts/data.py',
                '--base_path', persistent_volume_path,
                '--data', training_folder,
                '--target', training_dataset,
                '--img_size', image_size,
                '--zipfile', data_download
            ]
        )

        # train
        operations['training'] = dsl.ContainerOp(
            name='training',
            image=image_repo_name + '/training:latest',
            command=['python'],
            arguments=[
                '/scripts/train.py',
                '--base_path', persistent_volume_path,
                '--data', training_folder,
                '--epochs', epochs,
                '--batch', batch,
                '--image_size', image_size,
                '--lr', learning_rate,
                '--outputs', model_folder,
                '--dataset', training_dataset
            ]
        )
        operations['training'].after(operations['preprocess'])

        # register model
        operations['register'] = dsl.ContainerOp(
            name='register',
            image=image_repo_name + '/register:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py',
                '--base_path', persistent_volume_path,
                '--model', 'latest.h5',
                '--model_name', model_name,
                '--tenant_id', "$(AZ_TENANT_ID)",
                '--service_principal_id', "$(AZ_CLIENT_ID)",
                '--service_principal_password', "$(AZ_CLIENT_SECRET)",
                '--subscription_id', "$(AZ_SUBSCRIPTION_ID)",
                '--resource_group', resource_group,
                '--workspace', workspace,
                '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]
        ).apply(use_azure_secret())
        operations['register'].after(operations['training'])

        operations['finalize'] = dsl.ContainerOp(
            name='Finalize',
            image="curlimages/curl",
            command=['curl'],
            arguments=[
                '-d', get_callback_payload("Model is registered"),
                callback_url
            ]
        )
        operations['finalize'].after(operations['register'])

    # operations['deploy'] = dsl.ContainerOp(
    #     name='deploy',
    #     image=image_repo_name + '/deploy:latest',
    #     command=['sh'],
    #     arguments=[
    #         '/scripts/deploy.sh',
    #         '-n', model_name,
    #         '-m', model_name,
    #         '-t', "$(AZ_TENANT_ID)",
    #         '-r', resource_group,
    #         '-w', workspace,
    #         '-s', "$(AZ_CLIENT_ID)",
    #         '-p', "$(AZ_CLIENT_SECRET)",
    #         '-u', "$(AZ_SUBSCRIPTION_ID)",
    #         '-b', persistent_volume_path,
    #         '-x', dsl.RUN_ID_PLACEHOLDER
    #     ]
    # ).apply(use_azure_secret())
    # operations['deploy'].after(operations['register'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
              name='azure',
              persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(  # noqa: E501
                claim_name='azure-managed-disk')
            )
        ).add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/mnt/azure', name='azure'))
Example #9
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from kfp import dsl


echo = dsl.UserContainer(
    name='echo',
    image='alpine:latest',
    command=['echo', 'bye'])

@dsl.pipeline(name='InitContainer', description='A pipeline with init container.')
def init_container_pipeline():
    dsl.ContainerOp(
        name='hello',
        image='alpine:latest',
        command=['echo', 'hello'],
        init_containers=[echo])


if __name__ == '__main__':
    from kfp_tekton.compiler import TektonCompiler
    TektonCompiler().compile(init_container_pipeline, __file__.replace('.py', '.yaml'))
Example #10
def tacosandburritos_train(resource_group, workspace, dataset):
    """Pipeline steps"""

    persistent_volume_path = '/mnt/azure'
    data_download = dataset  # noqa: E501
    batch = 32
    model_name = 'tacosandburritos'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'
    image_repo_name = "kubeflowyoacr.azurecr.io/mexicanfood"
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'
    mlflow_url = 'http://mlflow:5000'

    exit_op = dsl.ContainerOp(name='Exit Handler',
                              image="curlimages/curl",
                              command=['curl'],
                              arguments=[
                                  '-d',
                                  get_callback_payload(TRAIN_FINISH_EVENT),
                                  callback_url
                              ])

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d',
                                    get_callback_payload(TRAIN_START_EVENT), callback_url])  # noqa: E501

        operations['data processing on databricks'] = dsl.ContainerOp(
            name='data processing on databricks',
            init_containers=[start_callback],
            image=image_repo_name + '/databricks-notebook:latest',
            arguments=[
                '-r', dsl.RUN_ID_PLACEHOLDER, '-p',
                '{"argument_one":"param one","argument_two":"param two"}'
            ]).apply(use_databricks_secret())

        operations['preprocess'] = dsl.ContainerOp(
            name='preprocess',
            image=image_repo_name + '/preprocess:latest',
            command=['python'],
            arguments=[
                '/scripts/data.py', '--base_path', persistent_volume_path,
                '--data', training_folder, '--target', training_dataset,
                '--img_size', image_size, '--zipfile', data_download
            ])

        operations['preprocess'].after(
            operations['data processing on databricks'])  # noqa: E501

        #  train
        #  TODO: read set of parameters from config file
        with dsl.ParallelFor([{
                'epochs': 1,
                'lr': 0.0001
        }, {
                'epochs': 2,
                'lr': 0.0002
        }, {
                'epochs': 3,
                'lr': 0.0003
        }]) as item:  # noqa: E501
            operations['training'] = dsl.ContainerOp(
                name="training",
                image=image_repo_name + '/training:latest',
                command=['python'],
                arguments=[
                    '/scripts/train.py', '--base_path', persistent_volume_path,
                    '--data', training_folder, '--epochs', item.epochs,
                    '--batch', batch, '--image_size', image_size, '--lr',
                    item.lr, '--outputs', model_folder, '--dataset',
                    training_dataset
                ],
                output_artifact_paths={  # change output_artifact_paths to file_outputs after this PR is merged https://github.com/kubeflow/pipelines/pull/2334  # noqa: E501
                    'mlpipeline-metrics': '/mlpipeline-metrics.json',
                    'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
                }).add_env_variable(
                    V1EnvVar(name="RUN_ID",
                             value=dsl.RUN_ID_PLACEHOLDER)).add_env_variable(
                                 V1EnvVar(name="MLFLOW_TRACKING_URI",
                                          value=mlflow_url)).add_env_variable(
                                              V1EnvVar(
                                                  name="GIT_PYTHON_REFRESH",
                                                  value='quiet'))  # noqa: E501

        operations['training'].after(operations['preprocess'])

        operations['evaluate'] = dsl.ContainerOp(
            name='evaluate',
            image="busybox",
            command=['sh', '-c'],
            arguments=['echo "Life is Good!"'])
        operations['evaluate'].after(operations['training'])

        # register kubeflow artifacts model
        operations['register to kubeflow'] = dsl.ContainerOp(
            name='register to kubeflow',
            image=image_repo_name + '/registerartifacts:latest',
            command=['python'],
            arguments=[
                '/scripts/registerartifacts.py', '--base_path',
                persistent_volume_path, '--model', 'latest.h5', '--model_name',
                model_name, '--data', training_folder, '--dataset',
                training_dataset, '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret())
        operations['register to kubeflow'].after(operations['evaluate'])

        # register model
        operations['register to AML'] = dsl.ContainerOp(
            name='register to AML',
            image=image_repo_name + '/register:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py', '--base_path', persistent_volume_path,
                '--model', 'latest.h5', '--model_name', model_name,
                '--tenant_id', "$(AZ_TENANT_ID)", '--service_principal_id',
                "$(AZ_CLIENT_ID)", '--service_principal_password',
                "$(AZ_CLIENT_SECRET)", '--subscription_id',
                "$(AZ_SUBSCRIPTION_ID)", '--resource_group', resource_group,
                '--workspace', workspace, '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret())
        operations['register to AML'].after(operations['register to kubeflow'])

        # register model to mlflow
        operations['register to mlflow'] = dsl.ContainerOp(
            name='register to mlflow',
            image=image_repo_name + '/register-mlflow:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py', '--model', 'model', '--model_name',
                model_name, '--experiment_name', 'mexicanfood', '--run_id',
                dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret()).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_URI",
                         value=mlflow_url))  # noqa: E501
        operations['register to mlflow'].after(operations['register to AML'])

        operations['finalize'] = dsl.ContainerOp(
            name='Finalize',
            image="curlimages/curl",
            command=['curl'],
            arguments=[
                '-d',
                get_callback_payload("Model is registered"), callback_url
            ])
        operations['finalize'].after(operations['register to mlflow'])

    # operations['deploy'] = dsl.ContainerOp(
    #     name='deploy',
    #     image=image_repo_name + '/deploy:latest',
    #     command=['sh'],
    #     arguments=[
    #         '/scripts/deploy.sh',
    #         '-n', model_name,
    #         '-m', model_name,
    #         '-t', "$(AZ_TENANT_ID)",
    #         '-r', resource_group,
    #         '-w', workspace,
    #         '-s', "$(AZ_CLIENT_ID)",
    #         '-p', "$(AZ_CLIENT_SECRET)",
    #         '-u', "$(AZ_SUBSCRIPTION_ID)",
    #         '-b', persistent_volume_path,
    #         '-x', dsl.RUN_ID_PLACEHOLDER
    #     ]
    # ).apply(use_azure_secret())
    # operations['deploy'].after(operations['register'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.
                V1PersistentVolumeClaimVolumeSource(  # noqa: E501
                    claim_name='azure-managed-file'))).add_volume_mount(
                        k8s_client.V1VolumeMount(mount_path='/mnt/azure',
                                                 name='azure'))