def test_init_container(self):
        # GIVEN
        volume_mounts = [
            k8s.V1VolumeMount(mount_path='/etc/foo', name='test-volume', sub_path=None, read_only=True)
        ]

        init_environments = [
            k8s.V1EnvVar(name='key1', value='value1'),
            k8s.V1EnvVar(name='key2', value='value2'),
        ]

        init_container = k8s.V1Container(
            name="init-container",
            image="ubuntu:16.04",
            env=init_environments,
            volume_mounts=volume_mounts,
            command=["bash", "-cx"],
            args=["echo 10"],
        )

        volume = k8s.V1Volume(
            name='test-volume',
            persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name='test-volume'),
        )
        expected_init_container = {
            'name': 'init-container',
            'image': 'ubuntu:16.04',
            'command': ['bash', '-cx'],
            'args': ['echo 10'],
            'env': [{'name': 'key1', 'value': 'value1'}, {'name': 'key2', 'value': 'value2'}],
            'volumeMounts': [{'mountPath': '/etc/foo', 'name': 'test-volume', 'readOnly': True}],
        }

        k = KubernetesPodOperator(
            namespace='default',
            image="ubuntu:16.04",
            cmds=["bash", "-cx"],
            arguments=["echo 10"],
            labels={"foo": "bar"},
            name="test-" + str(random.randint(0, 1000000)),
            task_id="task" + self.get_current_task_name(),
            volumes=[volume],
            init_containers=[init_container],
            in_cluster=False,
            do_xcom_push=False,
        )
        context = create_context(k)
        k.execute(context)
        actual_pod = self.api_client.sanitize_for_serialization(k.pod)
        self.expected_pod['spec']['initContainers'] = [expected_init_container]
        self.expected_pod['spec']['volumes'] = [
            {'name': 'test-volume', 'persistentVolumeClaim': {'claimName': 'test-volume'}}
        ]
        assert self.expected_pod == actual_pod
Example 2
def _construct_volume(name, claim, host) -> k8s.V1Volume:
    """Build a V1Volume backed by a PVC, a hostPath, or an emptyDir, in that order of preference."""
    volume = k8s.V1Volume(name=name)

    if claim:
        volume.persistent_volume_claim = k8s.V1PersistentVolumeClaimVolumeSource(claim_name=claim)
    elif host:
        volume.host_path = k8s.V1HostPathVolumeSource(path=host, type='')
    else:
        volume.empty_dir = {}

    return volume
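
# A minimal usage sketch of the helper above (the volume and claim names are
# illustrative); `k8s` is assumed to be `kubernetes.client.models`.
pvc_volume = _construct_volume("data", claim="data-pvc", host=None)        # PVC-backed volume
host_volume = _construct_volume("logs", claim=None, host="/var/log/app")   # hostPath volume
scratch_volume = _construct_volume("scratch", claim=None, host=None)       # emptyDir volume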
    def test_volume_mount(self):
        with mock.patch.object(PodLauncher, 'log') as mock_logger:
            volume_mount = k8s.V1VolumeMount(
                name='test-volume', mount_path='/tmp/test_volume', sub_path=None, read_only=False
            )

            volume = k8s.V1Volume(
                name='test-volume',
                persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name='test-volume'),
            )

            args = [
                "echo \"retrieved from mount\" > /tmp/test_volume/test.txt "
                "&& cat /tmp/test_volume/test.txt"
            ]
            k = KubernetesPodOperator(
                namespace='default',
                image="ubuntu:16.04",
                cmds=["bash", "-cx"],
                arguments=args,
                labels={"foo": "bar"},
                volume_mounts=[volume_mount],
                volumes=[volume],
                name="test-" + str(random.randint(0, 1000000)),
                task_id="task" + self.get_current_task_name(),
                in_cluster=False,
                do_xcom_push=False,
            )
            context = create_context(k)
            k.execute(context=context)
            mock_logger.info.assert_any_call('retrieved from mount')
            actual_pod = self.api_client.sanitize_for_serialization(k.pod)
            self.expected_pod['spec']['containers'][0]['args'] = args
            self.expected_pod['spec']['containers'][0]['volumeMounts'] = [
                {'name': 'test-volume', 'mountPath': '/tmp/test_volume', 'readOnly': False}
            ]
            self.expected_pod['spec']['volumes'] = [
                {'name': 'test-volume', 'persistentVolumeClaim': {'claimName': 'test-volume'}}
            ]
            assert self.expected_pod == actual_pod
def pipeline_definition(
    hydrosphere_name="local",
    hydrosphere_address="http://hydro-serving-sidecar-serving.kubeflow.svc.cluster.local:8080",
    data_directory='/data/mnist',
    models_directory="/models/mnist",
    learning_rate="0.01",
    learning_steps="5000",
    batch_size="256",
    warmup_count="100",
    model_name="mnist",
    application_name="mnist-app",
    signature_name="predict",
    acceptable_accuracy="0.90",
):

    data_pvc = k8s.V1PersistentVolumeClaimVolumeSource(claim_name="data")
    models_pvc = k8s.V1PersistentVolumeClaimVolumeSource(claim_name="models")
    data_volume = k8s.V1Volume(name="data", persistent_volume_claim=data_pvc)
    models_volume = k8s.V1Volume(name="models",
                                 persistent_volume_claim=models_pvc)
    data_volume_mount = k8s.V1VolumeMount(
        mount_path="{{workflow.parameters.data-directory}}", name="data")
    models_volume_mount = k8s.V1VolumeMount(
        mount_path="{{workflow.parameters.models-directory}}", name="models")

    hydrosphere_address_env = k8s.V1EnvVar(
        name="CLUSTER_ADDRESS",
        value="{{workflow.parameters.hydrosphere-address}}")
    hydrosphere_name_env = k8s.V1EnvVar(
        name="CLUSTER_NAME", value="{{workflow.parameters.hydrosphere-name}}")
    data_directory_env = k8s.V1EnvVar(
        name="MNIST_DATA_DIR", value="{{workflow.parameters.data-directory}}")
    models_directory_env = k8s.V1EnvVar(
        name="MNIST_MODELS_DIR",
        value="{{workflow.parameters.models-directory}}")
    model_name_env = k8s.V1EnvVar(name="MODEL_NAME",
                                  value="{{workflow.parameters.model-name}}")
    application_name_env = k8s.V1EnvVar(
        name="APPLICATION_NAME",
        value="{{workflow.parameters.application-name}}")
    signature_name_env = k8s.V1EnvVar(
        name="SIGNATURE_NAME", value="{{workflow.parameters.signature-name}}")
    acceptable_accuracy_env = k8s.V1EnvVar(
        name="ACCEPTABLE_ACCURACY",
        value="{{workflow.parameters.acceptable-accuracy}}")
    learning_rate_env = k8s.V1EnvVar(
        name="LEARNING_RATE", value="{{workflow.parameters.learning-rate}}")
    learning_steps_env = k8s.V1EnvVar(
        name="LEARNING_STEPS", value="{{workflow.parameters.learning-steps}}")
    batch_size_env = k8s.V1EnvVar(name="BATCH_SIZE",
                                  value="{{workflow.parameters.batch-size}}")
    warmup_count_env = k8s.V1EnvVar(
        name="WARMUP_IMAGES_AMOUNT",
        value="{{workflow.parameters.warmpup-count}}")

    # 1. Download MNIST data
    download = dsl.ContainerOp(
        name="download", image="tidylobster/mnist-pipeline-download:latest")
    download.add_volume(data_volume)
    download.add_volume_mount(data_volume_mount)
    download.add_env_variable(data_directory_env)

    # 2. Train and save a MNIST classifier using Tensorflow
    train = dsl.ContainerOp(name="train",
                            image="tidylobster/mnist-pipeline-train:latest")
    train.after(download)
    train.set_memory_request('2G')
    train.set_cpu_request('1')

    train.add_volume(data_volume)
    train.add_volume(models_volume)
    train.add_volume_mount(data_volume_mount)
    train.add_volume_mount(models_volume_mount)
    train.add_env_variable(data_directory_env)
    train.add_env_variable(models_directory_env)
    train.add_env_variable(learning_rate_env)
    train.add_env_variable(learning_steps_env)
    train.add_env_variable(batch_size_env)

    # 3. Upload trained model to the cluster
    upload = dsl.ContainerOp(
        name="upload",
        image="tidylobster/mnist-pipeline-upload:latest",
        file_outputs={"model_version": "/model_version.txt"})
    upload.after(train)

    upload.add_volume(models_volume)
    upload.add_volume_mount(models_volume_mount)
    upload.add_env_variable(models_directory_env)
    upload.add_env_variable(model_name_env)
    upload.add_env_variable(hydrosphere_name_env)
    upload.add_env_variable(hydrosphere_address_env)

    # 4. Deploy application
    deploy = dsl.ContainerOp(name="deploy",
                             image="tidylobster/mnist-pipeline-deploy:latest",
                             arguments=[upload.outputs["model_version"]])
    deploy.after(upload)

    deploy.add_env_variable(hydrosphere_name_env)
    deploy.add_env_variable(hydrosphere_address_env)
    deploy.add_env_variable(application_name_env)
    deploy.add_env_variable(model_name_env)

    # 5. Test the model
    test = dsl.ContainerOp(name="test",
                           image="tidylobster/mnist-pipeline-test:latest")
    test.after(deploy)

    test.add_volume(data_volume)
    test.add_volume_mount(data_volume_mount)
    test.add_env_variable(data_directory_env)
    test.add_env_variable(hydrosphere_address_env)
    test.add_env_variable(application_name_env)
    test.add_env_variable(signature_name_env)
    test.add_env_variable(warmup_count_env)
    test.add_env_variable(acceptable_accuracy_env)

    # 6. Clean environment
    clean = dsl.ContainerOp(name="clean",
                            image="tidylobster/mnist-pipeline-clean:latest")
    clean.after(test)

    clean.add_volume(data_volume)
    clean.add_volume_mount(data_volume_mount)
    clean.add_env_variable(data_directory_env)
    clean.add_volume(models_volume)
    clean.add_volume_mount(models_volume_mount)
    clean.add_env_variable(models_directory_env)
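
# Hedged sketch (not in the original snippet): a pipeline function like the one above
# is typically wrapped with @dsl.pipeline and compiled into an Argo workflow package
# with the kfp compiler before being uploaded to Kubeflow. The output file name is
# illustrative.
import kfp.compiler as compiler

compiler.Compiler().compile(pipeline_definition, "mnist-pipeline.tar.gz")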
    default_args=ai_training_run_dag_default_args,
    schedule_interval=None,
    start_date=days_ago(2),
    tags=['training']
)

# Define Kubernetes namespace to execute DAG in
namespace = 'airflow'

## Define volume details (change values as necessary to match your environment)

# Dataset volume
dataset_volume_pvc_existing = 'dataset-vol'
dataset_volume = k8s.V1Volume(
    name=dataset_volume_pvc_existing,
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name=dataset_volume_pvc_existing),
)
dataset_volume_mount = k8s.V1VolumeMount(
    name=dataset_volume_pvc_existing, 
    mount_path='/mnt/dataset', 
    sub_path=None, 
    read_only=False
)

# Model volume
model_volume_pvc_existing = 'airflow-model-vol'
model_volume = k8s.V1Volume(
    name=model_volume_pvc_existing,
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name=model_volume_pvc_existing),
)
model_volume_mount = k8s.V1VolumeMount(
Example 6
from airflow.kubernetes.secret import Secret
from kubernetes.client import models as k8s

secret_all_keys = Secret("env", None, "airflow-secrets-2")
volume_mount = k8s.V1VolumeMount(name="test-volume",
                                 mount_path="/root/mount_file",
                                 sub_path=None,
                                 read_only=True)

configmaps = [
    k8s.V1EnvFromSource(config_map_ref=k8s.V1ConfigMapEnvSource(
        name="test-configmap-1")),
    k8s.V1EnvFromSource(config_map_ref=k8s.V1ConfigMapEnvSource(
        name="test-configmap-2")),
]

volume = k8s.V1Volume(
    name="test-volume",
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(
        claim_name="test-volume"),
)

port = k8s.V1ContainerPort(name="http", container_port=80)

init_container_volume_mounts = [
    k8s.V1VolumeMount(mount_path="/etc/foo",
                      name="test-volume",
                      sub_path=None,
                      read_only=True)
]

init_environments = [
    k8s.V1EnvVar(name="key1", value="value1"),
    k8s.V1EnvVar(name="key2", value="value2"),
]
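
# Hedged sketch of how the objects above are typically wired together (the init
# container, image, task name, and namespace are illustrative; KubernetesPodOperator
# is assumed to be imported from the cncf.kubernetes provider).
init_container = k8s.V1Container(
    name="init-container",
    image="ubuntu:16.04",
    env=init_environments,
    volume_mounts=init_container_volume_mounts,
    command=["bash", "-cx"],
    args=["echo 10"],
)

example_pod = KubernetesPodOperator(
    namespace="default",
    image="ubuntu:16.04",
    cmds=["bash", "-cx"],
    arguments=["echo 10"],
    secrets=[secret_all_keys],
    env_from=configmaps,
    ports=[port],
    volumes=[volume],
    volume_mounts=[volume_mount],
    init_containers=[init_container],
    name="example-pod",
    task_id="example_pod_task",
)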
from datetime import datetime, timedelta
from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator
from airflow.operators.dummy_operator import DummyOperator
from kubernetes.client import models as k8s
from airflow.utils.dates import days_ago
from airflow.models import Variable
from airflow.operators.bash import BashOperator
from airflow.operators.http_operator import SimpleHttpOperator
import urllib.request
import json

default_args = {'owner': 'datagap'}

volume = k8s.V1Volume(
    name='data-volume',
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(
        claim_name='shared-data-volume'))

volume_mount = k8s.V1VolumeMount(name='data-volume',
                                 mount_path='/shared-data',
                                 sub_path=None,
                                 read_only=False)

druidUrl = Variable.get("druid_broker_url")
templateUrl = Variable.get("ntreis_druid_validation_index_url")
ntreisPropDatasource = Variable.get("ntreis_prop_sold_datasource")
validationDatasource = Variable.get("validation_datasource")

login_url = Variable.get("ntreis_login_url")
rets_type = Variable.get("ntreis_rets_type")
search_limit = Variable.get("ntreis_search_limit")
password = Variable.get("ntreis_password")
def pipeline_definition(
    hydrosphere_address="{hydrosphere-instance-address}",  # <-- Replace with correct instance address
    mount_path='/storage',
    learning_rate="0.01",
    learning_steps="10000",
    batch_size="256",
    warmup_count="100",
    model_name="mnist",
    application_name="mnist-app",
    signature_name="predict",
    acceptable_accuracy="0.90",
    requests_delay="4",
    recurring_run="0",
):
    
    storage_pvc = k8s.V1PersistentVolumeClaimVolumeSource(claim_name="storage")
    storage_volume = k8s.V1Volume(name="storage", persistent_volume_claim=storage_pvc)
    storage_volume_mount = k8s.V1VolumeMount(
        mount_path="{{workflow.parameters.mount-path}}", name="storage")
    
    hydrosphere_address_env = k8s.V1EnvVar(
        name="CLUSTER_ADDRESS", value="{{workflow.parameters.hydrosphere-address}}")
    mount_path_env = k8s.V1EnvVar(
        name="MOUNT_PATH", value="{{workflow.parameters.mount-path}}")
    model_name_env = k8s.V1EnvVar(
        name="MODEL_NAME", value="{{workflow.parameters.model-name}}")
    application_name_env = k8s.V1EnvVar(
        name="APPLICATION_NAME", value="{{workflow.parameters.application-name}}")
    signature_name_env = k8s.V1EnvVar(
        name="SIGNATURE_NAME", value="{{workflow.parameters.signature-name}}")
    acceptable_accuracy_env = k8s.V1EnvVar(
        name="ACCEPTABLE_ACCURACY", value="{{workflow.parameters.acceptable-accuracy}}")
    learning_rate_env = k8s.V1EnvVar(
        name="LEARNING_RATE", value="{{workflow.parameters.learning-rate}}")
    learning_steps_env = k8s.V1EnvVar(
        name="LEARNING_STEPS", value="{{workflow.parameters.learning-steps}}")
    batch_size_env = k8s.V1EnvVar(
        name="BATCH_SIZE", value="{{workflow.parameters.batch-size}}")
    warmup_count_env = k8s.V1EnvVar(
        name="WARMUP_IMAGES_AMOUNT", value="{{workflow.parameters.warmpup-count}}")
    requests_delay_env = k8s.V1EnvVar(
        name="REQUESTS_DELAY", value="{{workflow.parameters.requests-delay}}")
    recurring_run_env = k8s.V1EnvVar(
        name="RECURRING_RUN", value="{{workflow.parameters.recurring-run}}")

    # 1. Make a sample of production data for retraining
    sample = dsl.ContainerOp(
        name="sample",
        image="tidylobster/mnist-pipeline-sampling:latest")     # <-- Replace with correct docker image
    sample.add_volume(storage_volume)
    sample.add_volume_mount(storage_volume_mount)
    sample.add_env_variable(mount_path_env)
    sample.add_env_variable(hydrosphere_address_env)
    sample.add_env_variable(application_name_env)
    
    # 2. Train and save a MNIST classifier using Tensorflow
    train = dsl.ContainerOp(
        name="train",
        image="tidylobster/mnist-pipeline-train:latest",        # <-- Replace with correct docker image
        file_outputs={"accuracy": "/accuracy.txt"})

    train.after(sample)
    train.set_memory_request('2G')
    train.set_cpu_request('1')

    train.add_volume(storage_volume)
    train.add_volume_mount(storage_volume_mount)
    train.add_env_variable(mount_path_env)
    train.add_env_variable(learning_rate_env)
    train.add_env_variable(learning_steps_env)
    train.add_env_variable(batch_size_env)
    train.add_env_variable(recurring_run_env)

    # 3. Upload trained model to the cluster
    upload = dsl.ContainerOp(
        name="upload",
        image="tidylobster/mnist-pipeline-upload:latest",         # <-- Replace with correct docker image
        file_outputs={"model-version": "/model-version.txt"},
        arguments=[train.outputs["accuracy"]])
    upload.after(train)
    
    upload.add_volume(storage_volume) 
    upload.add_volume_mount(storage_volume_mount)
    upload.add_env_variable(mount_path_env)
    upload.add_env_variable(model_name_env)
    upload.add_env_variable(hydrosphere_address_env)
    upload.add_env_variable(learning_rate_env)
    upload.add_env_variable(learning_steps_env)
    upload.add_env_variable(batch_size_env)

    # 4. Pre-deploy application
    predeploy = dsl.ContainerOp(
        name="predeploy",
        image="tidylobster/mnist-pipeline-predeploy:latest",        # <-- Replace with correct docker image
        arguments=[upload.outputs["model-version"]],
        file_outputs={"predeploy-app-name": "/predeploy-app-name.txt"})
    predeploy.after(upload)

    predeploy.add_env_variable(hydrosphere_address_env)
    predeploy.add_env_variable(application_name_env)
    predeploy.add_env_variable(model_name_env)
    
    # 5. Test the model 
    test = dsl.ContainerOp(
        name="test",
        image="tidylobster/mnist-pipeline-test:latest",               # <-- Replace with correct docker image
        arguments=[predeploy.outputs["predeploy-app-name"]])
    test.set_retry(3)
    test.after(predeploy)

    test.add_volume(storage_volume) 
    test.add_volume_mount(storage_volume_mount)
    test.add_env_variable(mount_path_env)
    test.add_env_variable(hydrosphere_address_env)
    test.add_env_variable(application_name_env)
    test.add_env_variable(signature_name_env) 
    test.add_env_variable(warmup_count_env)
    test.add_env_variable(acceptable_accuracy_env)
    test.add_env_variable(requests_delay_env)
    test.add_env_variable(recurring_run_env)

    # 6. Remove predeploy application
    rm_predeploy = dsl.ContainerOp(
        name="remove-predeploy",
        image="tidylobster/mnist-pipeline-rm-predeploy:latest",    # <-- Replace with correct docker image  
        arguments=[predeploy.outputs["predeploy-app-name"]])
    rm_predeploy.after(test)
    rm_predeploy.add_env_variable(hydrosphere_address_env)

    # 7. Deploy application
    deploy = dsl.ContainerOp(
        name="deploy",
        image="tidylobster/mnist-pipeline-deploy:latest",              # <-- Replace with correct docker image
        arguments=[upload.outputs["model-version"]])
    deploy.after(test)

    deploy.add_env_variable(hydrosphere_address_env)
    deploy.add_env_variable(application_name_env)
    deploy.add_env_variable(model_name_env)
Example 9
    def to_k8s_volumes(self):
        return k8s.V1Volume(
            name=self.pv_name,
            persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name=self.pvc_name),
        )
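
# Hedged usage sketch: any object exposing pv_name and pvc_name can carry the method
# above; the class name and the volume/claim names below are illustrative.
from kubernetes.client import models as k8s


class VolumeSpec:
    def __init__(self, pv_name, pvc_name):
        self.pv_name = pv_name
        self.pvc_name = pvc_name

    def to_k8s_volumes(self):
        return k8s.V1Volume(
            name=self.pv_name,
            persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(claim_name=self.pvc_name),
        )


data_volume = VolumeSpec("airflow-data", "airflow-data-pvc").to_k8s_volumes()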
import datetime as dt

from kubernetes.client import models as k8s

from airflow import DAG
from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import (
    KubernetesPodOperator,
)

with DAG(
        dag_id="02_kubernetes",
        description="Fetches ratings from the Movielens API using kubernetes.",
        start_date=dt.datetime(2019, 1, 1),
        end_date=dt.datetime(2019, 1, 3),
        schedule_interval="@daily",
        default_args={"depends_on_past": True},
) as dag:

    volume_claim = k8s.V1PersistentVolumeClaimVolumeSource(
        claim_name="data-volume")
    volume = k8s.V1Volume(name="data-volume",
                          persistent_volume_claim=volume_claim)

    volume_mount = k8s.V1VolumeMount(name="data-volume",
                                     mount_path="/data",
                                     sub_path=None,
                                     read_only=False)

    fetch_ratings = KubernetesPodOperator(
        task_id="fetch_ratings",
        # Airflow 2.0.0a2 has a bug that results in the pod operator not applying
        # the image pull policy. By default, the k8s SDK uses a policy of always
        # pulling the image when using the latest tag, but only pulling an image if
        # it's not present (what we want) when using a different tag. For now, we
        # use this behaviour to get our desired image policy behaviour.
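        # Hedged sketch of how the remaining arguments might look (not from the
        # original source): the image tag deliberately avoids "latest", so the
        # default pull policy described above only pulls when the image is absent.
        # The image name, command, paths, and namespace are illustrative placeholders.
        image="example/movielens-fetch:k8s",
        cmds=["python", "fetch_ratings.py"],
        arguments=["--output-path", "/data/ratings/{{ ds }}.json"],
        namespace="airflow",
        volumes=[volume],
        volume_mounts=[volume_mount],
        in_cluster=True,
    )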
Example 11
# smaller than the number of the actual filters
num_of_pyramid_tasks_per_tile = 10

# Kubernetes config: namespace, resources, volume and volume_mounts
namespace = "default"

compute_resources = {
    "request_cpu": "2000m",
    "request_memory": "1.5Gi",
    "limit_cpu": "2000m",
    "limit_memory": "4.5Gi",
}

dataset_volume = k8s.V1Volume(
    name="eo-data",
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(
        claim_name="fonda-datasets"),
)

dataset_volume_mount = k8s.V1VolumeMount(name="eo-data",
                                         mount_path="/data/input",
                                         sub_path=None,
                                         read_only=True)
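
# Hedged sketch (not from the original snippet): in the provider versions this code
# appears to target, the compute_resources dict above could be handed to
# KubernetesPodOperator via resources=; newer provider releases expect a
# k8s.V1ResourceRequirements object instead. The image and task name are illustrative.
from airflow.providers.cncf.kubernetes.operators.kubernetes_pod import KubernetesPodOperator

process_tile = KubernetesPodOperator(
    namespace=namespace,
    image="example/force-preprocessing:latest",  # illustrative image
    name="process-tile",
    task_id="process_tile",
    resources=compute_resources,
    volumes=[dataset_volume],
    volume_mounts=[dataset_volume_mount],
)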

outputs_volume = k8s.V1Volume(
    name="outputs-data",
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(
        claim_name="force-airflow"),
)

outputs_volume_mount = k8s.V1VolumeMount(name="outputs-data",
                                         mount_path=OUTPUTS_DATA_PATH,
Example 12
#
# Use this for local testing
#
# volume = k8s.V1Volume(
#    name="lyric-wordcloud-volume",
#    host_path=k8s.V1HostPathVolumeSource(
#        path="/data",
#        type="Directory"
#    )
# )

# lyrics-wordcloud-volume is an existing PersistentVolumeClaim in the CephFS storage
volume = k8s.V1Volume(
    name="lyrics-wordcloud-volume",
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(
        claim_name='lyrics-wordcloud-volume'))

# describes where to mount the volume in the pod
volume_mount = k8s.V1VolumeMount(name="lyrics-wordcloud-volume",
                                 mount_path="/results",
                                 sub_path=None,
                                 read_only=False)

# TODO: Read from secret
env_vars = {'GENIUS_API_KEY': 'XXXXXXXXXXXXXXXXXXXXXXXXXX'}
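
# Hedged alternative to the plain env var above (addresses the TODO): pull the API key
# from a Kubernetes Secret instead. The secret name and key are illustrative.
from airflow.kubernetes.secret import Secret

genius_api_key_secret = Secret(
    deploy_type="env",               # expose as an environment variable
    deploy_target="GENIUS_API_KEY",  # env var name inside the container
    secret="genius-credentials",     # name of the Kubernetes Secret (illustrative)
    key="api-key",                   # key within that Secret (illustrative)
)
# Passed to the operator via secrets=[genius_api_key_secret] instead of env_vars.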

#
# List of artists to create lyric wordclouds for
artists = [
    "Rammstein", "Die Ärzte", "Die Toten Hosen", "Peter Maffay", "Nimo",
    "Mark Forster", "Lea"
Example 13
def pipeline_definition(
    hydrosphere_address,
    mount_path='/storage',
    learning_rate="0.01",
    epochs="10",
    batch_size="256",
    model_name="mnist",
    acceptable_accuracy="0.90",
):

    storage_pvc = k8s.V1PersistentVolumeClaimVolumeSource(claim_name="storage")
    storage_volume = k8s.V1Volume(name="storage",
                                  persistent_volume_claim=storage_pvc)
    storage_volume_mount = k8s.V1VolumeMount(
        mount_path="{{workflow.parameters.mount-path}}", name="storage")

    # 1. Download MNIST data
    download = dsl.ContainerOp(
        name="download",
        image="tidylobster/mnist-pipeline-download:latest",  # <-- Replace with correct docker image
        file_outputs={"data_path": "/data_path.txt"},
        arguments=["--mount-path", mount_path])
    download.add_volume(storage_volume)
    download.add_volume_mount(storage_volume_mount)

    # 2. Train and save a MNIST classifier using Tensorflow
    train = dsl.ContainerOp(
        name="train",
        image="tidylobster/mnist-pipeline-train:latest",  # <-- Replace with correct docker image
        file_outputs={
            "accuracy": "/accuracy.txt",
            "model_path": "/model_path.txt",
        },
        command=[
            "python", "train-estimator.py", "--data-path",
            download.outputs["data_path"], "--mount-path", mount_path,
            "--learning-rate", learning_rate, "--epochs", epochs,
            "--batch-size", batch_size
        ])
    train.add_volume(storage_volume)
    train.add_volume_mount(storage_volume_mount)

    train.after(download)
    train.set_memory_request('1G')
    train.set_cpu_request('1')

    # 3. Release trained model to the cluster
    release = dsl.ContainerOp(
        name="release",
        image="tidylobster/mnist-pipeline-release:latest",  # <-- Replace with correct docker image
        file_outputs={"model-version": "/model-version.txt"},
        arguments=[
            "--data-path",
            download.outputs["data_path"],
            "--mount-path",
            mount_path,
            "--model-name",
            model_name,
            "--model-path",
            train.outputs["model_path"],
            "--accuracy",
            train.outputs["accuracy"],
            "--hydrosphere-address",
            hydrosphere_address,
            "--learning-rate",
            learning_rate,
            "--epochs",
            epochs,
            "--batch-size",
            batch_size,
        ])
    release.add_volume(storage_volume)
    release.add_volume_mount(storage_volume_mount)

    release.after(train)

    # 4. Deploy to stage application
    deploy_to_stage = dsl.ContainerOp(
        name="deploy_to_stage",
        image="tidylobster/mnist-pipeline-deploy-to-stage:latest",  # <-- Replace with correct docker image
        file_outputs={"stage-app-name": "/stage-app-name.txt"},
        arguments=[
            "--model-version",
            release.outputs["model-version"],
            "--hydrosphere-address",
            hydrosphere_address,
            "--model-name",
            model_name,
        ],
    )
    deploy_to_stage.after(release)

    # 5. Test the model
    test = dsl.ContainerOp(
        name="test",
        image="tidylobster/mnist-pipeline-test:latest",  # <-- Replace with correct docker image
        arguments=[
            "--stage-app-name",
            deploy_to_stage.outputs["stage-app-name"],
            "--data-path",
            download.outputs["data_path"],
            "--mount-path",
            mount_path,
            "--hydrosphere-address",
            hydrosphere_address,
            "--acceptable-accuracy",
            acceptable_accuracy,
            "--model-name",
            model_name,
        ],
    )
    test.add_volume(storage_volume)
    test.add_volume_mount(storage_volume_mount)

    test.after(deploy_to_stage)
    test.set_retry(3)

    # 6. Deploy to production application
    deploy_to_prod = dsl.ContainerOp(
        name="deploy_to_prod",
        image="tidylobster/mnist-pipeline-deploy-to-prod:latest",  # <-- Replace with correct docker image
        arguments=[
            "--model-version", release.outputs["model-version"],
            "--model-name", model_name, "--hydrosphere-address",
            hydrosphere_address
        ],
    )
    deploy_to_prod.after(test)
    'retries': 0,
    'retry_delay': timedelta(minutes=5),
}

spark_application_name = "spark-wordcount-py-{{ ds }}-{{ task_instance.try_number }}"

compute_resources = {
    'request_cpu': '200m',
    'request_memory': '512Mi',
    'limit_cpu': '500m',
    'limit_memory': '1Gi'
}

volume = k8s.V1Volume(
    name="spark-data",
    persistent_volume_claim=k8s.V1PersistentVolumeClaimVolumeSource(
        claim_name='spark-pvc'))

# this describes where to mount the volume in the pod
volume_mount = k8s.V1VolumeMount(name="spark-data",
                                 mount_path="/mnt1",
                                 sub_path=None,
                                 read_only=False)

dag = DAG(
    'example_spark_wordcount_dag',
    default_args=default_args,
    description='Simple wordcount spark job which uses the spark-k8s-operator in a Kubernetes cluster',
    schedule_interval=timedelta(days=1),
)
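
# Hedged sketch of a task for this DAG (not from the original source): the
# spark-on-k8s operator is usually driven through SparkKubernetesOperator, which
# submits a SparkApplication manifest. The manifest file name is illustrative; it
# would reference spark_application_name and mount the spark-data volume defined above.
from airflow.providers.cncf.kubernetes.operators.spark_kubernetes import (
    SparkKubernetesOperator,
)

submit_wordcount = SparkKubernetesOperator(
    task_id="submit_spark_wordcount",
    namespace="default",
    application_file="spark-wordcount-application.yaml",  # illustrative manifest path
    dag=dag,
)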