def test_use_azure_secret(self):
    op1 = ContainerOp(name='op1', image='image')
    op1 = op1.apply(use_azure_secret('foo'))

    assert len(op1.container.env) == 4

    index = 0
    for expected in ['AZ_SUBSCRIPTION_ID', 'AZ_TENANT_ID', 'AZ_CLIENT_ID',
                     'AZ_CLIENT_SECRET']:
        assert op1.container.env[index].name == expected
        assert op1.container.env[index].value_from.secret_key_ref.name == 'foo'
        assert op1.container.env[index].value_from.secret_key_ref.key == expected
        index += 1
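# For context: a minimal sketch of the modifier under test. It mirrors exactly
# what the assertions above check (four env vars sourced from one Kubernetes
# secret). The real implementation ships with kfp.azure and may differ in detail.
from kubernetes import client as k8s_client


def use_azure_secret_sketch(secret_name='azcreds'):
    def _use_azure_secret(task):
        # Map each service-principal key in the secret to an env var of the
        # same name on the task's container.
        for key in ['AZ_SUBSCRIPTION_ID', 'AZ_TENANT_ID',
                    'AZ_CLIENT_ID', 'AZ_CLIENT_SECRET']:
            task.container.add_env_variable(
                k8s_client.V1EnvVar(
                    name=key,
                    value_from=k8s_client.V1EnvVarSource(
                        secret_key_ref=k8s_client.V1SecretKeySelector(
                            name=secret_name, key=key))))
        return task
    return _use_azure_secret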
def model_deploy(resource_group, workspace):
    operation = deploy_operation(
        deployment_name='deploymentname',
        model_name='model_name:1',
        tenant_id='$(AZ_TENANT_ID)',
        service_principal_id='$(AZ_CLIENT_ID)',
        service_principal_password='******',
        subscription_id='$(AZ_SUBSCRIPTION_ID)',
        resource_group=resource_group,
        workspace=workspace,
        inference_config='src/inferenceconfig.json',
        deployment_config='src/deploymentconfig.json'). \
        apply(use_azure_secret()). \
        apply(use_image(deploy_image_name))
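# deploy_operation and use_image above are helpers from the surrounding repo,
# not KFP SDK APIs. In KFP v1, an op factory like deploy_operation is typically
# produced by loading a component definition; the path below is hypothetical:
import kfp.components as components

deploy_operation = components.load_component_from_file(
    'deploy/component.yaml')  # hypothetical component file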
def test_use_azure_secret(self):
    with Pipeline('somename') as p:
        op1 = ContainerOp(name='op1', image='image')
        op1 = op1.apply(use_azure_secret('azcreds'))

        assert len(op1.env_variables) == 4

        index = 0
        for expected in [
            'AZ_SUBSCRIPTION_ID', 'AZ_TENANT_ID', 'AZ_CLIENT_ID',
            'AZ_CLIENT_SECRET'
        ]:
            assert op1.env_variables[index].name == expected
            assert op1.env_variables[index].value_from.secret_key_ref.name == 'azcreds'  # noqa: E501
            assert op1.env_variables[index].value_from.secret_key_ref.key == expected  # noqa: E501
            index += 1
def transformer(containerOp):
    # TODO: shouldn't hard-code this experiment name
    containerOp.arguments = ['/scripts/pipelineWrapper.py',
                             'Privacy',
                             'python'] + containerOp.arguments
    containerOp.container.set_image_pull_policy("Always")
    containerOp.add_volume(
        k8s_client.V1Volume(
            name='azure',
            persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                claim_name='azure-managed-disk')
        )
    ).add_volume_mount(k8s_client.V1VolumeMount(mount_path='/mnt/azure',
                                                name='azure'))
    containerOp.container.add_env_variable(V1EnvVar(name='AZ_NAME', value=ws.name))\
        .add_env_variable(V1EnvVar(name='AZ_SUBSCRIPTION_ID', value=ws.subscription_id))\
        .add_env_variable(V1EnvVar(name='AZ_RESOURCE_GROUP', value=ws.resource_group))
    containerOp.apply(use_azure_secret('azcreds'))
    return containerOp
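# Hedged usage sketch: a ContainerOp -> ContainerOp function like transformer
# above is applied to every op in a pipeline by registering it on the pipeline
# conf. add_op_transformer is the KFP v1 API for this; the pipeline name and
# body below are placeholders.
import kfp.dsl as dsl


@dsl.pipeline(name='transformed-pipeline',
              description='Applies transformer to every op at compile time')
def transformed_pipeline():
    dsl.get_pipeline_conf().add_op_transformer(transformer)
    # ...ContainerOps declared here are rewritten by transformer when the
    # pipeline is compiled...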
def run_spark_job(main_definition_file, command_line_arguments):
    operation = run_job_operation(
        executor_size='Small',
        executors=1,
        main_class_name='""',
        main_definition_file=main_definition_file,
        name='kubeflowsynapsetest',
        tenant_id='$(AZ_TENANT_ID)',
        service_principal_id='$(AZ_CLIENT_ID)',
        service_principal_password='******',
        subscription_id='$(AZ_SUBSCRIPTION_ID)',
        resource_group='kubeflow-demo-rg',
        command_line_arguments=command_line_arguments,
        spark_pool_name='kubeflowsynapse',
        language='',
        reference_files='',
        configuration='',
        tags='',
        spark_pool_config_file='./src/spark_pool_config.yaml',
        wait_until_job_finished=True,
        waiting_timeout_in_seconds=3600,
        workspace_name='kubeflow-demo'). \
        apply(use_azure_secret()). \
        apply(use_image(run_job_image_name))
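# use_image appears throughout these snippets but is not a KFP SDK API. A
# plausible minimal implementation, assuming it simply retargets the task's
# container image:
def use_image(image_name):
    def _use_image(task):
        # Point the task at the repo-specific image built for this step.
        task.container.image = image_name
        return task
    return _use_image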
def cnn_train(resource_group, workspace, dataset, token):
    """Pipeline steps"""

    persistent_volume_path = '/mnt/azure'
    data_download = dataset  # noqa: E501
    batch = 32
    model_name = 'cnnmodel'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'
    image_repo_name = "k8scc01covidmlopsacr.azurecr.io/mlops"
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'
    mlflow_url = 'http://mlflow.mlflow:5000'

    exit_op = dsl.ContainerOp(name='Exit Handler',
                              image="curlimages/curl",
                              command=['curl'],
                              arguments=[
                                  '-d',
                                  get_callback_payload(TRAIN_FINISH_EVENT),
                                  callback_url
                              ])

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d',
                                    get_callback_payload(TRAIN_START_EVENT),
                                    callback_url])  # noqa: E501

        operations['tensorflow preprocess'] = dsl.ContainerOp(
            name='tensorflow preprocess',
            init_containers=[start_callback],
            image=image_repo_name + '/tensorflow-preprocess:latest',
            command=['python'],
            arguments=[
                '/scripts/data.py',
                '--base_path', persistent_volume_path,
                '--data', training_folder,
                '--target', training_dataset,
                '--img_size', image_size,
                '--zipfile', data_download
            ])

        operations['tensorflow training'] = dsl.ContainerOp(
            name="tensorflow training",
            image=image_repo_name + '/tensorflow-training:latest',
            command=['python'],
            arguments=[
                '/scripts/train.py',
                '--base_path', persistent_volume_path,
                '--data', training_folder,
                '--epochs', 2,
                '--batch', batch,
                '--image_size', image_size,
                '--lr', 0.0001,
                '--outputs', model_folder,
                '--dataset', training_dataset
            ],
            output_artifact_paths={
                'mlpipeline-metrics': '/mlpipeline-metrics.json',
                'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
            }).apply(use_azstorage_secret()).add_env_variable(
                V1EnvVar(name="RUN_ID",
                         value=dsl.RUN_ID_PLACEHOLDER)).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_TOKEN",
                         value=token)).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_URI",
                         value=mlflow_url)).add_env_variable(
                V1EnvVar(name="GIT_PYTHON_REFRESH",
                         value='quiet'))  # noqa: E501
        operations['tensorflow training'].after(
            operations['tensorflow preprocess'])  # noqa: E501

        operations['evaluate'] = dsl.ContainerOp(
            name='evaluate',
            image="busybox",
            command=['sh', '-c'],
            arguments=['echo', 'Life is Good!'])
        operations['evaluate'].after(operations['tensorflow training'])

        operations['register kubeflow'] = dsl.ContainerOp(
            name='register kubeflow',
            image=image_repo_name + '/register-kubeflow-artifacts:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py',
                '--base_path', persistent_volume_path,
                '--model', 'latest.h5',
                '--model_name', model_name,
                '--data', training_folder,
                '--dataset', training_dataset,
                '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret())
        operations['register kubeflow'].after(operations['evaluate'])

        operations['register AML'] = dsl.ContainerOp(
            name='register AML',
            image=image_repo_name + '/register-aml:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py',
                '--base_path', persistent_volume_path,
                '--model', 'latest.h5',
                '--model_name', model_name,
                '--tenant_id', "$(AZ_TENANT_ID)",
                '--service_principal_id', "$(AZ_CLIENT_ID)",
                '--service_principal_password', "$(AZ_CLIENT_SECRET)",
                '--subscription_id', "$(AZ_SUBSCRIPTION_ID)",
                '--resource_group', resource_group,
                '--workspace', workspace,
                '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret())
        operations['register AML'].after(operations['register kubeflow'])

        operations['register mlflow'] = dsl.ContainerOp(
            name='register mlflow',
            image=image_repo_name + '/register-mlflow:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py',
                '--model', 'model',
                '--model_name', model_name,
                '--experiment_name', 'kubeflow-mlops',
                '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret()).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_URI",
                         value=mlflow_url)).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_TOKEN",
                         value=token))  # noqa: E501
        operations['register mlflow'].after(operations['register AML'])

        operations['finalize'] = dsl.ContainerOp(
            name='Finalize',
            image="curlimages/curl",
            command=['curl'],
            arguments=[
                '-d', get_callback_payload("Model is registered"),
                callback_url
            ])
        operations['finalize'].after(operations['register mlflow'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(  # noqa: E501
                    claim_name='azure-managed-file'))).add_volume_mount(
            k8s_client.V1VolumeMount(mount_path='/mnt/azure', name='azure'))
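# A hedged sketch of compiling the pipeline above, assuming cnn_train is
# decorated with @dsl.pipeline (the decorator is not shown in this snippet).
# Compiler().compile is the standard KFP v1 entry point; the output filename
# is arbitrary.
import kfp.compiler as compiler

compiler.Compiler().compile(cnn_train, 'cnn_train.yaml')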
def tacosandburritos_train(resource_group, workspace):
    """Pipeline steps"""

    persistent_volume_path = '/mnt/azure'
    data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'  # noqa: E501
    epochs = 2
    batch = 32
    learning_rate = 0.0001
    model_name = 'tacosandburritos'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'
    image_repo_name = "kubeflowyoacr.azurecr.io/mexicanfood"
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'

    exit_op = dsl.ContainerOp(
        name='Exit Handler',
        image="curlimages/curl",
        command=['curl'],
        arguments=[
            '-d', get_callback_payload(TRAIN_FINISH_EVENT),
            callback_url
        ]
    )

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d',
                                    get_callback_payload(TRAIN_START_EVENT),
                                    callback_url])  # noqa: E501

        operations['preprocess'] = dsl.ContainerOp(
            name='preprocess',
            init_containers=[start_callback],
            image=image_repo_name + '/preprocess:latest',
            command=['python'],
            arguments=[
                '/scripts/data.py',
                '--base_path', persistent_volume_path,
                '--data', training_folder,
                '--target', training_dataset,
                '--img_size', image_size,
                '--zipfile', data_download
            ]
        )

        # train
        operations['training'] = dsl.ContainerOp(
            name='training',
            image=image_repo_name + '/training:latest',
            command=['python'],
            arguments=[
                '/scripts/train.py',
                '--base_path', persistent_volume_path,
                '--data', training_folder,
                '--epochs', epochs,
                '--batch', batch,
                '--image_size', image_size,
                '--lr', learning_rate,
                '--outputs', model_folder,
                '--dataset', training_dataset
            ]
        )
        operations['training'].after(operations['preprocess'])

        # register model
        operations['register'] = dsl.ContainerOp(
            name='register',
            image=image_repo_name + '/register:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py',
                '--base_path', persistent_volume_path,
                '--model', 'latest.h5',
                '--model_name', model_name,
                '--tenant_id', "$(AZ_TENANT_ID)",
                '--service_principal_id', "$(AZ_CLIENT_ID)",
                '--service_principal_password', "$(AZ_CLIENT_SECRET)",
                '--subscription_id', "$(AZ_SUBSCRIPTION_ID)",
                '--resource_group', resource_group,
                '--workspace', workspace,
                '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]
        ).apply(use_azure_secret())
        operations['register'].after(operations['training'])

        operations['finalize'] = dsl.ContainerOp(
            name='Finalize',
            image="curlimages/curl",
            command=['curl'],
            arguments=[
                '-d', get_callback_payload("Model is registered"),
                callback_url
            ]
        )
        operations['finalize'].after(operations['register'])

        # operations['deploy'] = dsl.ContainerOp(
        #     name='deploy',
        #     image=image_repo_name + '/deploy:latest',
        #     command=['sh'],
        #     arguments=[
        #         '/scripts/deploy.sh',
        #         '-n', model_name,
        #         '-m', model_name,
        #         '-t', "$(AZ_TENANT_ID)",
        #         '-r', resource_group,
        #         '-w', workspace,
        #         '-s', "$(AZ_CLIENT_ID)",
        #         '-p', "$(AZ_CLIENT_SECRET)",
        #         '-u', "$(AZ_SUBSCRIPTION_ID)",
        #         '-b', persistent_volume_path,
        #         '-x', dsl.RUN_ID_PLACEHOLDER
        #     ]
        # ).apply(use_azure_secret())
        # operations['deploy'].after(operations['register'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(  # noqa: E501
                    claim_name='azure-managed-disk')
            )
        ).add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/mnt/azure', name='azure'))
def tacosandburritos_train(resource_group, workspace):
    """Pipeline steps"""

    persistent_volume_path = '/mnt/azure'
    data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'  # noqa: E501
    epochs = 2
    batch = 32
    learning_rate = 0.0001
    model_name = 'tacosandburritos'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'
    image_repo_name = "kubeflowyoacr.azurecr.io/mexicanfood"

    # preprocess data
    operations['preprocess'] = dsl.ContainerOp(
        name='preprocess',
        image=image_repo_name + '/preprocess:latest',
        command=['python'],
        arguments=[
            '/scripts/data.py',
            '--base_path', persistent_volume_path,
            '--data', training_folder,
            '--target', training_dataset,
            '--img_size', image_size,
            '--zipfile', data_download
        ]
    )

    # train
    operations['training'] = dsl.ContainerOp(
        name='training',
        image=image_repo_name + '/training:latest',
        command=['python'],
        arguments=[
            '/scripts/train.py',
            '--base_path', persistent_volume_path,
            '--data', training_folder,
            '--epochs', epochs,
            '--batch', batch,
            '--image_size', image_size,
            '--lr', learning_rate,
            '--outputs', model_folder,
            '--dataset', training_dataset
        ]
    )
    operations['training'].after(operations['preprocess'])

    # register model
    operations['register'] = dsl.ContainerOp(
        name='register',
        image=image_repo_name + '/register:latest',
        command=['python'],
        arguments=[
            '/scripts/register.py',
            '--base_path', persistent_volume_path,
            '--model', 'latest.h5',
            '--model_name', model_name,
            '--tenant_id', "$(AZ_TENANT_ID)",
            '--service_principal_id', "$(AZ_CLIENT_ID)",
            '--service_principal_password', "$(AZ_CLIENT_SECRET)",
            '--subscription_id', "$(AZ_SUBSCRIPTION_ID)",
            '--resource_group', resource_group,
            '--workspace', workspace,
            '--run_id', dsl.RUN_ID_PLACEHOLDER
        ]
    ).apply(use_azure_secret())
    operations['register'].after(operations['training'])

    operations['deploy'] = dsl.ContainerOp(
        name='deploy',
        image=image_repo_name + '/deploy:latest',
        command=['sh'],
        arguments=[
            '/scripts/deploy.sh',
            '-n', model_name,
            '-m', model_name,
            '-i', '/scripts/inferenceconfig.json',
            '-d', '/scripts/deploymentconfig.json',
            '-t', "$(AZ_TENANT_ID)",
            '-r', resource_group,
            '-w', workspace,
            '-s', "$(AZ_CLIENT_ID)",
            '-p', "$(AZ_CLIENT_SECRET)",
            '-u', "$(AZ_SUBSCRIPTION_ID)",
            '-b', persistent_volume_path,
            '-x', dsl.RUN_ID_PLACEHOLDER
        ]
    ).apply(use_azure_secret())
    operations['deploy'].after(operations['register'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(  # noqa: E501
                    claim_name='azure-managed-disk')
            )
        ).add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/mnt/azure', name='azure'))
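# Hedged sketch of submitting the pipeline above from Python. kfp.Client and
# create_run_from_pipeline_func are KFP v1 client APIs; the host and argument
# values are placeholders, not taken from the snippet.
import kfp

client = kfp.Client(host='http://localhost:8080')  # placeholder endpoint
client.create_run_from_pipeline_func(
    tacosandburritos_train,
    arguments={'resource_group': 'my-rg', 'workspace': 'my-ws'})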
def tacosandburritos_train(resource_group, workspace, dataset):
    exit_handler = exit_op(
        callback_url=callback_url,
        callback_payload=get_callback_payload(TRAIN_FINISH_EVENT))

    with dsl.ExitHandler(exit_handler):
        operations['data processing on databricks'] = databricks_op(
            run_id=dsl.RUN_ID_PLACEHOLDER,  # noqa: E501
            notebook_params='{"argument_one":"param one","argument_two":"param two"}'  # noqa: E501
        ).apply(use_databricks_secret()). \
            add_init_container(get_start_callback_container()). \
            apply(use_image(databricks_image_name))

        operations['preprocess'] = preprocess_op(
            base_path=persistent_volume_path,  # noqa: E501
            training_folder=training_folder,  # noqa: E501
            target=training_dataset,
            image_size=image_size,
            zipfile=dataset). \
            apply(use_image(preprocess_image_name))
        operations['preprocess'].after(
            operations['data processing on databricks'])  # noqa: E501

        operations['training'] = train_op(
            base_path=persistent_volume_path,
            training_folder=training_folder,
            epochs=2,
            batch=batch,
            image_size=image_size,
            lr=0.0001,
            model_folder=model_folder,
            images=training_dataset,
            dataset=operations['preprocess'].outputs['dataset']). \
            set_memory_request('16G'). \
            add_env_variable(V1EnvVar(name="RUN_ID",
                                      value=dsl.RUN_ID_PLACEHOLDER)). \
            add_env_variable(V1EnvVar(name="MLFLOW_TRACKING_URI",
                                      value=mlflow_url)). \
            add_env_variable(V1EnvVar(name="GIT_PYTHON_REFRESH",
                                      value='quiet')). \
            apply(use_image(train_image_name))  # noqa: E501, E127
        operations['training'].after(operations['preprocess'])

        operations['evaluate'] = evaluate_op(
            model=operations['training'].outputs['model'])
        operations['evaluate'].after(operations['training'])

        operations['register to AML'] = register_op(
            base_path=persistent_volume_path,
            model_file='latest.h5',
            model_name=model_name,
            tenant_id='$(AZ_TENANT_ID)',
            service_principal_id='$(AZ_CLIENT_ID)',
            service_principal_password='******',
            subscription_id='$(AZ_SUBSCRIPTION_ID)',
            resource_group=resource_group,
            workspace=workspace,
            run_id=dsl.RUN_ID_PLACEHOLDER).apply(use_azure_secret()).apply(
                use_image(register_images_name))  # noqa: E501, E127
        operations['register to AML'].after(operations['evaluate'])

        operations['register to mlflow'] = register_mlflow_op(
            model='model',
            model_name=model_name,
            experiment_name='mexicanfood',
            run_id=dsl.RUN_ID_PLACEHOLDER).apply(use_azure_secret()). \
            add_env_variable(V1EnvVar(name="MLFLOW_TRACKING_URI",
                                      value=mlflow_url)). \
            apply(use_image(register_mlflow_image_name))  # noqa: E501
        operations['register to mlflow'].after(operations['register to AML'])

        operations['finalize'] = finalize_op(
            callback_url=callback_url,
            callback_payload=get_callback_payload("Model is registered"))
        operations['finalize'].after(operations['register to mlflow'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(  # noqa: E501
                    claim_name='azure-managed-file'))).add_volume_mount(
            k8s_client.V1VolumeMount(mount_path='/mnt/azure', name='azure'))
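# use_databricks_secret is not shown in these snippets. A minimal sketch,
# assuming it mirrors use_azure_secret and that the secret stores
# DATABRICKS_HOST and DATABRICKS_TOKEN (the secret name and keys here are
# assumptions, not confirmed by the snippet):
from kubernetes import client as k8s_client


def use_databricks_secret_sketch(secret_name='databricks-secret'):
    def _use_databricks_secret(task):
        # Expose the Databricks workspace URL and API token to the step.
        for key in ['DATABRICKS_HOST', 'DATABRICKS_TOKEN']:
            task.container.add_env_variable(
                k8s_client.V1EnvVar(
                    name=key,
                    value_from=k8s_client.V1EnvVarSource(
                        secret_key_ref=k8s_client.V1SecretKeySelector(
                            name=secret_name, key=key))))
        return task
    return _use_databricks_secret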
def tacosandburritos_train(resource_group, workspace, dataset):
    """Pipeline steps"""

    persistent_volume_path = '/mnt/azure'
    data_download = dataset  # noqa: E501
    batch = 32
    model_name = 'tacosandburritos'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'
    image_repo_name = "kubeflowyoacr.azurecr.io/mexicanfood"
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'
    mlflow_url = 'http://mlflow:5000'

    exit_op = dsl.ContainerOp(name='Exit Handler',
                              image="curlimages/curl",
                              command=['curl'],
                              arguments=[
                                  '-d',
                                  get_callback_payload(TRAIN_FINISH_EVENT),
                                  callback_url
                              ])

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d',
                                    get_callback_payload(TRAIN_START_EVENT),
                                    callback_url])  # noqa: E501

        operations['data processing on databricks'] = dsl.ContainerOp(
            name='data processing on databricks',
            init_containers=[start_callback],
            image=image_repo_name + '/databricks-notebook:latest',
            arguments=[
                '-r', dsl.RUN_ID_PLACEHOLDER,
                '-p',
                '{"argument_one":"param one","argument_two":"param two"}'
            ]).apply(use_databricks_secret())

        operations['preprocess'] = dsl.ContainerOp(
            name='preprocess',
            image=image_repo_name + '/preprocess:latest',
            command=['python'],
            arguments=[
                '/scripts/data.py',
                '--base_path', persistent_volume_path,
                '--data', training_folder,
                '--target', training_dataset,
                '--img_size', image_size,
                '--zipfile', data_download
            ])
        operations['preprocess'].after(
            operations['data processing on databricks'])  # noqa: E501

        # train
        # TODO: read set of parameters from config file
        with dsl.ParallelFor([{
                'epochs': 1, 'lr': 0.0001
        }, {
                'epochs': 2, 'lr': 0.0002
        }, {
                'epochs': 3, 'lr': 0.0003
        }]) as item:  # noqa: E501
            operations['training'] = dsl.ContainerOp(
                name="training",
                image=image_repo_name + '/training:latest',
                command=['python'],
                arguments=[
                    '/scripts/train.py',
                    '--base_path', persistent_volume_path,
                    '--data', training_folder,
                    '--epochs', item.epochs,
                    '--batch', batch,
                    '--image_size', image_size,
                    '--lr', item.lr,
                    '--outputs', model_folder,
                    '--dataset', training_dataset
                ],
                # change output_artifact_paths to file_outputs after this PR
                # is merged: https://github.com/kubeflow/pipelines/pull/2334
                output_artifact_paths={
                    'mlpipeline-metrics': '/mlpipeline-metrics.json',
                    'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
                }).add_env_variable(
                    V1EnvVar(name="RUN_ID",
                             value=dsl.RUN_ID_PLACEHOLDER)).add_env_variable(
                    V1EnvVar(name="MLFLOW_TRACKING_URI",
                             value=mlflow_url)).add_env_variable(
                    V1EnvVar(name="GIT_PYTHON_REFRESH",
                             value='quiet'))  # noqa: E501
            operations['training'].after(operations['preprocess'])

        operations['evaluate'] = dsl.ContainerOp(
            name='evaluate',
            image="busybox",
            command=['sh', '-c'],
            arguments=['echo', 'Life is Good!'])
        operations['evaluate'].after(operations['training'])

        # register kubeflow artifacts model
        operations['register to kubeflow'] = dsl.ContainerOp(
            name='register to kubeflow',
            image=image_repo_name + '/registerartifacts:latest',
            command=['python'],
            arguments=[
                '/scripts/registerartifacts.py',
                '--base_path', persistent_volume_path,
                '--model', 'latest.h5',
                '--model_name', model_name,
                '--data', training_folder,
                '--dataset', training_dataset,
                '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret())
        operations['register to kubeflow'].after(operations['evaluate'])

        # register model
        operations['register to AML'] = dsl.ContainerOp(
            name='register to AML',
            image=image_repo_name + '/register:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py',
                '--base_path', persistent_volume_path,
                '--model', 'latest.h5',
                '--model_name', model_name,
                '--tenant_id', "$(AZ_TENANT_ID)",
                '--service_principal_id', "$(AZ_CLIENT_ID)",
                '--service_principal_password', "$(AZ_CLIENT_SECRET)",
                '--subscription_id', "$(AZ_SUBSCRIPTION_ID)",
                '--resource_group', resource_group,
                '--workspace', workspace,
                '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret())
        operations['register to AML'].after(operations['register to kubeflow'])

        # register model to mlflow
        operations['register to mlflow'] = dsl.ContainerOp(
            name='register to mlflow',
            image=image_repo_name + '/register-mlflow:latest',
            command=['python'],
            arguments=[
                '/scripts/register.py',
                '--model', 'model',
                '--model_name', model_name,
                '--experiment_name', 'mexicanfood',
                '--run_id', dsl.RUN_ID_PLACEHOLDER
            ]).apply(use_azure_secret()).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url))  # noqa: E501
        operations['register to mlflow'].after(operations['register to AML'])

        operations['finalize'] = dsl.ContainerOp(
            name='Finalize',
            image="curlimages/curl",
            command=['curl'],
            arguments=[
                '-d', get_callback_payload("Model is registered"),
                callback_url
            ])
        operations['finalize'].after(operations['register to mlflow'])

        # operations['deploy'] = dsl.ContainerOp(
        #     name='deploy',
        #     image=image_repo_name + '/deploy:latest',
        #     command=['sh'],
        #     arguments=[
        #         '/scripts/deploy.sh',
        #         '-n', model_name,
        #         '-m', model_name,
        #         '-t', "$(AZ_TENANT_ID)",
        #         '-r', resource_group,
        #         '-w', workspace,
        #         '-s', "$(AZ_CLIENT_ID)",
        #         '-p', "$(AZ_CLIENT_SECRET)",
        #         '-u', "$(AZ_SUBSCRIPTION_ID)",
        #         '-b', persistent_volume_path,
        #         '-x', dsl.RUN_ID_PLACEHOLDER
        #     ]
        # ).apply(use_azure_secret())
        # operations['deploy'].after(operations['register'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(  # noqa: E501
                    claim_name='azure-managed-file'))).add_volume_mount(
            k8s_client.V1VolumeMount(mount_path='/mnt/azure', name='azure'))
def tacosandburritos_train(resource_group, workspace, dataset,
                           mlflow_experiment_id, azdocallbackinfo=None):
    exit_handler_op = exit_op(
        kfp_host_url="$(KFP_HOST)",
        azdocallbackinfo=azdocallbackinfo,
        run_id=dsl.RUN_ID_PLACEHOLDER,
        tenant_id="$(AZ_TENANT_ID)",
        service_principal_id="$(AZ_CLIENT_ID)",
        service_principal_password="******",
        pat_env="PAT_ENV").apply(use_azure_secret()).apply(
            use_kfp_host_secret()).apply(use_image(exit_image_name)).apply(
            use_secret_var("azdopat", "PAT_ENV", "azdopat"))

    with dsl.ExitHandler(exit_op=exit_handler_op):
        operations['mlflowproject'] = mlflow_project_op(
            mlflow_experiment_id=mlflow_experiment_id,  # noqa: E501
            kf_run_id=dsl.RUN_ID_PLACEHOLDER).apply(
                use_databricks_secret()).apply(
                use_image(mlflow_project_image_name))  # noqa: E501

        operations['preprocess'] = preprocess_op(
            base_path=persistent_volume_path,  # noqa: E501
            training_folder=training_folder,  # noqa: E501
            target=training_dataset,
            image_size=image_size,
            zipfile=dataset).apply(
                use_image(preprocess_image_name))  # noqa: E501
        operations['preprocess'].after(operations['mlflowproject'])  # noqa: E501

        operations['training'] = train_op(
            base_path=persistent_volume_path,
            training_folder=training_folder,
            epochs=2,
            batch=batch,
            image_size=image_size,
            lr=0.0001,
            model_folder=model_folder,
            images=training_dataset,
            dataset=operations['preprocess'].outputs['dataset']). \
            set_memory_request('16G'). \
            add_env_variable(V1EnvVar(name="RUN_ID",
                                      value=dsl.RUN_ID_PLACEHOLDER)). \
            add_env_variable(V1EnvVar(name="MLFLOW_TRACKING_URI",
                                      value=mlflow_url)). \
            add_env_variable(V1EnvVar(name="GIT_PYTHON_REFRESH",
                                      value='quiet')). \
            apply(use_image(train_image_name))

        # Spot nodepool target
        # operations['training'].add_toleration(k8s_client.V1Toleration(
        #     key='kubernetes.azure.com/scalesetpriority',
        #     operator='Equal',
        #     value='spot',
        #     effect="NoSchedule"))

        # Virtual/ACI nodepool target
        # operations['training'].add_node_selector_constraint(
        #     label_name='type', value='virtual-kubelet')
        # operations['training'].add_toleration(k8s_client.V1Toleration(
        #     key='virtual-kubelet.io/provider', operator='Exists'))

        operations['training'].after(operations['preprocess'])

        operations['evaluate'] = evaluate_op(
            model=operations['training'].outputs['model'])
        operations['evaluate'].after(operations['training'])

        operations['register to AML'] = register_op(
            base_path=persistent_volume_path,
            model_file='latest.h5',
            model_name=model_name,
            tenant_id='$(AZ_TENANT_ID)',
            service_principal_id='$(AZ_CLIENT_ID)',
            service_principal_password='******',
            subscription_id='$(AZ_SUBSCRIPTION_ID)',
            resource_group=resource_group,
            workspace=workspace,
            run_id=dsl.RUN_ID_PLACEHOLDER). \
            apply(use_azure_secret()). \
            apply(use_image(register_images_name))
        operations['register to AML'].after(operations['evaluate'])

        operations['register to mlflow'] = register_mlflow_op(
            model='model',
            model_name=model_name,
            experiment_name='mexicanfood',
            run_id=dsl.RUN_ID_PLACEHOLDER). \
            apply(use_azure_secret()). \
            add_env_variable(V1EnvVar(name="MLFLOW_TRACKING_URI",
                                      value=mlflow_url)). \
            apply(use_image(register_mlflow_image_name))
        operations['register to mlflow'].after(operations['register to AML'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(  # noqa: E501
                    claim_name='azure-managed-file'))).add_volume_mount(
            k8s_client.V1VolumeMount(mount_path='/mnt/azure', name='azure'))
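# use_secret_var("azdopat", "PAT_ENV", "azdopat") above suggests a helper that
# injects a single secret key as one env var. A minimal sketch consistent with
# that call shape (implementation assumed, not confirmed by the snippet):
from kubernetes import client as k8s_client


def use_secret_var(secret_name, env_name, secret_key):
    def _use_secret_var(task):
        # Surface one key of the named secret under the requested env name,
        # e.g. the Azure DevOps PAT used by the exit handler.
        task.container.add_env_variable(
            k8s_client.V1EnvVar(
                name=env_name,
                value_from=k8s_client.V1EnvVarSource(
                    secret_key_ref=k8s_client.V1SecretKeySelector(
                        name=secret_name, key=secret_key))))
        return task
    return _use_secret_var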