def default_train(resource_group, workspace, dataset):
    """Pipeline steps"""
    operations = {}
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'

    exit_op = dsl.ContainerOp(
        name='Exit Handler',
        image="curlimages/curl",
        command=['curl'],
        arguments=['-d', get_callback_payload(TRAIN_FINISH_EVENT), callback_url])

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d', get_callback_payload(TRAIN_START_EVENT), callback_url])

        operations['start'] = dsl.ContainerOp(
            name='start',
            init_containers=[start_callback],
            image="busybox",
            command=['sh', '-c'],
            # 'sh -c' takes a single command string as its argument
            arguments=['echo "Pipeline starting"'])

        operations['end'] = dsl.ContainerOp(
            name='End',
            image="curlimages/curl",
            command=['curl'],
            arguments=['-d', get_callback_payload("Model is registered"), callback_url])
        operations['end'].after(operations['start'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-file'))
        ).add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/mnt/azure', name='azure'))
def xgb_train_pipeline(
    output,
    project,
    region='us-central1',
    train_data='gs://ml-pipeline-playground/sfpd/train.csv',
    eval_data='gs://ml-pipeline-playground/sfpd/eval.csv',
    schema='gs://ml-pipeline-playground/sfpd/schema.json',
    target='resolution',
    rounds=200,
    workers=2,
    true_label='ACTION',
):
    delete_cluster_op = DeleteClusterOp('delete-cluster', project, region).apply(
        gcp.use_gcp_secret('user-gcp-sa'))

    with dsl.ExitHandler(exit_op=delete_cluster_op):
        create_cluster_op = CreateClusterOp('create-cluster', project, region,
                                            output).apply(gcp.use_gcp_secret('user-gcp-sa'))

        analyze_op = AnalyzeOp('analyze', project, region, create_cluster_op.output,
                               schema, train_data,
                               '%s/{{workflow.name}}/analysis' % output).apply(
                                   gcp.use_gcp_secret('user-gcp-sa'))

        transform_op = TransformOp('transform', project, region,
                                   create_cluster_op.output, train_data, eval_data,
                                   target, analyze_op.output,
                                   '%s/{{workflow.name}}/transform' % output).apply(
                                       gcp.use_gcp_secret('user-gcp-sa'))

        train_op = TrainerOp('train', project, region, create_cluster_op.output,
                             transform_op.outputs['train'], transform_op.outputs['eval'],
                             target, analyze_op.output, workers, rounds,
                             '%s/{{workflow.name}}/model' % output).apply(
                                 gcp.use_gcp_secret('user-gcp-sa'))

        predict_op = PredictOp('predict', project, region, create_cluster_op.output,
                               transform_op.outputs['eval'], train_op.output, target,
                               analyze_op.output,
                               '%s/{{workflow.name}}/predict' % output).apply(
                                   gcp.use_gcp_secret('user-gcp-sa'))

        confusion_matrix_op = ConfusionMatrixOp(
            'confusion-matrix', predict_op.output,
            '%s/{{workflow.name}}/confusionmatrix' % output).apply(
                gcp.use_gcp_secret('user-gcp-sa'))

        roc_op = RocOp('roc', predict_op.output, true_label,
                       '%s/{{workflow.name}}/roc' % output).apply(
                           gcp.use_gcp_secret('user-gcp-sa'))
def pipeline_exit_handler(message: str = 'Hello World!'):
    exit_task = print_op(message='Exit handler has worked!')

    with dsl.ExitHandler(exit_task):
        print_op(message=message)
        fail_op(message='Task failed.')
def mnist_pipeline(learning_rate, dropout_rate, checkpoint_dir,
                   saved_model_dir, tensorboard_log):
    exit_task = echo_op("Done!")

    with dsl.ExitHandler(exit_task):
        vop = dsl.VolumeOp(name="mnist_model_volume",
                           resource_name="mnist_model",
                           storage_class="nfs-client",
                           modes=dsl.VOLUME_MODE_RWM,
                           size="10Gi")

        mnist = dsl.ContainerOp(
            name='Mnist',
            image='kubeflow-registry.default.svc.cluster.local:30000/katib-job:2B27615F',
            command=['python', '/app/mnist_to_pipeline.py'],
            arguments=[
                "--learning_rate", learning_rate,
                "--dropout_rate", dropout_rate,
                "--checkpoint_dir", checkpoint_dir,
                "--saved_model_dir", saved_model_dir,
                "--tensorboard_log", tensorboard_log
            ],
            pvolumes={"/result": vop.volume})

        result = dsl.ContainerOp(name='list_list',
                                 image='library/bash:4.4.23',
                                 command=['ls', '-R', '/result'],
                                 pvolumes={"/result": mnist.pvolume})

        mnist.after(vop)
        result.after(mnist)
def _create_pipeline_exit_handler(self):
    enable_volume_cleaning = (
        self.run_config.volume is not None and not self.run_config.volume.keep
    )
    if not enable_volume_cleaning:
        return contextlib.nullcontext()

    return dsl.ExitHandler(
        dsl.ContainerOp(
            name="schedule-volume-termination",
            image="gcr.io/cloud-builders/kubectl",
            command=[
                "kubectl", "delete", "pvc",
                "{{workflow.name}}-data-volume",
                "--wait=false", "--ignore-not-found",
                "--output", "name",
            ],
        )
    )
def xgb_train_pipeline(
    output,
    project,
    region=dsl.PipelineParam('region', value='us-central1'),
    train_data=dsl.PipelineParam('train-data', value='gs://ml-pipeline-playground/sfpd/train.csv'),
    eval_data=dsl.PipelineParam('eval-data', value='gs://ml-pipeline-playground/sfpd/eval.csv'),
    schema=dsl.PipelineParam('schema', value='gs://ml-pipeline-playground/sfpd/schema.json'),
    target=dsl.PipelineParam('target', value='resolution'),
    rounds=dsl.PipelineParam('rounds', value=200),
    workers=dsl.PipelineParam('workers', value=2),
    true_label=dsl.PipelineParam('true-label', value='ACTION'),
):
    delete_cluster_op = DeleteClusterOp('delete-cluster', project, region)

    with dsl.ExitHandler(exit_op=delete_cluster_op):
        create_cluster_op = CreateClusterOp('create-cluster', project, region, output)

        analyze_op = AnalyzeOp('analyze', project, region, create_cluster_op.output,
                               schema, train_data,
                               '%s/{{workflow.name}}/analysis' % output)

        transform_op = TransformOp('transform', project, region,
                                   create_cluster_op.output, train_data, eval_data,
                                   target, analyze_op.output,
                                   '%s/{{workflow.name}}/transform' % output)

        train_op = TrainerOp('train', project, region, create_cluster_op.output,
                             transform_op.outputs['train'],
                             transform_op.outputs['eval'], target,
                             analyze_op.output, workers, rounds,
                             '%s/{{workflow.name}}/model' % output)

        predict_op = PredictOp('predict', project, region, create_cluster_op.output,
                               transform_op.outputs['eval'], train_op.output,
                               target, analyze_op.output,
                               '%s/{{workflow.name}}/predict' % output)

        confusion_matrix_op = ConfusionMatrixOp(
            'confusion-matrix', predict_op.output,
            '%s/{{workflow.name}}/confusionmatrix' % output)

        roc_op = RocOp('roc', predict_op.output, true_label,
                       '%s/{{workflow.name}}/roc' % output)
def pipeline_exit_handler(url='gs://ml-pipeline/shakespeare1.txt'):
    """A sample pipeline showing exit handler."""
    exit_task = echo_msg('exit!')

    with dsl.ExitHandler(exit_task):
        download_task = gcs_download_op(url)
        echo_task = print_file(download_task.output)
def download_and_print(url='gs://ml-pipeline-playground/shakespeare1.txt'):
    """A sample pipeline showing exit handler."""
    exit_task = echo_op('exit!')

    with dsl.ExitHandler(exit_task):
        download_task = gcs_download_op(url)
        echo_task = echo_op(download_task.output)
def kfpipeline():
    exit_task = NewTask(handler='run_summary_comment')
    exit_task.with_params(workflow_id='{{workflow.uid}}',
                          repo=this_project.params.get('git_repo'),
                          issue=this_project.params.get('git_issue'))
    exit_task.with_secrets(
        'inline', {'GITHUB_TOKEN': this_project.get_secret('GITHUB_TOKEN')})

    with dsl.ExitHandler(funcs['git_utils'].as_step(exit_task, name='exit-handler')):

        # run the ingestion function with the new image and params
        ingest = funcs['gen-iris'].as_step(name="get-data",
                                           handler='iris_generator',
                                           params={'format': 'pq'},
                                           outputs=[DATASET])

        # train with hyper-parameters
        train = funcs["train"].as_step(
            name="train",
            params={"sample": -1, "label_column": LABELS, "test_size": 0.10},
            hyperparams={
                'model_pkg_class': [
                    "sklearn.ensemble.RandomForestClassifier",
                    "sklearn.linear_model.LogisticRegression",
                    "sklearn.ensemble.AdaBoostClassifier"
                ]
            },
            selector='max.accuracy',
            inputs={"dataset": ingest.outputs[DATASET]},
            labels={"commit": this_project.params.get('commit', '')},
            outputs=['model', 'test_set'])

        # test and visualize our model
        test = funcs["test"].as_step(name="test",
                                     params={"label_column": LABELS},
                                     inputs={
                                         "models_path": train.outputs['model'],
                                         "test_set": train.outputs['test_set']
                                     })

        # deploy our model as a serverless function
        deploy = funcs["serving"].deploy_step(
            models={f"{DATASET}_v1": train.outputs['model']},
            tag=this_project.params.get('commit', 'v1')[:6])

        # test out new model server (via REST API calls)
        tester = funcs["live_tester"].as_step(
            name='model-tester',
            params={'addr': deploy.outputs['endpoint'], 'model': f"{DATASET}_v1"},
            inputs={'table': train.outputs['test_set']})
def mnist_pipeline(learning_rate, dropout_rate, checkpoint_dir, saved_model_dir,
                   tensorboard_log):
    exit_task = echo_op("Done!")

    with dsl.ExitHandler(exit_task):
        vop = dsl.VolumeOp(name="mnist_model_volume",
                           resource_name="mnist_model",
                           storage_class="nfs-client",
                           modes=dsl.VOLUME_MODE_RWM,
                           size="10Gi")

        mnist = dsl.ContainerOp(
            name='Mnist',
            image='kubeflow-registry.default.svc.cluster.local:30000/katib-job:2B27615F',
            command=['python', '/app/mnist_to_pipeline.py'],
            arguments=[
                "--learning_rate", learning_rate,
                "--dropout_rate", dropout_rate,
                "--checkpoint_dir", checkpoint_dir,
                "--saved_model_dir", saved_model_dir,
                "--tensorboard_log", tensorboard_log
            ],
            pvolumes={"/result": vop.volume},
            output_artifact_paths={
                'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
            },
            container_kwargs={
                'env': [
                    V1EnvVar('S3_ENDPOINT', 'minio-service.kubeflow.svc.cluster.local:9000'),
                    V1EnvVar('AWS_ENDPOINT_URL', 'http://minio-service.kubeflow.svc.cluster.local:9000'),
                    V1EnvVar('AWS_ACCESS_KEY_ID', 'minio'),
                    V1EnvVar('AWS_SECRET_ACCESS_KEY', 'minio123'),
                    V1EnvVar('AWS_REGION', 'us-east-1'),
                    V1EnvVar('S3_USE_HTTPS', '0'),
                    V1EnvVar('S3_VERIFY_SSL', '0'),
                ]
            })

        result = dsl.ContainerOp(name='list_list',
                                 image='library/bash:4.4.23',
                                 command=['ls', '-R', '/result'],
                                 pvolumes={"/result": mnist.pvolume})

        mnist.after(vop)
        result.after(mnist)


arguments = {
    'learning_rate': '0.01',
    'dropout_rate': '0.2',
    'checkpoint_dir': '/result/training_checkpoints',
    'model_version': '001',
    'saved_model_dir': '/result/saved_model',
    'tensorboard_log': '/result/log'
}
def xgb_train_pipeline(
    output,
    project,
    region='us-central1',
    train_data='gs://ml-pipeline-playground/sfpd/train.csv',
    eval_data='gs://ml-pipeline-playground/sfpd/eval.csv',
    schema='gs://ml-pipeline-playground/sfpd/schema.json',
    target='resolution',
    rounds=200,
    workers=2,
    true_label='ACTION',
):
    output_template = str(output) + '/' + dsl.EXECUTION_ID_PLACEHOLDER + '/data'

    delete_cluster_op = dataproc_delete_cluster_op(project, region).apply(
        gcp.use_gcp_secret('user-gcp-sa'))

    with dsl.ExitHandler(exit_op=delete_cluster_op):
        create_cluster_op = dataproc_create_cluster_op(
            project, region, output).apply(gcp.use_gcp_secret('user-gcp-sa'))

        analyze_op = dataproc_analyze_op(project, region, create_cluster_op.output,
                                         schema, train_data, output_template).apply(
                                             gcp.use_gcp_secret('user-gcp-sa'))

        transform_op = dataproc_transform_op(
            project, region, create_cluster_op.output, train_data, eval_data,
            target, analyze_op.output,
            output_template).apply(gcp.use_gcp_secret('user-gcp-sa'))

        train_op = dataproc_train_op(project, region, create_cluster_op.output,
                                     transform_op.outputs['train'],
                                     transform_op.outputs['eval'], target,
                                     analyze_op.output, workers, rounds,
                                     output_template).apply(
                                         gcp.use_gcp_secret('user-gcp-sa'))

        predict_op = dataproc_predict_op(
            project, region, create_cluster_op.output,
            transform_op.outputs['eval'], train_op.output, target,
            analyze_op.output, output_template).apply(gcp.use_gcp_secret('user-gcp-sa'))

        confusion_matrix_task = confusion_matrix_op(
            predict_op.output,
            output_template).apply(gcp.use_gcp_secret('user-gcp-sa'))

        roc_task = roc_op(predictions_dir=predict_op.output,
                          true_class=true_label,
                          true_score_column=true_label,
                          output_dir=output_template).apply(
                              gcp.use_gcp_secret('user-gcp-sa'))
def save_most_frequent_word():
    exit_task = exit_op()

    with dsl.ExitHandler(exit_task):
        counter = frequent_word_op(message=message_param)
        counter.container.set_memory_request('200M')

        saver = save_message_op(message=counter.outputs['word'],
                                output_path=output_path_param)
        saver.container.set_cpu_limit('0.5')
        # saver.container.set_gpu_limit('2')
        saver.add_node_selector_constraint('kubernetes.io/os', 'linux')
def save_most_frequent_word():
    exit_op = ExitHandlerOp('exiting')

    with dsl.ExitHandler(exit_op):
        counter = GetFrequentWordOp(name='get-Frequent', message=message_param)
        counter.container.set_memory_request('200M')

        saver = SaveMessageOp(name='save', message=counter.output,
                              output_path=output_path_param)
        saver.container.set_cpu_limit('0.5')
        # saver.container.set_gpu_limit('2')
        saver.add_node_selector_constraint('kubernetes.io/os', 'linux')
def save_most_frequent_word(message: dsl.PipelineParam,
                            outputpath: dsl.PipelineParam):
    """A pipeline function describing the orchestration of the workflow."""
    exit_op = ExitHandlerOp('exiting')

    with dsl.ExitHandler(exit_op):
        counter = GetFrequentWordOp(name='get-Frequent', message=message)
        counter.set_memory_request('200M')

        saver = SaveMessageOp(name='save', message=counter.output,
                              output_path=outputpath)
        saver.set_cpu_limit('0.5')
def save_most_frequent_word():
    exit_op = ExitHandlerOp('exiting')

    with dsl.ExitHandler(exit_op):
        counter = GetFrequentWordOp(name='get-Frequent', message=message_param)
        counter.container.set_memory_request('200M')

        saver = SaveMessageOp(name='save', message=counter.output,
                              output_path=output_path_param)
        saver.container.set_cpu_limit('0.5')
        saver.container.set_gpu_limit('2')
        saver.add_node_selector_constraint('cloud.google.com/gke-accelerator',
                                           'nvidia-tesla-k80')
        saver.apply(gcp.use_tpu(tpu_cores=8, tpu_resource='v2', tf_version='1.12'))
def save_most_frequent_word(message: str, outputpath: str):
    """A pipeline function describing the orchestration of the workflow."""
    exit_op = ExitHandlerOp('exiting')

    with dsl.ExitHandler(exit_op):
        counter = GetFrequentWordOp(name='get-Frequent', message=message)
        counter.set_memory_request('200M')

        saver = SaveMessageOp(name='save', message=counter.output,
                              output_path=outputpath)
        saver.set_cpu_limit('0.5')
        saver.set_gpu_limit('2')
        saver.add_node_selector_constraint('cloud.google.com/gke-accelerator',
                                           'nvidia-tesla-k80')
def mnist_pipeline(volume_size, learning_rate, dropout_rate, checkpoint_dir,
                   saved_model_dir, tensorboard_log, namespace, storage_uri, name):
    exit_task = echo_op("Done!")

    with dsl.ExitHandler(exit_task):
        vop = dsl.VolumeOp(name='mnist_model',
                           resource_name='mnist_model',
                           storage_class="nfs-client",
                           modes=dsl.VOLUME_MODE_RWM,
                           size=volume_size)

        mnist = dsl.ContainerOp(
            name='Mnist',
            image='kubeflow-registry.default.svc.cluster.local:30000/katib-job:FF61F3B',
            command=['python', '/app/Untitled.py'],
            arguments=[
                "--learning_rate", learning_rate,
                "--dropout_rate", dropout_rate,
                "--checkpoint_dir", checkpoint_dir,
                "--saved_model_dir", saved_model_dir,
                "--tensorboard_log", tensorboard_log
            ],
            pvolumes={"/result": vop.volume})

        result = dsl.ContainerOp(name='list_list',
                                 image='library/bash:4.4.23',
                                 command=['ls', '-R', '/result'],
                                 pvolumes={"/result": mnist.pvolume})

        kfserving = dsl.ContainerOp(
            name='kfserving',
            image='kubeflow-registry.default.svc.cluster.local:30000/kfserving:6D7B836C',
            command=['python', '/app/kfserving-fairing.py'],
            arguments=[
                "--namespace", namespace,
                "--storage_uri",
                "pvc://" + str(vop.volume.persistent_volume_claim.claim_name) + str(storage_uri),
                "--name", name
            ],
            pvolumes={"/result": mnist.pvolume})

        mnist_web_ui = dsl.ContainerOp(
            name='mnist_web_ui',
            image='brightfly/kfserving-mnist-web-ui-deploy:latest',
        )

        mnist.after(vop)
        result.after(mnist)
        kfserving.after(mnist)
        mnist_web_ui.after(kfserving)
def download_and_print(url='gs://ml-pipeline-playground/shakespeare1.txt'):
    """A sample pipeline showing exit handler."""
    exit_op = dsl.ContainerOp(name='finally',
                              image='library/bash:4.4.23',
                              command=['echo', 'exit!'])

    with dsl.ExitHandler(exit_op):
        op1 = dsl.ContainerOp(
            name='download',
            image='google/cloud-sdk:216.0.0',
            command=['sh', '-c'],
            arguments=['gsutil cat %s | tee /tmp/results.txt' % url],
            file_outputs={'downloaded': '/tmp/results.txt'})

        op2 = dsl.ContainerOp(name='echo',
                              image='library/bash:4.4.23',
                              command=['sh', '-c'],
                              arguments=['echo %s' % op1.output])
def email_pipeline(
    server_secret="server-secret",
    subject="Hi, again!",
    body="Tekton email",
    sender="*****@*****.**",
    recipients="[email protected], [email protected]",
    attachment_filepath="/tmp/data/output.txt"
):
    email = email_op(server_secret=server_secret,
                     subject=subject,
                     body=body,
                     sender=sender,
                     recipients=recipients,
                     attachment_path=attachment_filepath)
    email.add_env_variable(env_from_secret('USER', '$(params.server_secret)', 'user'))
    email.add_env_variable(env_from_secret('PASSWORD', '$(params.server_secret)', 'password'))
    email.add_env_variable(env_from_secret('TLS', '$(params.server_secret)', 'tls'))
    email.add_env_variable(env_from_secret('SERVER', '$(params.server_secret)', 'url'))
    email.add_env_variable(env_from_secret('PORT', '$(params.server_secret)', 'port'))
    email.apply(onprem.mount_pvc('shared-pvc', 'shared-pvc', attachment_path))

    with dsl.ExitHandler(email):
        write_file_task = write_file(attachment_filepath).apply(
            onprem.mount_pvc('shared-pvc', 'shared-pvc', attachment_path))
def flipcoin_exit_pipeline():
    exit_task = print_op('Exit handler has worked!')

    with dsl.ExitHandler(exit_task):
        flip = flip_coin_op()

        with dsl.Condition(flip.output == 'heads'):
            random_num_head = get_random_int_op(0, 9)
            with dsl.Condition(random_num_head.output > 5):
                print_op('heads and %s > 5!' % random_num_head.output)
            with dsl.Condition(random_num_head.output <= 5):
                print_op('heads and %s <= 5!' % random_num_head.output)

        with dsl.Condition(flip.output == 'tails'):
            random_num_tail = get_random_int_op(10, 19)
            with dsl.Condition(random_num_tail.output > 15):
                print_op('tails and %s > 15!' % random_num_tail.output)
            with dsl.Condition(random_num_tail.output <= 15):
                print_op('tails and %s <= 15!' % random_num_tail.output)

        with dsl.Condition(flip.output == 'tails'):
            fail_op(message="Failing the run to demonstrate that exit handler still gets executed.")
def my_pipeline(message: str = 'Hello World!'):
    exit_task = exit_op(user_input=message)

    with dsl.ExitHandler(exit_task, name='my-pipeline'):
        print_op(message=message)
def xgb_train_pipeline(
    output='gs://{{kfp-default-bucket}}',
    project='{{kfp-project-id}}',
    diagnostic_mode='HALT_ON_ERROR',
    rounds=5,
):
    output_template = str(output) + '/' + dsl.RUN_ID_PLACEHOLDER + '/data'
    region = 'us-central1'
    workers = 2
    quota_check = [{'region': region, 'metric': 'CPUS', 'quota_needed': 12.0}]
    train_data = 'gs://ml-pipeline/sample-data/sfpd/train.csv'
    eval_data = 'gs://ml-pipeline/sample-data/sfpd/eval.csv'
    schema = 'gs://ml-pipeline/sample-data/sfpd/schema.json'
    true_label = 'ACTION'
    target = 'resolution'
    required_apis = 'dataproc.googleapis.com'
    cluster_name = 'xgb-%s' % dsl.RUN_ID_PLACEHOLDER

    # Current GCP pyspark/spark op do not provide outputs as return values, instead,
    # we need to use strings to pass the uri around.
    analyze_output = output_template
    transform_output_train = os.path.join(output_template, 'train', 'part-*')
    transform_output_eval = os.path.join(output_template, 'eval', 'part-*')
    train_output = os.path.join(output_template, 'train_output')
    predict_output = os.path.join(output_template, 'predict_output')

    _diagnose_me_op = diagnose_me_op(
        bucket=output,
        execution_mode=diagnostic_mode,
        project_id=project,
        target_apis=required_apis,
        quota_check=quota_check)

    with dsl.ExitHandler(exit_op=dataproc_delete_cluster_op(
            project_id=project, region=region, name=cluster_name)):
        _create_cluster_op = dataproc_create_cluster_op(
            project_id=project, region=region, name=cluster_name,
            initialization_actions=[
                os.path.join(_PYSRC_PREFIX, 'initialization_actions.sh'),
            ],
            image_version='1.5').after(_diagnose_me_op)

        _analyze_op = dataproc_analyze_op(
            project=project, region=region, cluster_name=cluster_name,
            schema=schema, train_data=train_data, output=output_template
        ).after(_create_cluster_op).set_display_name('Analyzer')

        _transform_op = dataproc_transform_op(
            project=project, region=region, cluster_name=cluster_name,
            train_data=train_data, eval_data=eval_data, target=target,
            analysis=analyze_output, output=output_template
        ).after(_analyze_op).set_display_name('Transformer')

        _train_op = dataproc_train_op(
            project=project, region=region, cluster_name=cluster_name,
            train_data=transform_output_train, eval_data=transform_output_eval,
            target=target, analysis=analyze_output, workers=workers,
            rounds=rounds, output=train_output
        ).after(_transform_op).set_display_name('Trainer')

        _predict_op = dataproc_predict_op(
            project=project, region=region, cluster_name=cluster_name,
            data=transform_output_eval, model=train_output, target=target,
            analysis=analyze_output, output=predict_output
        ).after(_train_op).set_display_name('Predictor')

        _cm_op = confusion_matrix_op(
            predictions=os.path.join(predict_output, 'part-*.csv'),
            output_dir=output_template).after(_predict_op)

        _roc_op = roc_op(
            predictions_dir=os.path.join(predict_output, 'part-*.csv'),
            true_class=true_label, true_score_column=true_label,
            output_dir=output_template).after(_predict_op)
def xgb_train_pipeline(
    output='gs://your-gcs-bucket',
    project='your-gcp-project',
    cluster_name='xgb-%s' % dsl.RUN_ID_PLACEHOLDER,
    region='us-central1',
    train_data='gs://ml-pipeline-playground/sfpd/train.csv',
    eval_data='gs://ml-pipeline-playground/sfpd/eval.csv',
    schema='gs://ml-pipeline-playground/sfpd/schema.json',
    target='resolution',
    rounds=200,
    workers=2,
    true_label='ACTION',
):
    output_template = str(output) + '/' + dsl.RUN_ID_PLACEHOLDER + '/data'

    # Current GCP pyspark/spark op do not provide outputs as return values, instead,
    # we need to use strings to pass the uri around.
    analyze_output = output_template
    transform_output_train = os.path.join(output_template, 'train', 'part-*')
    transform_output_eval = os.path.join(output_template, 'eval', 'part-*')
    train_output = os.path.join(output_template, 'train_output')
    predict_output = os.path.join(output_template, 'predict_output')

    with dsl.ExitHandler(exit_op=dataproc_delete_cluster_op(
            project_id=project, region=region, name=cluster_name)):
        _create_cluster_op = dataproc_create_cluster_op(
            project_id=project, region=region, name=cluster_name,
            initialization_actions=[
                os.path.join(_PYSRC_PREFIX, 'initialization_actions.sh'),
            ],
            image_version='1.2')

        _analyze_op = dataproc_analyze_op(
            project=project, region=region, cluster_name=cluster_name,
            schema=schema, train_data=train_data, output=output_template
        ).after(_create_cluster_op).set_display_name('Analyzer')

        _transform_op = dataproc_transform_op(
            project=project, region=region, cluster_name=cluster_name,
            train_data=train_data, eval_data=eval_data, target=target,
            analysis=analyze_output, output=output_template
        ).after(_analyze_op).set_display_name('Transformer')

        _train_op = dataproc_train_op(
            project=project, region=region, cluster_name=cluster_name,
            train_data=transform_output_train, eval_data=transform_output_eval,
            target=target, analysis=analyze_output, workers=workers,
            rounds=rounds, output=train_output
        ).after(_transform_op).set_display_name('Trainer')

        _predict_op = dataproc_predict_op(
            project=project, region=region, cluster_name=cluster_name,
            data=transform_output_eval, model=train_output, target=target,
            analysis=analyze_output, output=predict_output
        ).after(_train_op).set_display_name('Predictor')

        _cm_op = confusion_matrix_op(
            predictions=os.path.join(predict_output, 'part-*.csv'),
            output_dir=output_template).after(_predict_op)

        _roc_op = roc_op(
            predictions_dir=os.path.join(predict_output, 'part-*.csv'),
            true_class=true_label, true_score_column=true_label,
            output_dir=output_template).after(_predict_op)

    dsl.get_pipeline_conf().add_op_transformer(gcp.use_gcp_secret('user-gcp-sa'))
def tacosandburritos_train(resource_group, workspace, dataset,
                           mlflow_experiment_id, azdocallbackinfo=None):
    exit_handler_op = exit_op(
        kfp_host_url="$(KFP_HOST)",
        azdocallbackinfo=azdocallbackinfo,
        run_id=dsl.RUN_ID_PLACEHOLDER,
        tenant_id="$(AZ_TENANT_ID)",
        service_principal_id="$(AZ_CLIENT_ID)",
        service_principal_password="******",
        pat_env="PAT_ENV").apply(use_azure_secret()).apply(
            use_kfp_host_secret()).apply(use_image(exit_image_name)).apply(
                use_secret_var("azdopat", "PAT_ENV", "azdopat"))

    with dsl.ExitHandler(exit_op=exit_handler_op):
        operations['mlflowproject'] = mlflow_project_op(
            mlflow_experiment_id=mlflow_experiment_id,
            kf_run_id=dsl.RUN_ID_PLACEHOLDER).apply(
                use_databricks_secret()).apply(use_image(mlflow_project_image_name))

        operations['preprocess'] = preprocess_op(
            base_path=persistent_volume_path,
            training_folder=training_folder,
            target=training_dataset,
            image_size=image_size,
            zipfile=dataset).apply(use_image(preprocess_image_name))
        operations['preprocess'].after(operations['mlflowproject'])

        operations['training'] = train_op(
            base_path=persistent_volume_path, training_folder=training_folder,
            epochs=2, batch=batch, image_size=image_size, lr=0.0001,
            model_folder=model_folder, images=training_dataset,
            dataset=operations['preprocess'].outputs['dataset']). \
            set_memory_request('16G'). \
            add_env_variable(V1EnvVar(name="RUN_ID", value=dsl.RUN_ID_PLACEHOLDER)). \
            add_env_variable(V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url)). \
            add_env_variable(V1EnvVar(name="GIT_PYTHON_REFRESH", value='quiet')). \
            apply(use_image(train_image_name))

        # Spot nodepool target
        # operations['training'].add_toleration(k8s_client.V1Toleration(
        #     key='kubernetes.azure.com/scalesetpriority',
        #     operator='Equal',
        #     value='spot',
        #     effect="NoSchedule"))

        # Virtual/ACI nodepool target
        # operations['training'].add_node_selector_constraint(
        #     label_name='type', value='virtual-kubelet')
        # operations['training'].add_toleration(k8s_client.V1Toleration(
        #     key='virtual-kubelet.io/provider', operator='Exists'))

        operations['training'].after(operations['preprocess'])

        operations['evaluate'] = evaluate_op(
            model=operations['training'].outputs['model'])
        operations['evaluate'].after(operations['training'])

        operations['register to AML'] = register_op(
            base_path=persistent_volume_path,
            model_file='latest.h5',
            model_name=model_name,
            tenant_id='$(AZ_TENANT_ID)',
            service_principal_id='$(AZ_CLIENT_ID)',
            service_principal_password='******',
            subscription_id='$(AZ_SUBSCRIPTION_ID)',
            resource_group=resource_group,
            workspace=workspace,
            run_id=dsl.RUN_ID_PLACEHOLDER). \
            apply(use_azure_secret()). \
            apply(use_image(register_images_name))
        operations['register to AML'].after(operations['evaluate'])

        operations['register to mlflow'] = register_mlflow_op(
            model='model',
            model_name=model_name,
            experiment_name='mexicanfood',
            run_id=dsl.RUN_ID_PLACEHOLDER). \
            apply(use_azure_secret()). \
            add_env_variable(V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url)). \
            apply(use_image(register_mlflow_image_name))
        operations['register to mlflow'].after(operations['register to AML'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-file'))).add_volume_mount(
            k8s_client.V1VolumeMount(mount_path='/mnt/azure', name='azure'))
def cnn_train(resource_group, workspace, dataset, token):
    """Pipeline steps"""
    persistent_volume_path = '/mnt/azure'
    data_download = dataset
    batch = 32
    model_name = 'cnnmodel'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'
    image_repo_name = "k8scc01covidmlopsacr.azurecr.io/mlops"
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'
    mlflow_url = 'http://mlflow.mlflow:5000'

    exit_op = dsl.ContainerOp(
        name='Exit Handler',
        image="curlimages/curl",
        command=['curl'],
        arguments=['-d', get_callback_payload(TRAIN_FINISH_EVENT), callback_url])

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d', get_callback_payload(TRAIN_START_EVENT), callback_url])

        operations['tensorflow preprocess'] = dsl.ContainerOp(
            name='tensorflow preprocess',
            init_containers=[start_callback],
            image=image_repo_name + '/tensorflow-preprocess:latest',
            command=['python'],
            arguments=['/scripts/data.py',
                       '--base_path', persistent_volume_path,
                       '--data', training_folder,
                       '--target', training_dataset,
                       '--img_size', image_size,
                       '--zipfile', data_download])

        operations['tensorflow training'] = dsl.ContainerOp(
            name="tensorflow training",
            image=image_repo_name + '/tensorflow-training:latest',
            command=['python'],
            arguments=['/scripts/train.py',
                       '--base_path', persistent_volume_path,
                       '--data', training_folder,
                       '--epochs', 2,
                       '--batch', batch,
                       '--image_size', image_size,
                       '--lr', 0.0001,
                       '--outputs', model_folder,
                       '--dataset', training_dataset],
            output_artifact_paths={
                'mlpipeline-metrics': '/mlpipeline-metrics.json',
                'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
            }).apply(use_azstorage_secret()).add_env_variable(
                V1EnvVar(name="RUN_ID", value=dsl.RUN_ID_PLACEHOLDER)).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_TOKEN", value=token)).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url)).add_env_variable(
                V1EnvVar(name="GIT_PYTHON_REFRESH", value='quiet'))
        operations['tensorflow training'].after(operations['tensorflow preprocess'])

        operations['evaluate'] = dsl.ContainerOp(
            name='evaluate',
            image="busybox",
            command=['sh', '-c'],
            # 'sh -c' takes a single command string as its argument
            arguments=['echo "Life is Good!"'])
        operations['evaluate'].after(operations['tensorflow training'])

        operations['register kubeflow'] = dsl.ContainerOp(
            name='register kubeflow',
            image=image_repo_name + '/register-kubeflow-artifacts:latest',
            command=['python'],
            arguments=['/scripts/register.py',
                       '--base_path', persistent_volume_path,
                       '--model', 'latest.h5',
                       '--model_name', model_name,
                       '--data', training_folder,
                       '--dataset', training_dataset,
                       '--run_id', dsl.RUN_ID_PLACEHOLDER]).apply(use_azure_secret())
        operations['register kubeflow'].after(operations['evaluate'])

        operations['register AML'] = dsl.ContainerOp(
            name='register AML',
            image=image_repo_name + '/register-aml:latest',
            command=['python'],
            arguments=['/scripts/register.py',
                       '--base_path', persistent_volume_path,
                       '--model', 'latest.h5',
                       '--model_name', model_name,
                       '--tenant_id', "$(AZ_TENANT_ID)",
                       '--service_principal_id', "$(AZ_CLIENT_ID)",
                       '--service_principal_password', "$(AZ_CLIENT_SECRET)",
                       '--subscription_id', "$(AZ_SUBSCRIPTION_ID)",
                       '--resource_group', resource_group,
                       '--workspace', workspace,
                       '--run_id', dsl.RUN_ID_PLACEHOLDER]).apply(use_azure_secret())
        operations['register AML'].after(operations['register kubeflow'])

        operations['register mlflow'] = dsl.ContainerOp(
            name='register mlflow',
            image=image_repo_name + '/register-mlflow:latest',
            command=['python'],
            arguments=['/scripts/register.py',
                       '--model', 'model',
                       '--model_name', model_name,
                       '--experiment_name', 'kubeflow-mlops',
                       '--run_id', dsl.RUN_ID_PLACEHOLDER
                       ]).apply(use_azure_secret()).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url)).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_TOKEN", value=token))
        operations['register mlflow'].after(operations['register AML'])

        operations['finalize'] = dsl.ContainerOp(
            name='Finalize',
            image="curlimages/curl",
            command=['curl'],
            arguments=['-d', get_callback_payload("Model is registered"), callback_url])
        operations['finalize'].after(operations['register mlflow'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-file'))).add_volume_mount(
            k8s_client.V1VolumeMount(mount_path='/mnt/azure', name='azure'))
def ext_handler_pipeline():
    exit_op = print_op('Exit')

    with dsl.ExitHandler(exit_op):
        flip = flip_coin_op()
        print_op(flip.output)
def tacosandburritos_train(resource_group, workspace, dataset):
    """Pipeline steps"""
    persistent_volume_path = '/mnt/azure'
    data_download = dataset
    batch = 32
    model_name = 'tacosandburritos'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'
    image_repo_name = "kubeflowyoacr.azurecr.io/mexicanfood"
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'
    mlflow_url = 'http://mlflow:5000'

    exit_op = dsl.ContainerOp(
        name='Exit Handler',
        image="curlimages/curl",
        command=['curl'],
        arguments=['-d', get_callback_payload(TRAIN_FINISH_EVENT), callback_url])

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d', get_callback_payload(TRAIN_START_EVENT), callback_url])

        operations['data processing on databricks'] = dsl.ContainerOp(
            name='data processing on databricks',
            init_containers=[start_callback],
            image=image_repo_name + '/databricks-notebook:latest',
            arguments=[
                '-r', dsl.RUN_ID_PLACEHOLDER,
                '-p', '{"argument_one":"param one","argument_two":"param two"}'
            ]).apply(use_databricks_secret())

        operations['preprocess'] = dsl.ContainerOp(
            name='preprocess',
            image=image_repo_name + '/preprocess:latest',
            command=['python'],
            arguments=['/scripts/data.py',
                       '--base_path', persistent_volume_path,
                       '--data', training_folder,
                       '--target', training_dataset,
                       '--img_size', image_size,
                       '--zipfile', data_download])
        operations['preprocess'].after(
            operations['data processing on databricks'])

        # train
        # TODO: read set of parameters from config file
        with dsl.ParallelFor([{'epochs': 1, 'lr': 0.0001},
                              {'epochs': 2, 'lr': 0.0002},
                              {'epochs': 3, 'lr': 0.0003}]) as item:
            operations['training'] = dsl.ContainerOp(
                name="training",
                image=image_repo_name + '/training:latest',
                command=['python'],
                arguments=['/scripts/train.py',
                           '--base_path', persistent_volume_path,
                           '--data', training_folder,
                           '--epochs', item.epochs,
                           '--batch', batch,
                           '--image_size', image_size,
                           '--lr', item.lr,
                           '--outputs', model_folder,
                           '--dataset', training_dataset],
                # change output_artifact_paths to file_outputs after this PR is merged
                # https://github.com/kubeflow/pipelines/pull/2334
                output_artifact_paths={
                    'mlpipeline-metrics': '/mlpipeline-metrics.json',
                    'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
                }).add_env_variable(
                    V1EnvVar(name="RUN_ID", value=dsl.RUN_ID_PLACEHOLDER)).add_env_variable(
                    V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url)).add_env_variable(
                    V1EnvVar(name="GIT_PYTHON_REFRESH", value='quiet'))

        operations['training'].after(operations['preprocess'])

        operations['evaluate'] = dsl.ContainerOp(
            name='evaluate',
            image="busybox",
            command=['sh', '-c'],
            # 'sh -c' takes a single command string as its argument
            arguments=['echo "Life is Good!"'])
        operations['evaluate'].after(operations['training'])

        # register kubeflow artifacts model
        operations['register to kubeflow'] = dsl.ContainerOp(
            name='register to kubeflow',
            image=image_repo_name + '/registerartifacts:latest',
            command=['python'],
            arguments=['/scripts/registerartifacts.py',
                       '--base_path', persistent_volume_path,
                       '--model', 'latest.h5',
                       '--model_name', model_name,
                       '--data', training_folder,
                       '--dataset', training_dataset,
                       '--run_id', dsl.RUN_ID_PLACEHOLDER]).apply(use_azure_secret())
        operations['register to kubeflow'].after(operations['evaluate'])

        # register model
        operations['register to AML'] = dsl.ContainerOp(
            name='register to AML',
            image=image_repo_name + '/register:latest',
            command=['python'],
            arguments=['/scripts/register.py',
                       '--base_path', persistent_volume_path,
                       '--model', 'latest.h5',
                       '--model_name', model_name,
                       '--tenant_id', "$(AZ_TENANT_ID)",
                       '--service_principal_id', "$(AZ_CLIENT_ID)",
                       '--service_principal_password', "$(AZ_CLIENT_SECRET)",
                       '--subscription_id', "$(AZ_SUBSCRIPTION_ID)",
                       '--resource_group', resource_group,
                       '--workspace', workspace,
                       '--run_id', dsl.RUN_ID_PLACEHOLDER]).apply(use_azure_secret())
        operations['register to AML'].after(operations['register to kubeflow'])

        # register model to mlflow
        operations['register to mlflow'] = dsl.ContainerOp(
            name='register to mlflow',
            image=image_repo_name + '/register-mlflow:latest',
            command=['python'],
            arguments=['/scripts/register.py',
                       '--model', 'model',
                       '--model_name', model_name,
                       '--experiment_name', 'mexicanfood',
                       '--run_id', dsl.RUN_ID_PLACEHOLDER
                       ]).apply(use_azure_secret()).add_env_variable(
                V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url))
        operations['register to mlflow'].after(operations['register to AML'])

        operations['finalize'] = dsl.ContainerOp(
            name='Finalize',
            image="curlimages/curl",
            command=['curl'],
            arguments=['-d', get_callback_payload("Model is registered"), callback_url])
        operations['finalize'].after(operations['register to mlflow'])

        # operations['deploy'] = dsl.ContainerOp(
        #     name='deploy',
        #     image=image_repo_name + '/deploy:latest',
        #     command=['sh'],
        #     arguments=[
        #         '/scripts/deploy.sh',
        #         '-n', model_name, '-m', model_name,
        #         '-t', "$(AZ_TENANT_ID)", '-r', resource_group, '-w', workspace,
        #         '-s', "$(AZ_CLIENT_ID)", '-p', "$(AZ_CLIENT_SECRET)",
        #         '-u', "$(AZ_SUBSCRIPTION_ID)", '-b', persistent_volume_path,
        #         '-x', dsl.RUN_ID_PLACEHOLDER
        #     ]
        # ).apply(use_azure_secret())
        # operations['deploy'].after(operations['register'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-file'))).add_volume_mount(
            k8s_client.V1VolumeMount(mount_path='/mnt/azure', name='azure'))
def tacosandburritos_train(resource_group, workspace, dataset):
    exit_handler = exit_op(
        callback_url=callback_url,
        callback_payload=get_callback_payload(TRAIN_FINISH_EVENT))

    with dsl.ExitHandler(exit_handler):
        operations['data processing on databricks'] = databricks_op(
            run_id=dsl.RUN_ID_PLACEHOLDER,
            notebook_params='{"argument_one":"param one","argument_two":"param two"}'
        ).apply(use_databricks_secret()). \
            add_init_container(get_start_callback_container()). \
            apply(use_image(databricks_image_name))

        operations['preprocess'] = preprocess_op(
            base_path=persistent_volume_path,
            training_folder=training_folder,
            target=training_dataset,
            image_size=image_size,
            zipfile=dataset).apply(use_image(preprocess_image_name))
        operations['preprocess'].after(
            operations['data processing on databricks'])

        operations['training'] = train_op(
            base_path=persistent_volume_path, training_folder=training_folder,
            epochs=2, batch=batch, image_size=image_size, lr=0.0001,
            model_folder=model_folder, images=training_dataset,
            dataset=operations['preprocess'].outputs['dataset']). \
            set_memory_request('16G'). \
            add_env_variable(V1EnvVar(name="RUN_ID", value=dsl.RUN_ID_PLACEHOLDER)). \
            add_env_variable(V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url)). \
            add_env_variable(V1EnvVar(name="GIT_PYTHON_REFRESH", value='quiet')). \
            apply(use_image(train_image_name))
        operations['training'].after(operations['preprocess'])

        operations['evaluate'] = evaluate_op(
            model=operations['training'].outputs['model'])
        operations['evaluate'].after(operations['training'])

        operations['register to AML'] = register_op(
            base_path=persistent_volume_path,
            model_file='latest.h5',
            model_name=model_name,
            tenant_id='$(AZ_TENANT_ID)',
            service_principal_id='$(AZ_CLIENT_ID)',
            service_principal_password='******',
            subscription_id='$(AZ_SUBSCRIPTION_ID)',
            resource_group=resource_group,
            workspace=workspace,
            run_id=dsl.RUN_ID_PLACEHOLDER).apply(use_azure_secret()).apply(
                use_image(register_images_name))
        operations['register to AML'].after(operations['evaluate'])

        operations['register to mlflow'] = register_mlflow_op(
            model='model',
            model_name=model_name,
            experiment_name='mexicanfood',
            run_id=dsl.RUN_ID_PLACEHOLDER).apply(use_azure_secret()). \
            add_env_variable(V1EnvVar(name="MLFLOW_TRACKING_URI", value=mlflow_url)). \
            apply(use_image(register_mlflow_image_name))
        operations['register to mlflow'].after(operations['register to AML'])

        operations['finalize'] = finalize_op(
            callback_url=callback_url,
            callback_payload=get_callback_payload("Model is registered"))
        operations['finalize'].after(operations['register to mlflow'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-file'))).add_volume_mount(
            k8s_client.V1VolumeMount(mount_path='/mnt/azure', name='azure'))
def tacosandburritos_train(resource_group, workspace):
    """Pipeline steps"""
    persistent_volume_path = '/mnt/azure'
    data_download = 'https://aiadvocate.blob.core.windows.net/public/tacodata.zip'
    epochs = 2
    batch = 32
    learning_rate = 0.0001
    model_name = 'tacosandburritos'
    operations = {}
    image_size = 160
    training_folder = 'train'
    training_dataset = 'train.txt'
    model_folder = 'model'
    image_repo_name = "kubeflowyoacr.azurecr.io/mexicanfood"
    callback_url = 'kubemlopsbot-svc.kubeflow.svc.cluster.local:8080'

    exit_op = dsl.ContainerOp(
        name='Exit Handler',
        image="curlimages/curl",
        command=['curl'],
        arguments=['-d', get_callback_payload(TRAIN_FINISH_EVENT), callback_url])

    with dsl.ExitHandler(exit_op):
        start_callback = \
            dsl.UserContainer('callback',
                              'curlimages/curl',
                              command=['curl'],
                              args=['-d', get_callback_payload(TRAIN_START_EVENT), callback_url])

        operations['preprocess'] = dsl.ContainerOp(
            name='preprocess',
            init_containers=[start_callback],
            image=image_repo_name + '/preprocess:latest',
            command=['python'],
            arguments=['/scripts/data.py',
                       '--base_path', persistent_volume_path,
                       '--data', training_folder,
                       '--target', training_dataset,
                       '--img_size', image_size,
                       '--zipfile', data_download])

        # train
        operations['training'] = dsl.ContainerOp(
            name='training',
            image=image_repo_name + '/training:latest',
            command=['python'],
            arguments=['/scripts/train.py',
                       '--base_path', persistent_volume_path,
                       '--data', training_folder,
                       '--epochs', epochs,
                       '--batch', batch,
                       '--image_size', image_size,
                       '--lr', learning_rate,
                       '--outputs', model_folder,
                       '--dataset', training_dataset])
        operations['training'].after(operations['preprocess'])

        # register model
        operations['register'] = dsl.ContainerOp(
            name='register',
            image=image_repo_name + '/register:latest',
            command=['python'],
            arguments=['/scripts/register.py',
                       '--base_path', persistent_volume_path,
                       '--model', 'latest.h5',
                       '--model_name', model_name,
                       '--tenant_id', "$(AZ_TENANT_ID)",
                       '--service_principal_id', "$(AZ_CLIENT_ID)",
                       '--service_principal_password', "$(AZ_CLIENT_SECRET)",
                       '--subscription_id', "$(AZ_SUBSCRIPTION_ID)",
                       '--resource_group', resource_group,
                       '--workspace', workspace,
                       '--run_id', dsl.RUN_ID_PLACEHOLDER]).apply(use_azure_secret())
        operations['register'].after(operations['training'])

        operations['finalize'] = dsl.ContainerOp(
            name='Finalize',
            image="curlimages/curl",
            command=['curl'],
            arguments=['-d', get_callback_payload("Model is registered"), callback_url])
        operations['finalize'].after(operations['register'])

        # operations['deploy'] = dsl.ContainerOp(
        #     name='deploy',
        #     image=image_repo_name + '/deploy:latest',
        #     command=['sh'],
        #     arguments=[
        #         '/scripts/deploy.sh',
        #         '-n', model_name, '-m', model_name,
        #         '-t', "$(AZ_TENANT_ID)", '-r', resource_group, '-w', workspace,
        #         '-s', "$(AZ_CLIENT_ID)", '-p', "$(AZ_CLIENT_SECRET)",
        #         '-u', "$(AZ_SUBSCRIPTION_ID)", '-b', persistent_volume_path,
        #         '-x', dsl.RUN_ID_PLACEHOLDER
        #     ]
        # ).apply(use_azure_secret())
        # operations['deploy'].after(operations['register'])

    for _, op_1 in operations.items():
        op_1.container.set_image_pull_policy("Always")
        op_1.add_volume(
            k8s_client.V1Volume(
                name='azure',
                persistent_volume_claim=k8s_client.V1PersistentVolumeClaimVolumeSource(
                    claim_name='azure-managed-disk')
            )
        ).add_volume_mount(k8s_client.V1VolumeMount(
            mount_path='/mnt/azure', name='azure'))
def mnist_pipeline(volume_size, learning_rate, dropout_rate, checkpoint_dir,
                   model_version, saved_model_dir, tensorboard_log, namespace,
                   storage_uri, name):
    exit_task = echo_op("Done!")

    with dsl.ExitHandler(exit_task):
        vop = dsl.VolumeOp(name='mnist_model',
                           resource_name='mnist_model',
                           storage_class="nfs-client",
                           modes=dsl.VOLUME_MODE_RWM,
                           size=volume_size)

        mnist = dsl.ContainerOp(
            name='Mnist',
            image='kubeflow-registry.default.svc.cluster.local:30000/katib-job:8EA9F526',
            command=['python', '/app/save_model_mnist.py'],
            arguments=[
                "--learning_rate", learning_rate,
                "--dropout_rate", dropout_rate,
                "--checkpoint_dir", checkpoint_dir,
                "--model_version", model_version,
                "--saved_model_dir", saved_model_dir,
                "--tensorboard_log", tensorboard_log
            ],
            pvolumes={"/result": vop.volume},
            output_artifact_paths={
                'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
            },
            container_kwargs={
                'env': [
                    V1EnvVar('S3_ENDPOINT', 'minio-service.kubeflow.svc.cluster.local:9000'),
                    V1EnvVar('AWS_ENDPOINT_URL', 'http://minio-service.kubeflow.svc.cluster.local:9000'),
                    V1EnvVar('AWS_ACCESS_KEY_ID', 'minio'),
                    V1EnvVar('AWS_SECRET_ACCESS_KEY', 'minio123'),
                    V1EnvVar('AWS_REGION', 'us-east-1'),
                    V1EnvVar('S3_USE_HTTPS', '0'),
                    V1EnvVar('S3_VERIFY_SSL', '0'),
                ]
            })

        result = dsl.ContainerOp(name='list_list',
                                 image='library/bash:4.4.23',
                                 command=['ls', '-R', '/result'],
                                 pvolumes={"/result": mnist.pvolume})

        '''
        kfserving = dsl.ContainerOp(
            name='kfserving',
            image='kubeflow-registry.default.svc.cluster.local:30000/kfserving:D0BE75E',
            command=['python', '/app/kfserving_fairing.py'],
            arguments=[
                "--namespace", namespace,
                "--storage_uri",
                "pvc://" + str(vop.volume.persistent_volume_claim.claim_name) + str(storage_uri),
                "--name", name
            ],
            pvolumes={"/result": mnist.pvolume}
        )
        '''
        kfserving = kfserving_op(
            action='update',
            model_name=name,
            default_model_uri="pvc://" + str(vop.volume.persistent_volume_claim.claim_name) + str(storage_uri),
            canary_model_uri='',
            canary_model_traffic_percentage='0',
            namespace='kubeflow',
            framework='tensorflow',
            default_custom_model_spec='{}',
            canary_custom_model_spec='{}',
            autoscaling_target='0',
            kfserving_endpoint='')

        mnist_web_ui = dsl.ContainerOp(
            name='mnist_web_ui',
            image='brightfly/kfserving-mnist-web-ui-deploy:latest',
        )

        mnist.after(vop)
        result.after(mnist)
        kfserving.after(mnist)
        mnist_web_ui.after(kfserving)