def my_pipeline2(year: int):
    dvop = dsl.VolumeOp(
        name="create_pvc",
        resource_name="my-pvc-2",
        size="5Gi",
        modes=dsl.VOLUME_MODE_RWO)
    tldvop = dsl.VolumeOp(
        name="create_pvc",
        resource_name="tld-volume-2",
        size="100Mi",
        modes=dsl.VOLUME_MODE_RWO)

    download_data_op = kfp.components.func_to_container_op(
        download_data, packages_to_install=['lxml', 'requests'])
    download_tld_info_op = kfp.components.func_to_container_op(
        download_tld_data,
        packages_to_install=['requests', 'pandas>=0.24', 'tables'])
    clean_data_op = kfp.components.func_to_container_op(
        clean_data, packages_to_install=['pandas>=0.24', 'tables'])
    #tag::add_feature_step[]
    prepare_features_op = kfp.components.func_to_container_op(
        prepare_features,
        packages_to_install=['pandas>=0.24', 'tables', 'scikit-learn'])
    #end::add_feature_step[]

    step1 = download_data_op(year).add_pvolumes(
        {"/data_processing": dvop.volume})
    step2 = clean_data_op(input_path=step1.output).add_pvolumes(
        {"/data_processing": dvop.volume})
    step3 = download_tld_info_op().add_pvolumes({"/tld_info": tldvop.volume})
    step4 = prepare_features_op(
        input_path=step2.output,
        tld_info_path=step3.output).add_pvolumes({
            "/data_processing": dvop.volume,
            "/tld_info": tldvop.volume})
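# These pipeline functions are plain Python definitions; to execute one, it is
# typically wrapped with the @dsl.pipeline decorator and either compiled to a
# package or submitted with the KFP client. The sketch below is an assumption
# about how my_pipeline2 could be submitted -- the pipeline name, output file,
# experiment name, and the year argument are illustrative, not from the source.
import kfp
import kfp.dsl as dsl

pipeline_func = dsl.pipeline(
    name="my-pipeline-2",
    description="Hedged example wrapper around my_pipeline2")(my_pipeline2)

# Compile to an Argo/YAML package that can be uploaded through the KFP UI ...
kfp.compiler.Compiler().compile(pipeline_func, "my_pipeline2.yaml")

# ... or submit directly to a cluster reachable from this client.
client = kfp.Client()
client.create_run_from_pipeline_func(
    pipeline_func,
    arguments={"year": 2021},          # illustrative value
    experiment_name="example-experiment")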
def ml_pipeline():
    pvc1 = dsl.VolumeOp(
        name="PVC-data",
        resource_name="pvc-ojt-ml",
        storage_class="ojt-tibero-ml",
        modes=dsl.VOLUME_MODE_RWM,
        size="1Gi")
    pvc2 = dsl.VolumeOp(
        name="PVC-was",
        resource_name="pvc-ojt-result",
        storage_class="ojt-tibero-result",
        modes=dsl.VOLUME_MODE_RWM,
        size="1Gi")

    pod1 = dsl.ContainerOp(
        name='Tibero-JDBC agent',
        image='hanjoo8821/jdbc-tibero:colsout-' + ver,
        container_kwargs={'env': [
            V1EnvVar('id', 'hanjoo'),
            V1EnvVar('pw', '1010'),
            V1EnvVar('sql', 'SELECT * FROM AB1_2 LEFT JOIN ACCOUNTING ON AB1_2.EMP_NUM = ACCOUNTING.EMP_NUM'),
            V1EnvVar('col1', 'EMP_NUM'),
            V1EnvVar('col2', 'BIRTH'),
            V1EnvVar('col3', 'SALARY')]},
        pvolumes={"/Output": pvc1.volume})

    pod2a = dsl.ContainerOp(
        name='Print-EMP_NUM',
        image='alpine:3.6',
        command=['cat', '/Output/EMP_NUM.txt'],
        pvolumes={"/Output": pod1.pvolume})
    pod2b = dsl.ContainerOp(
        name='Trans-BIRTH',
        image='hanjoo8821/jdbc-tibero:trans-birth-' + ver,
        container_kwargs={'env': [V1EnvVar('col2', 'BIRTH')]},
        pvolumes={"/Output": pod1.pvolume})
    pod2c = dsl.ContainerOp(
        name='Trans-SALARY',
        image='hanjoo8821/jdbc-tibero:trans-salary-' + ver,
        container_kwargs={'env': [V1EnvVar('col3', 'SALARY')]},
        pvolumes={"/Output": pod1.pvolume})

    pod3 = dsl.ContainerOp(
        name='ML-Linear Regression',
        image='hanjoo8821/jdbc-tibero:ml-' + ver,
        container_kwargs={'env': [V1EnvVar('col2', 'BIRTH'),
                                  V1EnvVar('col3', 'SALARY')]},
        pvolumes={"/Output": pod2b.pvolume}).after(pod2b, pod2c)

    pod4 = dsl.ContainerOp(
        name='Visualization',
        image='hanjoo8821/jdbc-tibero:graph-' + ver,
        container_kwargs={'env': [V1EnvVar('col2', 'BIRTH'),
                                  V1EnvVar('col3', 'SALARY')]},
        pvolumes={"/Output": pod3.pvolume, "/WAS": pvc2.volume})
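# ml_pipeline relies on two module-level names that are not shown in the
# snippet: the V1EnvVar model from the Kubernetes Python client and a `ver`
# string used as the image tag. A plausible preamble, with a placeholder tag
# value that is an assumption rather than the original:
from kubernetes.client.models import V1EnvVar

ver = "1.0"  # placeholder image tag for the hanjoo8821/jdbc-tibero images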
def presidential_election_pipeline(
        data_path: str,
        regr_multirf_file: str):
    # Define volume to share data between components.
    vop = dsl.VolumeOp(
        name="create_volume",
        resource_name="data-volume",
        size="1Gi",
        modes=dsl.VOLUME_MODE_RWO)

    # Create presidential elections training component.
    presidential_elections_training_container = train_op(
        data_path, regr_multirf_file).add_pvolumes({data_path: vop.volume})

    # Create presidential elections prediction component.
    presidential_elections_predict_container = predict_op(
        data_path, regr_multirf_file).add_pvolumes(
            {data_path: presidential_elections_training_container.pvolume})

    # Print the result of the prediction.
    presidential_elections_result_container = dsl.ContainerOp(
        name="print_prediction",
        image='library/bash:4.4.23',
        pvolumes={data_path: presidential_elections_predict_container.pvolume},
        arguments=['cat', f'{data_path}/result.txt'])
def mnist_pipeline(learning_rate, dropout_rate, checkpoint_dir,
                   saved_model_dir, tensorboard_log):
    exit_task = echo_op("Done!")

    with dsl.ExitHandler(exit_task):
        vop = dsl.VolumeOp(
            name="mnist_model_volume",
            resource_name="mnist_model",
            storage_class="nfs-client",
            modes=dsl.VOLUME_MODE_RWM,
            size="10Gi")

        mnist = dsl.ContainerOp(
            name='Mnist',
            image='kubeflow-registry.default.svc.cluster.local:30000/katib-job:2B27615F',
            command=['python', '/app/mnist_to_pipeline.py'],
            arguments=[
                "--learning_rate", learning_rate,
                "--dropout_rate", dropout_rate,
                "--checkpoint_dir", checkpoint_dir,
                "--saved_model_dir", saved_model_dir,
                "--tensorboard_log", tensorboard_log
            ],
            pvolumes={"/result": vop.volume})

        result = dsl.ContainerOp(
            name='list_list',
            image='library/bash:4.4.23',
            command=['ls', '-R', '/result'],
            pvolumes={"/result": mnist.pvolume})

        mnist.after(vop)
        result.after(mnist)
def sequential_pipeline(model_data_url="https://test-epi.s3.fr-par.scw.cloud/kc_house_data.csv?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=SCWTTHE3QW9915A46GMW%2F20200206%2Ffr-par%2Fs3%2Faws4_request&X-Amz-Date=20200206T200330Z&X-Amz-Expires=521789&X-Amz-Signature=56952a33bf8a12b255ed573a2a4e05dd901eec9985a98ed332f89c61ad55a2cd&X-Amz-SignedHeaders=host"):
    model_data_dest = "/data/kc_house_data.csv"

    vop = dsl.VolumeOp(
        name="vol",
        resource_name="newpvc",
        size="1Gi",
        modes=dsl.VOLUME_MODE_RWM)

    op_download = dsl.ContainerOp(
        name='download',
        image='toune/epi-saas-project2-download',
        pvolumes={"/data": vop.volume})
    op_download.container.add_env_variable(
        V1EnvVar(name='DOWNLOAD_FILE_URL', value=model_data_url))
    op_download.container.add_env_variable(
        V1EnvVar(name='DOWNLOAD_FILE_DEST', value=model_data_dest))

    op_predict = dsl.ContainerOp(
        name='predict',
        image='toune/epi-saas-project2-app',
        pvolumes={"/data": vop.volume})
    op_predict.container.add_env_variable(
        V1EnvVar(name='TRAINED_MODEL_PATH', value="/data/model-artifacts"))
    op_predict.container.add_env_variable(
        V1EnvVar(name='DATA_PATH', value=model_data_dest))

    op_predict.after(op_download)
def hello_word():
    vop = dsl.VolumeOp(
        name="create_pvc",
        resource_name="my-pvc",
        size="2Gi",
        modes=dsl.VOLUME_MODE_RWM)

    step1 = dsl.ContainerOp(
        name="download",
        image="google/cloud-sdk:295.0.0-slim",
        command=["gsutil", "cp", "-r"],
        arguments=["gs://raw_movie_data", "/mnt"],
        pvolumes={"/mnt": vop.volume})

    step2 = dsl.ContainerOp(
        name="step2",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        # Pass a single shell string so "sh -c" actually lists the mount.
        arguments=["ls /mnt"],
        pvolumes={"/mnt": step1.pvolume})

    step3 = dsl.ContainerOp(
        name="step3",
        image="library/bash:4.4.23",
        command=[
            "cat", "/mnt/raw_movie_data/links.csv",
            "/mnt/raw_movie_data/ratings.csv"
        ],
        pvolumes={"/mnt": step2.pvolume})
def mnist_container_pipeline(data_path='/mnt', model_file='mnist_model.h5',
                             IMAGE_NUMBER='0'):
    vop = dsl.VolumeOp(
        name='create_volume',
        resource_name='data-volume',
        size='1Gi',
        modes=dsl.VOLUME_MODE_RWM)

    # We already have a volume and a glued component (Docker image + Python
    # script). The glued component needs access to the data, so let's attach
    # the volume to it.
    mnist_training_container = train_op(data_path, model_file) \
        .add_pvolumes({data_path: vop.volume})

    # Create the MNIST prediction component.
    mnist_predict_container = predict_op(data_path, model_file, IMAGE_NUMBER) \
        .add_pvolumes({data_path: mnist_training_container.pvolume})

    # Print the result of the prediction.
    mnist_result_container = dsl.ContainerOp(
        name="print_prediction",
        image='library/bash:4.4.23',
        pvolumes={data_path: mnist_predict_container.pvolume},
        arguments=['cat', f'{data_path}/result.txt'])
def _setup_volumes():
    vop = dsl.VolumeOp(
        name="data-volume-create",
        resource_name="data-volume",
        size=self.volume_meta.size,
        modes=self.volume_meta.access_modes,
        storage_class=self.volume_meta.storageclass,
    )
    if self.volume_meta.skip_init:
        return {"/home/kedro/data": vop.volume}
    else:
        volume_init = dsl.ContainerOp(
            name="data-volume-init",
            image=image,
            command=["sh", "-c"],
            arguments=[
                " ".join([
                    "cp",
                    "--verbose",
                    "-r",
                    "/home/kedro/data/*",
                    "/home/kedro/datavolume",
                ])
            ],
            pvolumes={"/home/kedro/datavolume": vop.volume},
        )
        volume_init.container.set_image_pull_policy(image_pull_policy)
        return {"/home/kedro/data": volume_init.pvolume}
def det_train_pipeline(
        detmaster,
        mlrepo="https://github.com/determined-ai/determined.git",
        branch="0.13.0",
        config="examples/official/trial/mnist_pytorch/const.yaml",
        context="examples/official/trial/mnist_pytorch/",
        model_name="mnist-prod",
        deployment_name="mnist-prod-kf",
        deployment_namespace="david",
        image="davidhershey/seldon-mnist:1.6"):
    volume_op = dsl.VolumeOp(
        name="create pipeline volume",
        resource_name="mlrepo-pvc",
        modes=["ReadWriteOnce"],
        size="3Gi",
    )
    clone = clone_mlrepo(mlrepo, branch, volume_op.volume)
    train = (
        run_det_and_wait_op(detmaster, config, context)
        .add_pvolumes({"/src/": clone.pvolume})
        .after(clone))
    register = register_op(detmaster, train.output, model_name).after(train)
    deploy = create_seldon_op(
        detmaster,
        deployment_name,
        deployment_namespace,
        model_name,
        image,
    ).after(register)
def training(input_directory="/pvc/input", output_directory="/pvc/output",
             handlerFile="image_classifier"):
    vop = dsl.VolumeOp(
        name="volume_creation",
        resource_name="pvcm",
        modes=dsl.VOLUME_MODE_RWO,
        size="1Gi")

    prep_output = bert_data_prep_op(
        input_data=[{
            "dataset_url":
                "https://kubeflow-dataset.s3.us-east-2.amazonaws.com/ag_news_csv.tar.gz"
        }],
        container_entrypoint=[
            "python",
            "/pvc/input/bert_pre_process.py",
        ],
        output_data=["/pvc/output/processing"],
        source_code=[
            "https://kubeflow-dataset.s3.us-east-2.amazonaws.com/bert_pre_process.py"
        ],
        source_code_path=["/pvc/input"]
    ).add_pvolumes({"/pvc": vop.volume})

    train_output = bert_train_op(
        input_data=["/pvc/output/processing"],
        container_entrypoint=[
            "python",
            "/pvc/input/bert_train.py",
        ],
        output_data=["/pvc/output/train/models"],
        input_parameters=[{
            "tensorboard_root": "/pvc/output/train/tensorboard",
            "max_epochs": 1,
            "num_samples": 150,
            "batch_size": 4,
            "num_workers": 1,
            "learning_rate": 0.001,
            "accelerator": None
        }],
        source_code=[
            "https://kubeflow-dataset.s3.us-east-2.amazonaws.com/bert_train.py"
        ],
        source_code_path=["/pvc/input"]
    ).add_pvolumes({"/pvc": vop.volume}).after(prep_output)
def volumeop_basic(size: str = "1Gi"): vop = dsl.VolumeOp(name="create-pvc", resource_name="my-pvc", modes=dsl.VOLUME_MODE_RWO, size=size) write_to_volume().add_pvolumes({"/mnt'": vop.volume})
def det_train_pipeline(
        detmaster,
        mlrepo="https://github.com/determined-ai/determined.git",
        branch="0.13.0",
        config="examples/official/trial/mnist_pytorch/const.yaml",
        context="examples/official/trial/mnist_pytorch/",
        model_name="mnist-prod",
        deployment_name="mnist-prod-kf",
        deployment_namespace="david",
        image="davidhershey/seldon-mnist:1.6"):
    volume_op = dsl.VolumeOp(
        name="create pipeline volume",
        resource_name="mlrepo-pvc",
        modes=["ReadWriteOnce"],
        size="3Gi",
    )
    clone = clone_mlrepo(mlrepo, branch, volume_op.volume)
    train = (
        run_det_and_wait_op(detmaster, config, context)
        .add_pvolumes({"/src/": clone.pvolume})
        .after(clone)
    )
    decide = decide_op(detmaster, train.output, model_name)

    with dsl.Condition(decide.output == True, name="Deploy"):
        deploy = create_seldon_op(
            detmaster,
            deployment_name,
            deployment_namespace,
            model_name,
            image,
        )
    with dsl.Condition(decide.output == False, name="No-Deploy"):
        print_op('Model Not Deployed -- Performance was not better than previous version')
def volumeop_parallel():
    vop = dsl.VolumeOp(
        name="create_pvc",
        resource_name="my-pvc",
        size="10Gi",
        modes=dsl.VOLUME_MODE_RWM)

    step1 = dsl.ContainerOp(
        name="step1",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo 1 | tee /mnt/file1"],
        pvolumes={"/mnt": vop.volume})

    step2 = dsl.ContainerOp(
        name="step2",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo 2 | tee /common/file2"],
        pvolumes={"/common": vop.volume})

    step3 = dsl.ContainerOp(
        name="step3",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo 3 | tee /mnt3/file3"],
        pvolumes={"/mnt3": vop.volume})
def serving_op(image: str, bucket_name: str, model_name: str, model_version: str):
    namespace = 'kfserving-inference-service'
    runtime_version = '2.0.0'
    service_account_name = 'sa'
    storage_url = f"s3://{bucket_name}/{model_name}/{model_version}"

    volume_op = dsl.VolumeOp(
        name="create pipeline volume",
        resource_name="pipeline-pvc",
        modes=["ReadWriteOnce"],
        size="3Gi")

    op = dsl.ContainerOp(
        name='serve model',
        image=image,
        command=["python", "/workspace/deployment/server.py"],
        arguments=[
            '--namespace', namespace,
            '--name', f'{model_name}-{model_version}',
            '--storage_url', storage_url,
            '--runtime_version', runtime_version,
            '--service_account_name', service_account_name
        ],
        container_kwargs={'image_pull_policy': "IfNotPresent"},
        pvolumes={"/workspace": volume_op.volume})
    return op
def fmri_pipeline():
    vop = dsl.VolumeOp(
        name="datapvc",
        resource_name="newpvc",
        size="10Gi",
        modes=dsl.VOLUME_MODE_RWO)

    #tag::step1[]
    step1 = dsl.ContainerOp(
        name="generatedata",
        image="rawkintrevo/r-fmri-gen:latest",
        command=[
            "Rscript", "/pipelines/component/src/program.R",
            "--out", "/data/synthetic"
        ],
        pvolumes={"/data": vop.volume})
    #end::step1[]

    #tag::step2[]
    step2 = dsl.ContainerOp(
        name="prepdata",
        image="rawkintrevo/py-fmri-prep:0.2",
        command=["python", "/pipelines/component/src/program.py"],
        arguments=["/data/synthetic.nii.gz", "/data/s.csv"],
        pvolumes={"/data": step1.pvolume})
    #end::step2[]

    #tag::step3[]
    # container_manifest is a dict describing the SparkApplication custom
    # resource; it is defined earlier in the original source and not shown here.
    rop = dsl.ResourceOp(
        name="spark-scala-mahout-fmri",
        k8s_resource=container_manifest,
        action="create",
        success_condition="status.applicationState.state == COMPLETED").after(step2)
def param_substitutions():
    vop = dsl.VolumeOp(
        name="create_volume",
        resource_name="data",
        size="1Gi")

    op = dsl.ContainerOp(
        name="cop",
        image="image",
        arguments=["--param", vop.output],
        pvolumes={"/mnt": vop.volume})
def iris_pipeline():
    vol = dsl.VolumeOp(
        name="create-pvc",
        resource_name="modelpvc",
        size="1Gi",
        modes=dsl.VOLUME_MODE_RWO,
    )

    preproc_op = kfp.components.load_component_from_file(
        "components/hello_world/iris-load/component.yaml")
    preproc = preproc_op()

    train_op = kfp.components.load_component_from_file(
        "components/hello_world/iris-train/component.yaml")
    train = train_op(
        x_train_path=preproc.outputs["x_train_path"],
        y_train_path=preproc.outputs["y_train_path"],
        model_path="/mnt/model.pkl",
    )
    train.add_pvolumes({"/mnt": vol.volume})

    test_op = kfp.components.load_component_from_file(
        "components/hello_world/iris-test/component.yaml")
    test = test_op(
        x_test_path=preproc.outputs["x_test_path"],
        y_test_path=preproc.outputs["y_test_path"],
        model_path="/mnt/model.pkl",
    )
    test.add_pvolumes({"/mnt": train.pvolume})

    serve_op = kfp.components.load_component_from_file(
        "components/hello_world/iris-serve/component.yaml")
    serve = serve_op(pvc=vol.outputs["name"]).after(train)
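# The iris components above are loaded from component.yaml files that are not
# part of this listing. For orientation, a KFP v1 component spec has roughly
# the shape sketched below; the image, script name, and flags are placeholders
# and are not the contents of the real iris-train component.
from kfp import components

train_op_sketch = components.load_component_from_text("""
name: iris-train
inputs:
- {name: x_train_path, type: String}
- {name: y_train_path, type: String}
- {name: model_path, type: String}
implementation:
  container:
    image: example.com/iris-train:latest  # placeholder image
    command: [
      python, train.py,
      --x-train, {inputValue: x_train_path},
      --y-train, {inputValue: y_train_path},
      --model, {inputValue: model_path},
    ]
""")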
def volume_snapshotop_sequential(url):
    vop = dsl.VolumeOp(
        name="create_volume",
        resource_name="vol1",
        size="1Gi",
        modes=dsl.VOLUME_MODE_RWM)

    step1 = dsl.ContainerOp(
        name="step1_ingest",
        image="google/cloud-sdk:279.0.0",
        command=["sh", "-c"],
        arguments=["mkdir /data/step1 && "
                   "gsutil cat %s | gzip -c >/data/step1/file1.gz" % url],
        pvolumes={"/data": vop.volume})

    step1_snap = dsl.VolumeSnapshotOp(
        name="step1_snap",
        resource_name="step1_snap",
        volume=step1.pvolume)

    step2 = dsl.ContainerOp(
        name="step2_gunzip",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["mkdir /data/step2 && "
                   "gunzip /data/step1/file1.gz -c >/data/step2/file1"],
        pvolumes={"/data": step1.pvolume})

    step2_snap = dsl.VolumeSnapshotOp(
        name="step2_snap",
        resource_name="step2_snap",
        volume=step2.pvolume)

    step3 = dsl.ContainerOp(
        name="step3_copy",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["mkdir /data/step3 && "
                   "cp -av /data/step2/file1 /data/step3/file3"],
        pvolumes={"/data": step2.pvolume})

    step3_snap = dsl.VolumeSnapshotOp(
        name="step3_snap",
        resource_name="step3_snap",
        volume=step3.pvolume)

    step4 = dsl.ContainerOp(
        name="step4_output",
        image="library/bash:4.4.23",
        command=["cat", "/data/step2/file1", "/data/step3/file3"],
        pvolumes={"/data": step3.pvolume})
def reliance_object_detection():
    # Kubernetes resource names must be lowercase RFC 1123 labels, so the PVC
    # resource name cannot contain uppercase letters.
    vop = dsl.VolumeOp(
        name="objectdetection_pvc",
        resource_name="template-detection-dev",
        size="8Gi",
        modes=dsl.VOLUME_MODE_RWO)
def pipeline_head(git_repo, branch="master", rev='HEAD', git_secret="git-creds"):
    src_vol_op = dsl.VolumeOp(
        name="Git_source_pvc",
        resource_name="git-pvc",
        size='60Mi',
        modes=dsl.VOLUME_MODE_RWM)

    gitsync_step = dsl.ContainerOp(
        name="Git-sync",
        image="k8s.gcr.io/git-sync/git-sync:v3.3.0",
        arguments=[
            "--ssh",
            f"--repo={git_repo}",
            "--root=/tmp/src",
            "--dest=pipeline_source",
            f"--rev={rev}",
            f"--branch={branch}",
            "--one-time"
        ],
        pvolumes={"/tmp/src": src_vol_op.volume})
    gitsync_step.add_volume(
        k8s_client.V1Volume(
            name='git-cred-volume',
            secret=k8s_client.V1SecretVolumeSource(secret_name=git_secret))
    ).add_volume_mount(
        k8s_client.V1VolumeMount(
            mount_path="/etc/git-secret",
            name="git-cred-volume"))
    gitsync_step.execution_options.caching_strategy.max_cache_staleness = "P0D"

    step1 = dsl.ContainerOp(
        name="step1",
        image="python:3.8",
        command=["python"],
        arguments=[
            "/tmp/src/pipeline_source/step1.py",
            "--arg1", "input_arg1",
            "--arg2", "input_arg2"
        ],
        pvolumes={"/tmp/src": src_vol_op.volume.after(gitsync_step)}
    ).add_env_variable(
        k8s_client.V1EnvVar(name="PYTHONPATH", value="/tmp/src/pipeline_source"))
    step1.execution_options.caching_strategy.max_cache_staleness = "P0D"

    step2 = dsl.ContainerOp(
        name="step2",
        image="python:3.8",
        command=["python"],
        arguments=[
            "/tmp/src/pipeline_source/step2.py",
            "--arg1", "input_arg1",
            "--arg2", "input_arg2"
        ],
        pvolumes={"/tmp/src": src_vol_op.volume.after(step1)}
    ).add_env_variable(
        k8s_client.V1EnvVar(name="PYTHONPATH", value="/tmp/src/pipeline_source"))
    step2.execution_options.caching_strategy.max_cache_staleness = "P0D"
def auto_generated_pipeline(booltest='True', d1='5', d2='6', strtest='test'):
    pvolumes_dict = OrderedDict()
    volume_step_names = []
    volume_name_parameters = []

    marshal_vop = dsl.VolumeOp(
        name="kale_marshal_volume",
        resource_name="kale-marshal-pvc",
        modes=dsl.VOLUME_MODE_RWM,
        size="1Gi")
    volume_step_names.append(marshal_vop.name)
    volume_name_parameters.append(marshal_vop.outputs["name"].full_name)
    pvolumes_dict['/marshal'] = marshal_vop.volume

    volume_step_names.sort()
    volume_name_parameters.sort()

    create_matrix_task = create_matrix_op(d1, d2)\
        .add_pvolumes(pvolumes_dict)\
        .after()
    step_limits = {'nvidia.com/gpu': '2'}
    for k, v in step_limits.items():
        create_matrix_task.container.add_resource_limit(k, v)
    create_matrix_task.container.working_dir = "/kale"
    create_matrix_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update({'mlpipeline-metrics': '/mlpipeline-metrics.json'})
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'create_matrix': '/create_matrix.html'})
    create_matrix_task.output_artifact_paths.update(output_artifacts)
    dep_names = create_matrix_task.dependent_names + volume_step_names
    create_matrix_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        create_matrix_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))

    sum_matrix_task = sum_matrix_op()\
        .add_pvolumes(pvolumes_dict)\
        .after(create_matrix_task)
    sum_matrix_task.container.working_dir = "/kale"
    sum_matrix_task.container.set_security_context(
        k8s_client.V1SecurityContext(run_as_user=0))
    output_artifacts = {}
    output_artifacts.update({'mlpipeline-metrics': '/mlpipeline-metrics.json'})
    output_artifacts.update(
        {'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'})
    output_artifacts.update({'sum_matrix': '/sum_matrix.html'})
    sum_matrix_task.output_artifact_paths.update(output_artifacts)
    dep_names = sum_matrix_task.dependent_names + volume_step_names
    sum_matrix_task.add_pod_annotation(
        "kubeflow-kale.org/dependent-templates", json.dumps(dep_names))
    if volume_name_parameters:
        sum_matrix_task.add_pod_annotation(
            "kubeflow-kale.org/volume-name-parameters",
            json.dumps(volume_name_parameters))
def mnist_pipeline(learning_rate, dropout_rate, checkpoint_dir,
                   saved_model_dir, tensorboard_log):
    exit_task = echo_op("Done!")

    with dsl.ExitHandler(exit_task):
        vop = dsl.VolumeOp(
            name="mnist_model_volume",
            resource_name="mnist_model",
            storage_class="nfs-client",
            modes=dsl.VOLUME_MODE_RWM,
            size="10Gi")

        mnist = dsl.ContainerOp(
            name='Mnist',
            image='kubeflow-registry.default.svc.cluster.local:30000/katib-job:2B27615F',
            command=['python', '/app/mnist_to_pipeline.py'],
            arguments=[
                "--learning_rate", learning_rate,
                "--dropout_rate", dropout_rate,
                "--checkpoint_dir", checkpoint_dir,
                "--saved_model_dir", saved_model_dir,
                "--tensorboard_log", tensorboard_log
            ],
            pvolumes={"/result": vop.volume},
            output_artifact_paths={
                'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'
            },
            container_kwargs={
                'env': [
                    V1EnvVar('S3_ENDPOINT',
                             'minio-service.kubeflow.svc.cluster.local:9000'),
                    V1EnvVar('AWS_ENDPOINT_URL',
                             'http://minio-service.kubeflow.svc.cluster.local:9000'),
                    V1EnvVar('AWS_ACCESS_KEY_ID', 'minio'),
                    V1EnvVar('AWS_SECRET_ACCESS_KEY', 'minio123'),
                    V1EnvVar('AWS_REGION', 'us-east-1'),
                    V1EnvVar('S3_USE_HTTPS', '0'),
                    V1EnvVar('S3_VERIFY_SSL', '0'),
                ]
            })

        result = dsl.ContainerOp(
            name='list_list',
            image='library/bash:4.4.23',
            command=['ls', '-R', '/result'],
            pvolumes={"/result": mnist.pvolume})

        mnist.after(vop)
        result.after(mnist)


arguments = {
    'learning_rate': '0.01',
    'dropout_rate': '0.2',
    'checkpoint_dir': '/result/training_checkpoints',
    'model_version': '001',
    'saved_model_dir': '/result/saved_model',
    'tensorboard_log': '/result/log'
}
def Volume(name: str, action: str, resource_name: str, volume_name: str,
           modes: str, size: str) -> dsl.VolumeOp:
    return dsl.VolumeOp(
        name=name,
        action=action,
        resource_name=resource_name,
        volume_name=volume_name,
        modes=modes,
        size=size)
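# One way the Volume helper above might be called from inside a
# @dsl.pipeline-decorated function; every name and size here is illustrative
# and not taken from the original code.
def volume_helper_usage_example():
    vop = Volume(
        name="create-shared-pvc",
        action="create",
        resource_name="shared-pvc",
        volume_name="shared-volume",
        modes=["ReadWriteOnce"],
        size="1Gi")
    # Mount the created volume in a downstream step.
    step = dsl.ContainerOp(
        name="use-volume",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo hello > /mnt/hello.txt"],
        pvolumes={"/mnt": vop.volume})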
def volume_snapshotop_rokurl(rok_url):
    vop1 = dsl.VolumeOp(
        name="create_volume_1",
        resource_name="vol1",
        size="1Gi",
        annotations={"rok/origin": rok_url},
        modes=dsl.VOLUME_MODE_RWM)

    step1 = dsl.ContainerOp(
        name="step1_concat",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["cat /data/file* | gzip -c >/data/full.gz"],
        pvolumes={"/data": vop1.volume})

    step1_snap = dsl.VolumeSnapshotOp(
        name="create_snapshot_1",
        resource_name="snap1",
        volume=step1.pvolume)

    vop2 = dsl.VolumeOp(
        name="create_volume_2",
        resource_name="vol2",
        data_source=step1_snap.snapshot,
        size=step1_snap.outputs["size"])

    step2 = dsl.ContainerOp(
        name="step2_gunzip",
        image="library/bash:4.4.23",
        command=["gunzip", "-k", "/data/full.gz"],
        pvolumes={"/data": vop2.volume})

    step2_snap = dsl.VolumeSnapshotOp(
        name="create_snapshot_2",
        resource_name="snap2",
        volume=step2.pvolume)

    vop3 = dsl.VolumeOp(
        name="create_volume_3",
        resource_name="vol3",
        data_source=step2_snap.snapshot,
        size=step2_snap.outputs["size"])

    step3 = dsl.ContainerOp(
        name="step3_output",
        image="library/bash:4.4.23",
        command=["cat", "/data/full"],
        pvolumes={"/data": vop3.volume})
def mnist_container_pipeline(data_path='/mnt', model_file='mnist_model.h5'):
    vop = dsl.VolumeOp(
        name='create_volume',
        resource_name='data-volume',
        size='1Gi',
        modes=dsl.VOLUME_MODE_RWM)

    # We already have a volume and a glued component (Docker image + Python
    # script). The glued component needs access to the data, so let's attach
    # the volume to it.
    mnist_training_container = train_op(data_path, model_file) \
        .add_pvolumes({data_path: vop.volume})
def volumeop_basic(size):
    vop = dsl.VolumeOp(
        name="create-pvc",
        resource_name="my-pvc",
        modes=dsl.VOLUME_MODE_RWO,
        size=size)

    cop = dsl.ContainerOp(
        name="cop",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo foo > /mnt/file1"],
        pvolumes={"/mnt": vop.volume})
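# A minimal, assumed way to launch volumeop_basic against a cluster where the
# KFP client is already configured; the size argument and run name below are
# examples only and not part of the original snippet.
import kfp

if __name__ == "__main__":
    kfp.Client().create_run_from_pipeline_func(
        volumeop_basic,
        arguments={"size": "1Gi"},
        run_name="volumeop-basic-example")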
def train_pipeline(
    num_proc: int = 1,
    volume_size: str = "50Gi",
    data_name: str = "GroceriesReal",
    logdir: str = "gs://thea-dev/runs/yyyymmdd-hhmm",
    docker_image: str = (
        "gcr.io/unity-ai-thea-test/datasetinsights:<git-commit-sha>"),
    config_file: str = (
        "datasetinsights/configs/faster_rcnn_groceries_real.yaml"),
    epochs: int = 50,
):
    # Create a large persistent volume to store training data.
    vop = dsl.VolumeOp(
        name="train-pvc",
        resource_name="train-pvc",
        size=volume_size,
        modes=dsl.VOLUME_MODE_RWO,
    )

    # Dataset download
    download = dsl.ContainerOp(
        name="groceriesreal download",
        image=docker_image,
        command=["python", "-m", "datasetinsights.scripts.public_download"],
        arguments=[f"--name={data_name}"],
        pvolumes={"/data": vop.volume},
    )

    # Train
    train = dsl.ContainerOp(
        name="train",
        image=docker_image,
        command=["python", "-m", "datasetinsights.cli"],
        arguments=[
            "--local_rank=0",
            "train",
            f"--config={config_file}",
            f"--logdir={logdir}",
            "train.epochs", epochs,
        ],
        pvolumes={"/data": download.pvolumes["/data"]},
    )

    # Request GPUs
    train.set_gpu_limit(NUM_GPU)
    train.add_node_selector_constraint("cloud.google.com/gke-accelerator",
                                       GPU_TYPE)

    # Request memory
    train.set_memory_request(MEMORY_LIMIT)
    train.set_memory_limit(MEMORY_LIMIT)

    # Use a GCP service account to access GCP resources.
    train.apply(gcp.use_gcp_secret("user-gcp-sa"))
    # train.set_timeout(DEFAULT_TIMEOUT)
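# train_pipeline references several module-level constants that are not shown
# in this listing. The values below are assumptions for illustration only; the
# original project may define them differently.
NUM_GPU = 1
GPU_TYPE = "nvidia-tesla-v100"   # a valid gke-accelerator label value
MEMORY_LIMIT = "64Gi"
DEFAULT_TIMEOUT = 86400          # seconds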
def volumeop_basic(size: str = "10M"): vop = dsl.VolumeOp( name="create-pvc", resource_name="my-pvc", modes=dsl.VOLUME_MODE_RWO, size=size # success_condition="status.phase = Bound", # failure_condition="status.phase = Failed" ) cop = cop_op().add_pvolumes({"/mnt": vop.volume})
def SendMsg(trial, epoch, patience):
    vop = dsl.VolumeOp(
        name="pvc",
        resource_name="pvc",
        size='1Gi',
        modes=dsl.VOLUME_MODE_RWO)

    return dsl.ContainerOp(
        name='Train',
        image='hubdocker76/demotrain:v1',
        command=['python3', 'train.py'],
        arguments=[
            '--trial', trial,
            '--epoch', epoch,
            '--patience', patience
        ],
        pvolumes={'/data': vop.volume})
def pipeline(
    dataset="https://data.keithito.com/data/speech/LJSpeech-1.1.tar.bz2",
    directory="/mnt/kf/",
    batch_size=16,
    learning_rate=0.001,
    log_step=1,
    save_step=1,
    epochs=1,
):
    OWNER = os.environ['OWNER']
    VERSION = os.environ['KF_PIPELINE_VERSION']

    volume = dsl.VolumeOp(
        name="volume_creation",
        resource_name="share",
        size="20Gi")

    Dataset_Download = dsl.ContainerOp(
        name="dataset download",
        image=f"{OWNER}/kf-dataset:{VERSION}",
        arguments=[f"--url={dataset}", f"--directory={directory}"],
        pvolumes={f"{directory}": volume.volume},
    )

    Training = dsl.ContainerOp(
        name="training model",
        image=f"{OWNER}/kf-training:{VERSION}",
        arguments=[
            f"--dir_data={directory}/dataset",
            f"--dir_checkpoints={directory}/models",
            f"--batch_size={batch_size}",
            f"--learning_rate={learning_rate}",
            f"--log_step={log_step}",
            f"--save_step={save_step}",
            f"--epochs={epochs}",
        ],
        pvolumes={f"{directory}": volume.volume},
    )
    Training.after(Dataset_Download)

    Serving = dsl.ContainerOp(
        name="serving",
        image=f"{OWNER}/kf-webapp:{VERSION}",
        arguments=[
            f"--result={directory}/results",
            f"--directory={directory}/models",
            "--model=model.pth.tar",
        ],
        pvolumes={f"{directory}": volume.volume},
    )
    Serving.after(Training)