def test_use_importer_should_error(self):
    """Compiling a pipeline that calls dsl.importer must raise ValueError."""

    @dsl.pipeline(name='test-pipeline')
    def my_pipeline():
        dsl.importer(artifact_uri='dummy', artifact_class=Artifact)

    expected_message = 'dsl.importer is not supported with v1 compiler.'
    with self.assertRaisesRegex(ValueError, expected_message):
        # Both construction and compilation stay inside the assertion
        # context, exactly as in the single chained expression form.
        v2_compatible_compiler = compiler.Compiler(
            mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE)
        v2_compatible_compiler.compile(
            pipeline_func=my_pipeline, package_path='result.json')
def test_two_step_pipeline(self):
    """Compile a two-step pipeline in V2_COMPATIBLE mode and compare it to its golden file."""

    @dsl.pipeline(
        pipeline_root='gs://output-directory/v2-artifacts',
        name='my-test-pipeline')
    def v2_compatible_two_step_pipeline():
        # The training step consumes both outputs of the preprocessing step.
        preprocessed = preprocess(uri='uri-to-import', some_int=12)
        train(
            num_steps=preprocessed.outputs['output_parameter_one'],
            dataset=preprocessed.outputs['output_dataset_one'])

    self._assert_compiled_pipeline_equals_golden(
        compiler.Compiler(mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE),
        v2_compatible_two_step_pipeline,
        'v2_compatible_two_step_pipeline.yaml')
def test_constructing_container_op_directly_should_error(self):
    """Constructing ContainerOp directly must raise RuntimeError when compiling in V2_COMPATIBLE mode."""

    @dsl.pipeline(name='test-pipeline')
    def my_pipeline():
        v1dsl.ContainerOp(
            name='comp1',
            image='gcr.io/dummy',
            command=['python', 'main.py'])

    # The pattern is written with raw strings because it contains the regex
    # escapes ``\(`` and ``\)``, which are invalid escape sequences in an
    # ordinary string literal. The resulting pattern text is unchanged.
    with self.assertRaisesRegex(
            RuntimeError,
            r'Constructing ContainerOp instances directly is deprecated and not '
            r'supported when compiling to v2 \(using v2 compiler or v1 compiler '
            r'with V2_COMPATIBLE or V2_ENGINE mode\).'):
        compiler.Compiler(
            mode=v1dsl.PipelineExecutionMode.V2_COMPATIBLE).compile(
                pipeline_func=my_pipeline, package_path='result.json')
} ) def echo_op(text): return dsl.ContainerOp( name='echo', image='library/bash:4.4.23', command=['sh', '-cex'], arguments=['echo "$0"', text], ) @dsl.pipeline( name='Exit Handler', description= 'Downloads a message and prints it. The exit handler will run after the pipeline finishes (successfully or not).' ) def download_and_print(url='gs://ml-pipeline/shakespeare1.txt'): """A sample pipeline showing exit handler.""" exit_task = echo_op('exit!') with dsl.ExitHandler(exit_task): download_task = gcs_download_op(url) echo_task = echo_op(download_task.output) if __name__ == '__main__': compiler.Compiler().compile(download_and_print, __file__ + '.yaml')
name="my-in-coop2", image="library/bash:4.4.23", command=["sh", "-c"], arguments=["echo op2 %s" % item.b], ) op_out = dsl.ContainerOp( name="my-out-cop", image="library/bash:4.4.23", command=["sh", "-c"], arguments=["echo %s" % my_pipe_param], ) if __name__ == '__main__': from kfp.deprecated import compiler print(compiler.Compiler().compile(pipeline, package_path=None)) import kfp.deprecated as kfp client = kfp.Client(host='127.0.0.1:8080/pipeline') pkg_path = '/tmp/witest_pkg.tar.gz' compiler.Compiler().compile(pipeline, package_path=pkg_path) exp = client.create_experiment('withparams_exp') client.run_pipeline( experiment_id=exp.id, job_name='withitem_basic', pipeline_package_path=pkg_path, params={}, )
name='GCS - Download', image='google/cloud-sdk:279.0.0', command=['sh', '-c'], arguments=['gsutil cat $0 | tee $1', url, '/tmp/results.txt'], file_outputs={ 'data': '/tmp/results.txt', } ) def echo_op(text): return dsl.ContainerOp( name='echo', image='library/bash:4.4.23', command=['sh', '-c'], arguments=['echo "$0"', text] ) @dsl.pipeline( name='sequential-pipeline', description='A pipeline with two sequential steps.' ) def sequential_pipeline(url='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'): """A pipeline with two sequential steps.""" download_task = gcs_download_op(url) echo_task = echo_op(download_task.output) if __name__ == '__main__': compiler.Compiler().compile(sequential_pipeline, __file__ + '.yaml')
# See the License for the specific language governing permissions and
# limitations under the License.

from kfp.deprecated import dsl, compiler
import kfp.deprecated.components as comp


@comp.create_component_from_func
def random_failure_op(exit_codes):
    """A component that fails randomly."""
    import random
    import sys

    exit_code = int(random.choice(exit_codes.split(",")))
    print(exit_code)
    sys.exit(exit_code)


@dsl.pipeline(
    name='retry-random-failures',
    description='The pipeline includes two steps which fail randomly. It shows how to use ContainerOp(...).set_retry(...).'
)
def retry_sample_pipeline():
    # Two independent steps; each exits with a code picked from its own
    # comma-separated list and has its retry count set via set_retry().
    random_failure_op('0,1,2,3').set_retry(10)
    random_failure_op('0,1').set_retry(5)


if __name__ == '__main__':
    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(retry_sample_pipeline, output_path)
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from kfp.deprecated import dsl, components, compiler


@components.create_component_from_func
def print_op(s: str):
    print(s)


@dsl.pipeline(name='my-pipeline')
def pipeline():
    # Loop over a static list of dicts, printing the whole item and then
    # each of its two fields.
    records = [{'A_a': 1, 'B_b': 2}, {'A_a': 10, 'B_b': 20}]
    with dsl.ParallelFor(records, parallelism=10) as record:
        print_op(record)
        print_op(record.A_a)
        print_op(record.B_b)


if __name__ == '__main__':
    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(pipeline, output_path)
pvolumes={"/data": step1.pvolume}) step2_snap = dsl.VolumeSnapshotOp(name="step2_snap", resource_name="step2_snap", volume=step2.pvolume) step3 = dsl.ContainerOp(name="step3_copy", image="library/bash:4.4.23", command=["sh", "-c"], arguments=[ "mkdir /data/step3 && " "cp -av /data/step2/file1 /data/step3/file3" ], pvolumes={"/data": step2.pvolume}) step3_snap = dsl.VolumeSnapshotOp(name="step3_snap", resource_name="step3_snap", volume=step3.pvolume) step4 = dsl.ContainerOp( name="step4_output", image="library/bash:4.4.23", command=["cat", "/data/step2/file1", "/data/step3/file3"], pvolumes={"/data": step3.pvolume}) if __name__ == "__main__": import kfp.deprecated.compiler as compiler compiler.Compiler().compile(volume_snapshotop_sequential, __file__ + ".tar.gz")
volume=step1.pvolume) vop2 = dsl.VolumeOp(name="create_volume_2", resource_name="vol2", data_source=step1_snap.snapshot, size=step1_snap.outputs["size"]) step2 = dsl.ContainerOp(name="step2_gunzip", image="library/bash:4.4.23", command=["gunzip", "-k", "/data/full.gz"], pvolumes={"/data": vop2.volume}) step2_snap = dsl.VolumeSnapshotOp(name="create_snapshot_2", resource_name="snap2", volume=step2.pvolume) vop3 = dsl.VolumeOp(name="create_volume_3", resource_name="vol3", data_source=step2_snap.snapshot, size=step2_snap.outputs["size"]) step3 = dsl.ContainerOp(name="step3_output", image="library/bash:4.4.23", command=["cat", "/data/full"], pvolumes={"/data": vop3.volume}) if __name__ == "__main__": import kfp.deprecated.compiler as compiler compiler.Compiler().compile(volume_snapshotop_rokurl, __file__ + ".tar.gz")
@dsl.pipeline(
    name="ResourceOp Basic",
    description="A Basic Example on ResourceOp Usage.",
)
def resourceop_basic(username, password):
    # Secret manifest built from the two pipeline parameters.
    secret_metadata = k8s_client.V1ObjectMeta(generate_name="my-secret-")
    secret_manifest = k8s_client.V1Secret(
        api_version="v1",
        kind="Secret",
        metadata=secret_metadata,
        type="Opaque",
        data={"username": username, "password": password},
    )

    # Resource step; its "name" output is read from {.metadata.name}.
    create_secret = dsl.ResourceOp(
        name="create-my-secret",
        k8s_resource=secret_manifest,
        attribute_outputs={"name": "{.metadata.name}"},
    )

    # Volume backed by the secret whose name the resource step outputs.
    secret_source = k8s_client.V1SecretVolumeSource(
        secret_name=create_secret.output)
    secret_volume = k8s_client.V1Volume(name="my-secret", secret=secret_source)

    # List the contents of the mounted secret volume.
    dsl.ContainerOp(
        name="cop",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["ls /etc/secret-volume"],
        pvolumes={"/etc/secret-volume": secret_volume},
    )


if __name__ == "__main__":
    import kfp.deprecated.compiler as compiler

    # Write the compiled package next to this file.
    archive_path = __file__ + ".tar.gz"
    compiler.Compiler().compile(resourceop_basic, archive_path)
@dsl.graph_component
def flip_component(flip_result):
    report_task = print_op(flip_result)
    next_flip = flip_coin_op().after(report_task)
    # Per the original author: max_cache_staleness is set to zero ("P0D")
    # to prevent an infinite loop due to caching.
    next_flip.execution_options.caching_strategy.max_cache_staleness = "P0D"

    landed_heads = next_flip.output == 'heads'
    with dsl.Condition(landed_heads):
        # On the recursive call, the output of the flip made inside this
        # graph component becomes the next invocation's flip_result; the
        # flip_result of the outermost invocation comes from the flip made
        # in the flipcoin pipeline function.
        flip_component(next_flip.output)


@dsl.pipeline(
    name='recursive-loop-pipeline',
    description='Shows how to create recursive loops.'
)
def flipcoin():
    initial_flip = flip_coin_op()
    # Same caching setting as inside the graph component ("P0D").
    initial_flip.execution_options.caching_strategy.max_cache_staleness = "P0D"

    # Per the original author: the value returned here is a graph_component
    # whose outputs field is filled with the returned dictionary.
    recursion = flip_component(initial_flip.output)
    print_op('cool, it is over.').after(recursion)


if __name__ == '__main__':
    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(flipcoin, output_path)
# limitations under the License.

import kfp.deprecated.components as comp
from kfp.deprecated import dsl, compiler


@comp.create_component_from_func
def print_op(msg: str):
    """Print a message."""
    print(msg)


def add_annotation(op):
    """Add the pod annotation hobby=football to ``op`` and return the same op."""
    op.add_pod_annotation(name='hobby', value='football')
    return op


@dsl.pipeline(
    name='pipeline-transformer',
    description=
    'The pipeline shows how to apply functions to all ops in the pipeline by pipeline transformers'
)
def transform_pipeline():
    print_op('hey, what are you up to?')
    print_op('train my model.')

    # Register add_annotation as an op transformer on the pipeline conf.
    pipeline_conf = dsl.get_pipeline_conf()
    pipeline_conf.add_op_transformer(add_annotation)


if __name__ == '__main__':
    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(transform_pipeline, output_path)
"""Print a message.""" def __init__(self, msg): super(PrintOp, self).__init__( name='Print', image='alpine:3.6', command=['echo', msg], ) @graph_component def flip_component(flip_result): print_flip = PrintOp(flip_result) flipA = FlipCoinOp().after(print_flip) with dsl.Condition(flipA.output == 'heads'): flip_component(flipA.output) @dsl.pipeline(name='pipeline flip coin', description='shows how to use graph_component.') def recursive(): flipA = FlipCoinOp() flipB = FlipCoinOp() flip_loop = flip_component(flipA.output) flip_loop.after(flipB) PrintOp('cool, it is over. %s' % flipA.output).after(flip_loop) if __name__ == '__main__': import kfp.deprecated.compiler as compiler compiler.Compiler().compile(recursive, __file__ + '.tar.gz')
analysis=analyze_output, workers=workers, rounds=rounds, output=train_output).after(_transform_op).set_display_name( 'Trainer') _predict_op = dataproc_predict_op( project=project, region=region, cluster_name=cluster_name, data=transform_output_eval, model=train_output, target=target, analysis=analyze_output, output=predict_output).after(_train_op).set_display_name( 'Predictor') _cm_op = confusion_matrix_op( predictions=os.path.join(predict_output, 'part-*.csv'), output_dir=output_template).after(_predict_op) _roc_op = roc_op(predictions_dir=os.path.join(predict_output, 'part-*.csv'), true_class=true_label, true_score_column=true_label, output_dir=output_template).after(_predict_op) if __name__ == '__main__': compiler.Compiler().compile(xgb_train_pipeline, __file__ + '.yaml')
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import kfp.deprecated.dsl as dsl


@dsl.pipeline(
    name="Param Substitutions",
    description="Test the same PipelineParam getting substituted in multiple "
    "places",
)
def param_substitutions():
    create_volume = dsl.VolumeOp(
        name="create_volume",
        resource_name="data",
        size="1Gi",
    )

    # The volume step is referenced twice by the container step: its output
    # as an argument and its volume as a mount.
    dsl.ContainerOp(
        name="cop",
        image="image",
        arguments=["--param", create_volume.output],
        pvolumes={"/mnt": create_volume.volume},
    )


if __name__ == '__main__':
    import kfp.deprecated.compiler as compiler

    # Write the compiled package next to this file.
    archive_path = __file__ + '.tar.gz'
    compiler.Compiler().compile(param_substitutions, archive_path)
# Copyright 2019 The Kubeflow Authors
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import kfp.deprecated as kfp


@kfp.dsl.pipeline(
    name='Test adding pod env',
    description='Test adding pod env',
)
def test_add_pod_env():
    # Single echo step that prints $KFP_POD_NAME and carries the
    # add-pod-env=true pod label.
    echo_step = kfp.dsl.ContainerOp(
        name='echo',
        image='library/bash',
        command=['sh', '-c'],
        arguments=['echo $KFP_POD_NAME'],
    )
    echo_step.add_pod_label('add-pod-env', 'true')


if __name__ == '__main__':
    import kfp.deprecated.compiler as compiler

    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(test_add_pod_env, output_path)
step2_snap = dsl.VolumeSnapshotOp( name="step2_snap", resource_name="step2_snap", volume=step2.pvolume ) step3 = dsl.ContainerOp( name="step3_copy", image="library/bash:4.4.23", command=["sh", "-c"], arguments=["mkdir /data/step3 && " "cp -av /data/step2/file1 /data/step3/file3"], pvolumes={"/data": step2.pvolume} ) step3_snap = dsl.VolumeSnapshotOp( name="step3_snap", resource_name="step3_snap", volume=step3.pvolume ) step4 = dsl.ContainerOp( name="step4_output", image="library/bash:4.4.23", command=["cat", "/data/step2/file1", "/data/step3/file3"], pvolumes={"/data": step3.pvolume} ) if __name__ == '__main__': compiler.Compiler().compile(volume_snapshotop_sequential, __file__ + '.yaml')
# See the License for the specific language governing permissions and
# limitations under the License.

from kfp.deprecated import dsl, compiler
import kfp.deprecated.components as comp


@comp.create_component_from_func
def echo1_op(text1: str):
    print(text1)


@comp.create_component_from_func
def echo2_op(text2: str):
    print(text2)


@dsl.pipeline(
    name='execution-order-pipeline',
    description='A pipeline to demonstrate execution order management.')
def execution_order_pipeline(text1: str = 'message 1',
                             text2: str = 'message 2'):
    """A two step pipeline with an explicitly defined execution order."""
    first_step = echo1_op(text1)
    second_step = echo2_op(text2)
    # No data flows between the steps, so the ordering is declared
    # explicitly with after().
    second_step.after(first_step)


if __name__ == '__main__':
    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(execution_order_pipeline, output_path)
} ) def echo2_op(text1, text2): return dsl.ContainerOp( name='echo', image='library/bash:4.4.23', command=['sh', '-c'], arguments=['echo "Text 1: $0"; echo "Text 2: $1"', text1, text2] ) @dsl.pipeline( name='parallel-pipeline', description='Download two messages in parallel and prints the concatenated result.' ) def download_and_join( url1='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt', url2='gs://ml-pipeline/sample-data/shakespeare/shakespeare2.txt' ): """A three-step pipeline with first two running in parallel.""" download1_task = gcs_download_op(url1) download2_task = gcs_download_op(url2) echo_task = echo2_op(download1_task.output, download2_task.output) if __name__ == '__main__': compiler.Compiler().compile(download_and_join, __file__ + '.yaml')
@components.create_component_from_func
def print_file(file_path: InputPath('Any')):
    """Print a file."""
    with open(file_path) as f:
        print(f.read())


@components.create_component_from_func
def echo_msg(msg: str):
    """Echo a message by parameter."""
    print(msg)


@dsl.pipeline(
    name='exit-handler',
    description=
    'Downloads a message and prints it. The exit handler will run after the pipeline finishes (successfully or not).'
)
def pipeline_exit_handler(url: str = 'gs://ml-pipeline/shakespeare1.txt'):
    """A sample pipeline showing exit handler."""
    # The exit task is created first and then handed to ExitHandler; the
    # download and print steps are created inside the handler's scope.
    on_exit = echo_msg('exit!')
    with dsl.ExitHandler(on_exit):
        downloaded = gcs_download_op(url)
        print_file(downloaded.output)


if __name__ == '__main__':
    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(pipeline_exit_handler, output_path)
def parallelfor_item_argument_resolving():
    # Producer tasks; their outputs are used below both as loop sources and
    # as ordinary arguments inside the loop bodies.
    str_task = produce_str()
    strings_task = produce_list_of_strings()
    ints_task = produce_list_of_ints()
    dicts_task = produce_list_of_dicts()

    # Loop over a produced list of strings.
    with kfp.dsl.ParallelFor(strings_task.output) as string_item:
        consume(strings_task.output)
        consume(string_item)
        consume(str_task.output)

    # Loop over a produced list of ints.
    with kfp.dsl.ParallelFor(ints_task.output) as int_item:
        consume(ints_task.output)
        consume(int_item)

    # Loop over a produced list of dicts. The original author notes that the
    # full loop item cannot be used when it is a dict, so only one field of
    # the item is consumed here.
    with kfp.dsl.ParallelFor(dicts_task.output) as dict_item:
        consume(dicts_task.output)
        consume(dict_item.aaa)

    # Loop over a static list of dicts defined in the pipeline itself.
    loop_args = [{'a': 1, 'b': 2}, {'a': 10, 'b': 20}]
    with kfp.dsl.ParallelFor(loop_args) as static_item:
        consume(loop_args)
        consume(static_item)


if __name__ == '__main__':
    import kfp.deprecated.compiler as compiler

    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(parallelfor_item_argument_resolving,
                                output_path)
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import kfp.deprecated.dsl as dsl


@dsl.pipeline(
    name="VolumeOp Basic",
    description="A Basic Example on VolumeOp Usage.",
)
def volumeop_basic(size):
    # Volume step sized by the pipeline parameter.
    create_pvc = dsl.VolumeOp(
        name="create_pvc",
        resource_name="my-pvc",
        modes=dsl.VOLUME_MODE_RWM,
        size=size,
    )

    # Container step that writes a file under the /mnt mount of that volume.
    dsl.ContainerOp(
        name="cop",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo foo > /mnt/file1"],
        pvolumes={"/mnt": create_pvc.volume},
    )


if __name__ == "__main__":
    import kfp.deprecated.compiler as compiler

    # Write the compiled package next to this file.
    archive_path = __file__ + ".tar.gz"
    compiler.Compiler().compile(volumeop_basic, archive_path)
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from kfp.deprecated import components, dsl, compiler


@components.create_component_from_func
def write_to_volume():
    with open("/mnt/file.txt", "w") as file:
        file.write("Hello world")


@dsl.pipeline(
    name="volumeop-basic",
    description="A Basic Example on VolumeOp Usage."
)
def volumeop_basic(size: str = "1Gi"):
    # Volume step sized by the pipeline parameter.
    create_pvc = dsl.VolumeOp(
        name="create-pvc",
        resource_name="my-pvc",
        modes=dsl.VOLUME_MODE_RWO,
        size=size,
    )

    # The writer component gets that volume attached at /mnt, the directory
    # it writes its file into.
    writer = write_to_volume()
    writer.add_pvolumes({"/mnt": create_pvc.volume})


if __name__ == '__main__':
    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(volumeop_basic, output_path)
"metadata": { "name": "resource-basic" }, "spec": { "containers": [{ "name": "sample-container", "image": "k8s.gcr.io/busybox", "command": ["/usr/bin/env"] }], "restartPolicy": "Never" } }, "backoffLimit": 4 } } """ @dsl.pipeline(name="resourceop-basic", description="A Basic Example on ResourceOp Usage.") def resourceop_basic(): # Start a container. Print out env vars. op = dsl.ResourceOp(name='test-step', k8s_resource=json.loads(_CONTAINER_MANIFEST), action='create') if __name__ == '__main__': compiler.Compiler().compile(resourceop_basic, __file__ + '.yaml')
a = [i for i in range(n)] return len(a) @components.create_component_from_func def generate_resource_request( ) -> NamedTuple('output', [('memory', str), ('cpu', str)]): '''Returns the memory and cpu request''' from collections import namedtuple resource_output = namedtuple('output', ['memory', 'cpu']) return resource_output('500Mi', '200m') @dsl.pipeline( name='Runtime resource request pipeline', description='An example on how to make resource requests at runtime.') def resource_request_pipeline(n: int = 11234567): resource_task = generate_resource_request() traning_task = training_op(n)\ .set_memory_limit(resource_task.outputs['memory'])\ .set_cpu_limit(resource_task.outputs['cpu'])\ .set_cpu_request('200m') # Disable cache for KFP v1 mode. traning_task.execution_options.caching_strategy.max_cache_staleness = 'P0D' if __name__ == '__main__': compiler.Compiler().compile(resource_request_pipeline, __file__ + '.yaml')
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from kfp.deprecated import dsl, compiler, components


@components.create_component_from_func
def print_op(msg):
    """Print a message."""
    print(msg)


@dsl.pipeline(
    name='pipeline-service-account',
    description='The pipeline shows how to set the max number of parallel pods in a pipeline.'
)
def pipeline_parallelism():
    print_op('hey, what are you up to?')
    print_op('train my model.')

    # Set the pipeline-level parallelism to 1 on the pipeline conf.
    pipeline_conf = dsl.get_pipeline_conf()
    pipeline_conf.set_parallelism(1)


if __name__ == '__main__':
    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(pipeline_parallelism, output_path)
@dsl.pipeline(
    name="Volume Op DAG",
    description="The second example of the design doc.",
)
def volume_op_dag():
    shared_pvc = dsl.VolumeOp(
        name="create_pvc",
        resource_name="my-pvc",
        size="10Gi",
        modes=dsl.VOLUME_MODE_RWM,
    )

    # Two writer steps mount the same volume, at /mnt and /mnt2 respectively.
    first_writer = dsl.ContainerOp(
        name="step1",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo 1 | tee /mnt/file1"],
        pvolumes={"/mnt": shared_pvc.volume},
    )
    second_writer = dsl.ContainerOp(
        name="step2",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["echo 2 | tee /mnt2/file2"],
        pvolumes={"/mnt2": shared_pvc.volume},
    )

    # The reader mounts the volume via .after(...) on both writers, which
    # orders it behind them.
    volume_after_writes = shared_pvc.volume.after(first_writer, second_writer)
    dsl.ContainerOp(
        name="step3",
        image="library/bash:4.4.23",
        command=["sh", "-c"],
        arguments=["cat /mnt/file1 /mnt/file2"],
        pvolumes={"/mnt": volume_after_writes},
    )


if __name__ == "__main__":
    import kfp.deprecated.compiler as compiler

    # Write the compiled package next to this file.
    archive_path = __file__ + ".tar.gz"
    compiler.Compiler().compile(volume_op_dag, archive_path)
@dsl.pipeline(
    name="pipeline-with-sidecar",
    description=
    "A pipeline that demonstrates how to add a sidecar to an operation."
)
def pipeline_with_sidecar():
    # Sidecar running nginx in the foreground; the main container below
    # fetches from it over localhost.
    nginx_sidecar = dsl.Sidecar(
        name="echo",
        image="nginx:1.13",
        command=["nginx", "-g", "daemon off;"],
    )

    # Main container: retry wget every 5 seconds until it succeeds, then
    # print the downloaded file, which is also exposed as a file output.
    fetch_command = (
        "until wget http://localhost:80 -O /tmp/results.txt; do sleep 5; done && cat /tmp/results.txt"
    )
    dsl.ContainerOp(
        name="download",
        image="busybox:latest",
        command=["sh", "-c"],
        arguments=[fetch_command],
        sidecars=[nginx_sidecar],
        file_outputs={"downloaded": "/tmp/results.txt"},
    )


if __name__ == '__main__':
    # Write the compiled pipeline next to this file.
    output_path = __file__ + '.yaml'
    compiler.Compiler().compile(pipeline_with_sidecar, output_path)
def __init__(self, msg): super(PrintOp, self).__init__( name='Print', image='alpine:3.6', command=['echo', msg], ) @dsl._component.graph_component def flip_component(flip_result, maxVal): with dsl.Condition(flip_result == 'heads'): print_flip = PrintOp(flip_result) flipA = FlipCoinOp().after(print_flip) flip_component(flipA.output, maxVal) @dsl.pipeline( name='pipeline flip coin', description='shows how to use dsl.Condition.') def flipcoin(maxVal=12): flipA = FlipCoinOp() flipB = FlipCoinOp() flip_loop = flip_component(flipA.output, maxVal) flip_loop.after(flipB) PrintOp('cool, it is over. %s' % flipA.output).after(flip_loop) if __name__ == '__main__': import kfp.deprecated.compiler as compiler compiler.Compiler().compile(flipcoin, __file__ + '.tar.gz')