def test_annotations_stripping(self):
    import collections
    import typing

    MyFuncOutputs = typing.NamedTuple('Outputs', [('sum', int),
                                                  ('product', int)])

    class CustomType1:
        pass

    def my_func(
            param1: CustomType1 = None,  # This caused failure previously
            param2: collections.OrderedDict = None,  # This caused failure previously
    ) -> MyFuncOutputs:  # This caused failure previously
        assert param1 == None
        assert param2 == None
        return (8, 15)

    task_factory = comp.create_component_from_func(my_func)

    self.helper_test_component_using_local_call(
        task_factory,
        arguments={},
        expected_output_values={
            'sum': '8',
            'product': '15'
        })
def test_handling_list_arguments_containing_serializable_python_objects(
        self):
    """Checks that lists containing python objects with .to_struct() can
    be properly serialized."""

    class MyClass:

        def to_struct(self):
            return {'foo': [7, 42]}

    def assert_values_are_correct(
        list_param: list,
        dict_param: dict,
    ) -> int:
        import unittest
        unittest.TestCase().assertEqual(list_param, [1, {
            'foo': [7, 42]
        }, 3])
        unittest.TestCase().assertEqual(dict_param,
                                        {'k1': {
                                            'foo': [7, 42]
                                        }})
        return 1

    task_factory = comp.create_component_from_func(assert_values_are_correct)

    self.helper_test_component_using_local_call(
        task_factory,
        arguments=dict(
            list_param=[1, MyClass(), 3],
            dict_param={'k1': MyClass()},
        ),
        expected_output_values={'Output': '1'},
    )
def test_code_with_escapes(self):

    def my_func():
        """Hello \n world."""

    task_factory = comp.create_component_from_func(my_func)
    self.helper_test_component_using_local_call(
        task_factory, arguments={}, expected_output_values={})
def test_argument_serialization_success(self):
    from typing import List

    def my_func(args: List[int]):
        print(args)

    task_factory = comp.create_component_from_func(my_func)
    self.helper_test_component_using_local_call(
        task_factory, arguments={'args': [1, 2, 3]})
def test_fail_on_handling_list_arguments_containing_python_objects(self):
    """Checks that lists containing python objects without .to_struct()
    raise an error during serialization."""

    class MyClass:
        pass

    def consume_list(list_param: list) -> int:
        return 1

    def consume_dict(dict_param: dict) -> int:
        return 1

    list_op = comp.create_component_from_func(consume_list)
    dict_op = comp.create_component_from_func(consume_dict)

    with self.assertRaises(Exception):
        list_op([1, MyClass(), 3])

    with self.assertRaises(Exception):
        dict_op({'k1': MyClass()})
def artifact_passing_pipeline():
    producer_task = producer_op()
    processor_task = processor_op(producer_task.outputs['output_1'],
                                  producer_task.outputs['output_2'])
    consumer_task = consumer_op(processor_task.outputs['output_1'],
                                processor_task.outputs['output_2'])

    markdown_task = create_component_from_func(func=metadata_and_metrics)()
    # This line is needed only so that compiling with dsl-compile works.
    kfp.dsl.get_pipeline_conf(
    ).data_passing_method = volume_based_data_passing_method
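# The volume_based_data_passing_method referenced above is not defined in this
# excerpt. A minimal sketch of how it could be constructed, assuming a
# pre-existing PersistentVolumeClaim; the claim name and path prefix are
# illustrative, not part of the original sample.
from kubernetes.client.models import (V1PersistentVolumeClaimVolumeSource,
                                      V1Volume)

from kfp.deprecated.dsl import data_passing_methods

volume_based_data_passing_method = data_passing_methods.KubernetesVolume(
    volume=V1Volume(
        name='data',
        persistent_volume_claim=V1PersistentVolumeClaimVolumeSource(
            claim_name='data-volume'),
    ),
    path_prefix='artifact_data/',
)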
def test_component_annotations(self):

    def some_func():
        pass

    annotations = {
        'key1': 'value1',
        'key2': 'value2',
    }
    task_factory = comp.create_component_from_func(
        some_func, annotations=annotations)
    component_spec = task_factory.component_spec
    self.assertEqual(component_spec.metadata.annotations, annotations)
def test_argument_serialization_failure(self):
    from typing import Sequence

    def my_func(args: Sequence[int]):
        print(args)

    task_factory = comp.create_component_from_func(my_func)
    # Raw string so that the regex escapes (e.g. \[) are passed through
    # verbatim instead of being treated as string escape sequences.
    with self.assertRaisesRegex(
            TypeError,
            r'There are no registered serializers for type "(typing.)?Sequence(\[int\])?"'
    ):
        self.helper_test_component_using_local_call(
            task_factory, arguments={'args': [1, 2, 3]})
def test_handling_list_arguments_containing_pipelineparam(self):
    """Checks that lists containing PipelineParam can be properly
    serialized."""

    def consume_list(list_param: list) -> int:
        pass

    import kfp.deprecated as kfp
    task_factory = create_component_from_func(consume_list)
    task = task_factory([1, 2, 3, kfp.dsl.PipelineParam('aaa'), 4, 5, 6])
    full_command_line = task.command + task.arguments
    for arg in full_command_line:
        self.assertNotIn('PipelineParam', arg)
def get_run_info(run_id: str):
    """Example of getting run info for the current pipeline run."""
    print(f'Current run ID is {run_id}.')

    # The KFP API server is usually available as the ml-pipeline service in
    # the same namespace. In a full Kubeflow deployment, edit this to
    # http://ml-pipeline.kubeflow:8888, because pipelines run in user
    # namespaces while the API lives in the kubeflow namespace.
    import kfp
    client = kfp.Client(host='http://ml-pipeline:8888')
    run_info = client.get_run(run_id=run_id)
    # Hide verbose info
    print(run_info.run)


get_run_info_component = components.create_component_from_func(
    func=get_run_info,
    packages_to_install=['kfp'],
)


@dsl.pipeline(
    name='use-run-id',
    description='A pipeline that demonstrates how to use run information, '
    'including the run ID.')
def pipeline_use_run_id(run_id: str = kfp.dsl.RUN_ID_PLACEHOLDER):
    """kfp.dsl.RUN_ID_PLACEHOLDER inside a pipeline parameter will be
    populated with the KFP run ID at runtime."""
    run_info_op = get_run_info_component(run_id=run_id)


if __name__ == '__main__':
"""Two step v2-compatible pipeline.""" from kfp.deprecated import components, dsl from kfp.deprecated.components import InputPath, OutputPath def preprocess(uri: str, some_int: int, output_parameter_one: OutputPath(int), output_dataset_one: OutputPath('Dataset')): """Dummy Preprocess Step.""" with open(output_dataset_one, 'w') as f: f.write('Output dataset') with open(output_parameter_one, 'w') as f: f.write("{}".format(1234)) preprocess_op = components.create_component_from_func(preprocess, base_image='python:3.9') @components.create_component_from_func def train_op(dataset: InputPath('Dataset'), model: OutputPath('Model'), num_steps: int = 100): """Dummy Training Step.""" with open(dataset, 'r') as input_file: input_string = input_file.read() with open(model, 'w') as output_file: for i in range(num_steps): output_file.write("Step {}\n{}\n=====\n".format( i, input_string))
# limitations under the License.
from kfp.deprecated import dsl, components, compiler
from typing import NamedTuple


def training_job():
    import torch
    use_cuda = torch.cuda.is_available()
    print(f'The gpus status is: {use_cuda}')
    if not use_cuda:
        raise ValueError('GPU not available')


training_comp = components.create_component_from_func(
    training_job,
    base_image='pytorch/pytorch:1.7.1-cuda11.0-cudnn8-runtime',
    packages_to_install=[])


@components.create_component_from_func
def generate_resource_constraints_request() -> NamedTuple(
        'output', [('gpu_vendor', str), ('nbr_gpus', str),
                   ('constrain_type', str), ('constrain_value', str)]):
    """Returns the gpu resource and constraint settings."""
    from collections import namedtuple

    # Field names must match the NamedTuple return annotation above.
    output = namedtuple(
        'output',
        ['gpu_vendor', 'nbr_gpus', 'constrain_type', 'constrain_value'])

    return output('nvidia.com/gpu', '1', 'cloud.google.com/gke-accelerator',
                  'nvidia-tesla-p4')
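# A minimal sketch of consuming the components above, assuming the v1
# ContainerOp API; the pipeline name and the hard-coded resource values
# (mirroring the component's outputs) are illustrative.
@dsl.pipeline(name='gpu-smoke-check')
def gpu_pipeline():
    training_task = training_comp()
    # Request one NVIDIA GPU and pin the task to a matching node pool.
    training_task.container.add_resource_limit('nvidia.com/gpu', '1')
    training_task.add_node_selector_constraint(
        'cloud.google.com/gke-accelerator', 'nvidia-tesla-p4')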
def flip_coin(force_flip_result: str = '') -> str:
    """Flip a coin and output heads or tails randomly."""
    if force_flip_result:
        return force_flip_result
    import random
    result = 'heads' if random.randint(0, 1) == 0 else 'tails'
    return result


def print_msg(msg: str):
    """Print a message."""
    print(msg)


flip_coin_op = components.create_component_from_func(flip_coin)
print_op = components.create_component_from_func(print_msg)


@dsl.pipeline(name='condition')
def condition(text: str = 'condition test', force_flip_result: str = ''):
    flip1 = flip_coin_op(force_flip_result)
    print_op(flip1.output)
    with dsl.Condition(flip1.output == 'heads'):
        flip2 = flip_coin_op()
        print_op(flip2.output)
        print_op(text)
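# A minimal sketch of compiling the pipeline above with the v1 compiler; the
# output package path is illustrative.
if __name__ == '__main__':
    from kfp.deprecated import compiler
    compiler.Compiler().compile(condition, __file__ + '.yaml')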
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#      http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Fail pipeline."""
from kfp.deprecated import components, dsl


def fail():
    """Fails."""
    import sys
    sys.exit(1)


fail_op = components.create_component_from_func(
    fail, base_image='alpine:latest')


@dsl.pipeline(name='fail-pipeline')
def fail_pipeline():
    fail_task = fail_op()
        ])

    model = create_model()
    model.compile(
        optimizer='adam',
        loss='sparse_categorical_crossentropy',
        metrics=['accuracy'])

    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=log_dir, histogram_freq=1)

    model.fit(
        x=x_train,
        y=y_train,
        epochs=5,
        validation_data=(x_test, y_test),
        callbacks=[tensorboard_callback])


# Be careful when choosing a tensorboard image:
# * tensorflow/tensorflow may fail with image pull backoff, because of
#   dockerhub rate limiting.
# * tensorboard in tensorflow 2.3+ does not work with KFP, refer to
#   https://github.com/kubeflow/pipelines/issues/5521.
train_op = create_component_from_func(
    train, base_image='gcr.io/deeplearning-platform-release/tf2-cpu.2-4')


@dsl.pipeline(name='pipeline-tensorboard-gcs')
def my_pipeline(
        log_dir=f'gs://{{kfp-default-bucket}}/tensorboard/logs/{dsl.RUN_ID_PLACEHOLDER}'
):
    prepare_tb_task = prepare_tensorboard(log_dir_uri=log_dir)
    tensorboard_task = train_op(log_dir=log_dir).after(prepare_tb_task)
                log_dir, os.path.relpath(start=log_dir_local, path=path))
            client.fput_object(
                bucket_name=log_bucket,
                object_name=object_name,
                file_path=path,
            )
            count = count + 1
            print(f'{path} uploaded to minio://{log_bucket}/{object_name}')
        print(f'{count} log files uploaded to minio://{log_bucket}/{log_dir}')


# tensorflow/tensorflow:2.4 may fail with image pull backoff, because of
# dockerhub rate limiting.
train_op = create_component_from_func(
    train,
    base_image='gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest',
    packages_to_install=['minio'],  # TODO: pin minio version
)


@dsl.pipeline(name='pipeline-tensorboard-minio')
def my_pipeline(
        minio_endpoint: str = 'minio-service:9000',
        log_bucket: str = 'mlpipeline',
        log_dir: str = f'tensorboard/logs/{dsl.RUN_ID_PLACEHOLDER}',
        # Pin to tensorflow 2.3, because in 2.4+ tensorboard cannot load in
        # KFP: refer to https://github.com/kubeflow/pipelines/issues/5521.
        tf_image: str = 'gcr.io/deeplearning-platform-release/tf2-cpu.2-3:latest'
):
    # tensorboard uses the s3 protocol to access minio
    prepare_tb_task = prepare_tensorboard(
    ''')


# Accessing GCS using the Google Cloud Python library
def gcs_list_buckets():
    from google.cloud import storage
    storage_client = storage.Client()
    buckets = storage_client.list_buckets()
    print("List of buckets:")
    for bucket in buckets:
        print(bucket.name)


gcs_list_buckets_op = components.create_component_from_func(
    gcs_list_buckets,
    base_image='python:3.7',
    packages_to_install=['google-cloud-storage==1.31.2'],
)


@dsl.pipeline(
    name='secret-pipeline',
    description='A pipeline to demonstrate mounting and use of secrets.')
def secret_op_pipeline(
        url='gs://ml-pipeline/sample-data/shakespeare/shakespeare1.txt'):
    """A pipeline that uses a secret to access cloud-hosted resources."""
    gcs_list_items_task = gcs_list_items_op(url)
    gcs_list_buckets_task = gcs_list_buckets_op()
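    # A minimal sketch of granting these tasks access to GCP credentials via
    # the v1 use_gcp_secret helper, assuming a Kubernetes secret named
    # 'user-gcp-sa' holding a service account key exists on the cluster; the
    # secret name is an assumption, not part of the original sample.
    from kfp.deprecated import gcp
    gcs_list_items_task.apply(gcp.use_gcp_secret('user-gcp-sa'))
    gcs_list_buckets_task.apply(gcp.use_gcp_secret('user-gcp-sa'))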