def test_func_to_container_op_call_other_func(self):
    extra_variable = 10

    class ExtraClass:
        def class_method(self, x):
            return x * extra_variable

    def extra_func(a: float) -> float:
        return a * 5

    def main_func(a: float, b: float) -> float:
        return ExtraClass().class_method(a) + extra_func(b)

    func = main_func
    op = comp.func_to_container_op(func, use_code_pickling=True)

    self.helper_test_2_in_1_out_component_using_local_call(func, op)
def test_input_text_file(self):
    from kfp.components import InputTextFile

    def consume_file_path(number_file: InputTextFile(int)) -> int:
        string_data = number_file.read()
        assert isinstance(string_data, str)
        return int(string_data)

    task_factory = comp.func_to_container_op(consume_file_path)

    self.assertEqual(task_factory.component_spec.inputs[0].type, 'Integer')

    # TODO: Fix the input names: "number_file" parameter should be exposed as "number" input
    self.helper_test_component_using_local_call(
        task_factory,
        arguments={'number_file': "42"},
        expected_output_values={'output': '42'})
def test_handling_list_dict_output_values(self):
    def produce_list() -> list:
        return (["string", 1, 2.2, True, False, None, [3, 4], {'s': 5}], )

    # ! JSON map keys are always strings. Python converts all keys to strings without warnings
    task_factory = comp.func_to_container_op(produce_list)

    import json
    expected_output = json.dumps(
        ["string", 1, 2.2, True, False, None, [3, 4], {'s': 5}])

    self.helper_test_component_using_local_call(
        task_factory,
        arguments={},
        expected_output_values={'output': expected_output})
def test_output_binary_file(self):
    from kfp.components import OutputBinaryFile

    def write_to_file_path(number_file: OutputBinaryFile(int)):
        number_file.write(b'42')

    task_factory = comp.func_to_container_op(write_to_file_path)

    self.assertFalse(task_factory.component_spec.inputs)
    self.assertEqual(len(task_factory.component_spec.outputs), 1)
    self.assertEqual(task_factory.component_spec.outputs[0].type, 'Integer')

    self.helper_test_component_using_local_call(
        task_factory, arguments={}, expected_output_values={'number': '42'})
def test_handling_list_dict_arguments(self):
    def assert_values_are_same(
        list_param: list,
        dict_param: dict,
    ) -> int:
        import unittest
        unittest.TestCase().assertEqual(
            list_param,
            ["string", 1, 2.2, True, False, None, [3, 4], {'s': 5}])
        unittest.TestCase().assertEqual(
            dict_param,
            {'str': "string", 'int': 1, 'float': 2.2, 'false': False,
             'true': True, 'none': None, 'list': [3, 4], 'dict': {'s': 4}})
        return 1

    # ! JSON map keys are always strings. Python converts all keys to strings without warnings
    func = assert_values_are_same
    op = comp.func_to_container_op(func)

    self.helper_test_2_in_1_out_component_using_local_call(
        func,
        op,
        arguments=[
            ["string", 1, 2.2, True, False, None, [3, 4], {'s': 5}],
            {'str': "string", 'int': 1, 'float': 2.2, 'false': False,
             'true': True, 'none': None, 'list': [3, 4], 'dict': {'s': 4}},
        ])
def test_output_path(self):
    from kfp.components import OutputPath

    def write_to_file_path(number_file_path: OutputPath(int)):
        with open(number_file_path, 'w') as f:
            f.write(str(42))

    task_factory = comp.func_to_container_op(write_to_file_path)

    self.assertFalse(task_factory.component_spec.inputs)
    self.assertEqual(len(task_factory.component_spec.outputs), 1)
    self.assertEqual(task_factory.component_spec.outputs[0].type, 'Integer')

    self.helper_test_component_using_local_call(
        task_factory, arguments={}, expected_output_values={'number': '42'})
def test_handling_complex_default_values_of_none(self):
    def assert_values_are_default(
        a,
        b,
        singleton_param=None,
        function_param=ascii,
        dict_param={'b': [2, 3, 4]},
        func_call_param='_'.join(['a', 'b', 'c']),
    ) -> int:
        assert singleton_param is None
        assert function_param is ascii
        assert dict_param == {'b': [2, 3, 4]}
        assert func_call_param == '_'.join(['a', 'b', 'c'])
        return 1

    func = assert_values_are_default
    op = comp.func_to_container_op(func)

    self.helper_test_2_in_1_out_component_using_local_call(func, op)
def test_output_text_file(self):
    from kfp.components import OutputTextFile

    def write_to_file_path(number_file: OutputTextFile(int)):
        number_file.write(str(42))

    task_factory = comp.func_to_container_op(write_to_file_path)

    self.assertFalse(task_factory.component_spec.inputs)
    self.assertEqual(len(task_factory.component_spec.outputs), 1)
    self.assertEqual(task_factory.component_spec.outputs[0].type, 'Integer')

    # TODO: Fix the output names: "number_file" should be exposed as "number" output
    self.helper_test_component_using_local_call(
        task_factory,
        arguments={},
        expected_output_values={'number_file': '42'})
def test_end_to_end_python_component_pipeline_compilation(self):
    import kfp.components as comp

    # Defining the Python function
    def add(a: float, b: float) -> float:
        '''Returns sum of two arguments'''
        return a + b

    with tempfile.TemporaryDirectory() as temp_dir_name:
        add_component_file = str(
            Path(temp_dir_name).joinpath('add.component.yaml'))

        # Converting the function to a component. Instantiate it to create a pipeline task (ContainerOp instance)
        add_op = comp.func_to_container_op(
            add,
            base_image='python:3.5',
            output_component_file=add_component_file)

        # Checking that the component artifact is usable:
        add_op2 = comp.load_component_from_file(add_component_file)

        # Building the pipeline
        import kfp.dsl as dsl

        @dsl.pipeline(
            name='Calculation pipeline',
            description='A pipeline that performs arithmetic calculations.')
        def calc_pipeline(
            a1,
            a2='7',
            a3='17',
        ):
            task_1 = add_op(a1, a2)
            task_2 = add_op2(a1, a2)
            task_3 = add_op(task_1.output, task_2.output)
            task_4 = add_op2(task_3.output, a3)

        # Compiling the pipeline:
        pipeline_filename = str(
            Path(temp_dir_name).joinpath(
                calc_pipeline.__name__ + '.pipeline.tar.gz'))
        import kfp.compiler as compiler
        compiler.Compiler().compile(calc_pipeline, pipeline_filename)
def test_func_to_container_op_check_nothing_extra_captured(self):
    def f1():
        pass

    def f2():
        pass

    def main_func(a: float, b: float) -> float:
        f1()
        try:
            eval('f2()')
        except:
            return a + b
        raise AssertionError("f2 should not be captured, because it's not a dependency.")

    expected_func = lambda a, b: a + b
    op = comp.func_to_container_op(main_func)

    self.helper_test_2_in_1_out_component_using_local_call(expected_func, op)
def test_handling_base64_pickle_arguments(self):
    def assert_values_are_same(
        obj1: 'Base64Pickle',  # noqa: F821
        obj2: 'Base64Pickle',  # noqa: F821
    ) -> int:
        import unittest
        unittest.TestCase().assertEqual(obj1['self'], obj1)
        unittest.TestCase().assertEqual(obj2, open)
        return 1

    func = assert_values_are_same
    op = comp.func_to_container_op(func)

    recursive_obj = {}
    recursive_obj['self'] = recursive_obj
    self.helper_test_2_in_1_out_component_using_local_call(
        func,
        op,
        arguments=[
            recursive_obj,
            open,
        ])
def test_all_data_passing_ways(self):
    from kfp.components import InputTextFile, InputPath, OutputTextFile, OutputPath

    def write_to_file_path(
        file_input1_path: InputPath(str),
        file_input2_file: InputTextFile(str),
        file_output1_path: OutputPath(str),
        file_output2_file: OutputTextFile(str),
        value_input1: str = 'foo',
        value_input2: str = 'foo',
    ) -> NamedTuple('Outputs', [
        ('return_output1', str),
        ('return_output2', str),
    ]):
        with open(file_input1_path, 'r') as file_input1_file:
            with open(file_output1_path, 'w') as file_output1_file:
                file_output1_file.write(file_input1_file.read())

        file_output2_file.write(file_input2_file.read())

        return (value_input1, value_input2)

    task_factory = comp.func_to_container_op(write_to_file_path)

    self.assertEqual(
        set(input.name for input in task_factory.component_spec.inputs),
        {'file_input1', 'file_input2', 'value_input1', 'value_input2'})
    self.assertEqual(
        set(output.name for output in task_factory.component_spec.outputs),
        {'file_output1', 'file_output2', 'return_output1', 'return_output2'})

    self.helper_test_component_using_local_call(
        task_factory,
        arguments={
            'file_input1': 'file_input1_value',
            'file_input2': 'file_input2_value',
            'value_input1': 'value_input1_value',
            'value_input2': 'value_input2_value',
        },
        expected_output_values={
            'file_output1': 'file_input1_value',
            'file_output2': 'file_input2_value',
            'return_output1': 'value_input1_value',
            'return_output2': 'value_input2_value',
        },
    )
def test_output_path_plus_return_value(self):
    def write_to_file_path(number_file_path: OutputPath(int)) -> str:
        with open(number_file_path, 'w') as f:
            f.write(str(42))
        return 'Hello'

    task_factory = comp.func_to_container_op(write_to_file_path)

    self.assertFalse(task_factory.component_spec.inputs)
    self.assertEqual(len(task_factory.component_spec.outputs), 2)
    self.assertEqual(task_factory.component_spec.outputs[0].type, 'Integer')
    self.assertEqual(task_factory.component_spec.outputs[1].type, 'String')

    self.helper_test_component_using_local_call(
        task_factory,
        arguments={},
        expected_output_values={
            'number': '42',
            'Output': 'Hello'
        })
def test_optional_input_path(self):
    def consume_file_path(number_file_path: InputPath(int) = None) -> int:
        result = -1
        if number_file_path:
            with open(number_file_path) as f:
                string_data = f.read()
                result = int(string_data)
        return result

    task_factory = comp.func_to_container_op(consume_file_path)

    self.helper_test_component_using_local_call(
        task_factory, arguments={}, expected_output_values={'Output': '-1'})

    self.helper_test_component_using_local_call(
        task_factory,
        arguments={'number': "42"},
        expected_output_values={'Output': '42'})
def test_python_component_decorator(self):
    from kfp.dsl import python_component
    import kfp.components._python_op as _python_op

    expected_name = 'Sum component name'
    expected_description = 'Sum component description'
    expected_image = 'org/image'

    @python_component(
        name=expected_name,
        description=expected_description,
        base_image=expected_image)
    def add_two_numbers_decorated(
        a: float,
        b: float,
    ) -> float:
        '''Returns sum of two arguments'''
        return a + b

    component_spec = _python_op._func_to_component_spec(
        add_two_numbers_decorated)

    self.assertEqual(component_spec.name, expected_name)
    self.assertEqual(component_spec.description.strip(),
                     expected_description.strip())
    self.assertEqual(component_spec.implementation.container.image,
                     expected_image)

    func = add_two_numbers_decorated
    op = comp.func_to_container_op(func)

    self.helper_test_component_against_func_using_local_call(
        func, op, arguments={
            'a': 3,
            'b': 5.0
        })
def main(args):
    OUT_COMPONENTS_DIR = args.output_component_dir
    OUT_PIPELINE_DIR = args.output_pipeline_dir

    # The tokenizer needs a non-standard library (nltk), so install it into the container
    tokenizer_component = cpt.func_to_container_op(
        tokenizer,
        packages_to_install=['nltk==3.5'],
        output_component_file=f'{OUT_COMPONENTS_DIR}/tokenizer.component')
    count_tokens_component = cpt.create_component_from_func(
        count_tokens,
        output_component_file=f'{OUT_COMPONENTS_DIR}/count_tokens.component')

    @dsl.pipeline(name='Count Kubeflow Pipeline',
                  description='Count the number of tokens in a sentence')
    def count_kubeflow_pipeline(sentence='Ciao Kubeflow, come stai oggi?'):
        tokenizer_task = tokenizer_component(sentence=sentence)
        count_tokens_task = count_tokens_component(
            tokens=tokenizer_task.output)

    compiler = cmp.Compiler()
    compiler.compile(
        pipeline_func=count_kubeflow_pipeline,
        package_path=f'{OUT_PIPELINE_DIR}/count_kubeflow_pipeline.zip')
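# main() above assumes tokenizer and count_tokens are defined elsewhere in the script.
# A minimal sketch of what such lightweight component functions could look like is
# given below; the bodies are assumptions for illustration, not the original definitions.
def tokenizer(sentence: str) -> str:
    # nltk is available inside the component because it is built with
    # packages_to_install=['nltk==3.5']
    import json
    import nltk
    nltk.download('punkt')
    tokens = nltk.word_tokenize(sentence)
    return json.dumps(tokens)


def count_tokens(tokens: str) -> int:
    # Count the tokens serialized by the tokenizer component
    import json
    return len(json.loads(tokens))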
def test_func_to_container_op_with_imported_func(self):
    from .test_data.module1 import module_func_with_deps as module1_func_with_deps
    func = module1_func_with_deps
    op = comp.func_to_container_op(func)

    self.helper_test_2_in_1_out_component_using_local_call(func, op)
    from kale.common.kfputils import \
        update_uimetadata as _kale_update_uimetadata

    _kale_blocks = (
        _kale_data_loading_block,
        _kale_block1,
        _kale_block2,
    )
    _kale_html_artifact = _kale_run_code(_kale_blocks)
    with open("/results.html", "w") as f:
        f.write(_kale_html_artifact)
    _kale_update_uimetadata('results')

    _kale_mlmdutils.call("mark_execution_complete")


_kale_loaddata_op = _kfp_components.func_to_container_op(loaddata)

_kale_datapreprocessing_op = _kfp_components.func_to_container_op(
    datapreprocessing)

_kale_featureengineering_op = _kfp_components.func_to_container_op(
    featureengineering)

_kale_decisiontree_op = _kfp_components.func_to_container_op(decisiontree)

_kale_svm_op = _kfp_components.func_to_container_op(svm)

_kale_naivebayes_op = _kfp_components.func_to_container_op(naivebayes)

_kale_logisticregression_op = _kfp_components.func_to_container_op(
    logisticregression)
def test_func_to_container_op_call_other_func_global(self):
    func = module_func_with_deps
    op = comp.func_to_container_op(func, output_component_file='comp.yaml')

    self.helper_test_2_in_1_out_component_using_local_call(func, op)
        source_table=source_table_name,
        num_lots=num_lots,
        lots=str(lots)[1:-1])

    return query


# Create component factories
component_store = kfp.components.ComponentStore(
    local_search_paths=None,
    url_search_prefixes=[COMPONENT_URL_SEARCH_PREFIX])

bigquery_query_op = component_store.load_component('bigquery/query')
mlengine_train_op = component_store.load_component('ml_engine/train')
mlengine_deploy_op = component_store.load_component('ml_engine/deploy')
retrieve_best_run_op = func_to_container_op(
    retrieve_best_run, base_image=BASE_IMAGE)
evaluate_model_op = func_to_container_op(
    evaluate_model, base_image=BASE_IMAGE)


@kfp.dsl.pipeline(
    name='Covertype Classifier Training',
    description='The pipeline training and deploying the Covertype classifier')
def covertype_train(
    project_id: GCPProjectID,
    region: GCPRegion,
    source_table_name: String,
    gcs_root: GCSPath,
    dataset_id: str,
    evaluation_metric_name: str,
    evaluation_metric_threshold: float,
def test_func_to_container_op_call_other_func_global(self):
    func = module_func_with_deps
    op = comp.func_to_container_op(func, use_code_pickling=True)

    self.helper_test_2_in_1_out_component_using_local_call(func, op)
        return loss

    test_loss = evaluate_model(model, features_test, target_test)
    print(f'Test Loss : {test_loss}')

    write_to_store(bucket_name, {
        'test_loss': test_loss.item(),
        'conf': conf
    }, f'score_{hyperparam_idx}', client)

    return hyperparam_idx


download_data_op = func_to_container_op(download_data,
                                        base_image=BASE_IMAGE,
                                        packages_to_install=["boto3"],
                                        modules_to_capture=["utils"],
                                        use_code_pickling=True)

gen_random_op = func_to_container_op(generate_random_search_point,
                                     base_image=BASE_IMAGE,
                                     packages_to_install=["boto3"],
                                     modules_to_capture=["utils"],
                                     use_code_pickling=True)

print_gen_val_op = func_to_container_op(print_gen_val,
                                        base_image=BASE_IMAGE,
                                        packages_to_install=["boto3"],
                                        modules_to_capture=["utils"],
                                        use_code_pickling=True)

train_model_op = func_to_container_op(train_model,
                                      base_image=BASE_IMAGE,
                                      packages_to_install=["boto3"],
def _get_gpu(is_used: str, gpu_path: OutputPath(str)):
    import os
    import json

    print("is_used: {}".format(is_used))

    with open(gpu_path, 'w') as f:
        if is_used == 'yes':
            f.write('yes')
        else:
            f.write('no')


# ========================= LIGHTWEIGHT PYTHON COMPONENTS =========================
_get_latest_model_op = func_to_container_op(_get_latest_model)
_get_yaml_op = func_to_container_op(_get_subyamls)
_get_gpu_op = func_to_container_op(_get_gpu)


# ========================= OPERATORS =========================
def dhealth_train_sl_segmentation_op(python_train_path, input_dataset_yaml,
                                     output_path, num_epochs: Integer(),
                                     num_batch_size: Integer(), gpu_boolean):
    print("GPU: {}".format(gpu_boolean))
    if gpu_boolean == 'yes':
        return dsl.ContainerOp(
            name='DeepHealth - Train Skin Lesion Segmentation',
def test_func_to_container_op_local_call(self):
    func = add_two_numbers
    op = comp.func_to_container_op(func)

    self.helper_test_2_in_1_out_component_using_local_call(func, op)
import kfp.components as comp
import git

repo = git.Repo(search_parent_directories=True)
sha = repo.head.object.hexsha

# Modeled after https://github.com/kubeflow/pipelines/blob/master/samples/core/lightweight_component/lightweight_component.ipynb
# General note: debugging a pipeline is a pain because many fields don't
# exist until run-time

import comp_1 as comp_1
import comp_2 as comp_2

# Might as well keep the components in a common variable in case you want to write multiple pipelines
comp_1_op = comp.func_to_container_op(
    comp_1.run, base_image=f"docker.io/mohsseha/comp_1:{sha}")
comp_2_op = comp.func_to_container_op(
    comp_2.run, base_image=f"docker.io/mohsseha/comp_2:{sha}")

import kfp.dsl as dsl


@dsl.pipeline(
    name='Simple Calculation pipeline',
    description='simple example that composes a couple of ops with different source packages')
def experiment_pipeline(
    in_1=3.1,
    in_2=323.1,
    username='******',
    # run the code blocks inside a jupyter kernel
    from kale.utils.jupyter_utils import run_code as _kale_run_code
    from kale.utils.kfp_utils import \
        update_uimetadata as _kale_update_uimetadata

    blocks = (
        data_loading_block,
        block1,
        block2,
    )
    html_artifact = _kale_run_code(blocks)
    with open("/results.html", "w") as f:
        f.write(html_artifact)
    _kale_update_uimetadata('results')


loaddata_op = comp.func_to_container_op(loaddata)

datapreprocessing_op = comp.func_to_container_op(datapreprocessing)

featureengineering_op = comp.func_to_container_op(featureengineering)

decisiontree_op = comp.func_to_container_op(decisiontree)

svm_op = comp.func_to_container_op(svm)

naivebayes_op = comp.func_to_container_op(naivebayes)

logisticregression_op = comp.func_to_container_op(logisticregression)

randomforest_op = comp.func_to_container_op(randomforest)
    LABEL_KEY = 'tips'
    FARE_KEY = 'fare'
    tf.compat.v1.logging.set_verbosity(tf.compat.v1.logging.INFO)
    # tf.get_logger().setLevel(logging.ERROR)

    eval_result = estimator.evaluate(
        input_fn=lambda: training_input_fn(
            tf_transform_output,
            os.path.join(trns_output, 'eval' + '*'),
            BATCH_SIZE,
            "tips"),
        steps=50)
    print(eval_result)


data_validation_op = comp.func_to_container_op(
    data_validation,
    base_image='docker.io/stefanofioravanzo/kale-notebook:0.9')

data_transformation_op = comp.func_to_container_op(
    data_transformation,
    base_image='docker.io/stefanofioravanzo/kale-notebook:0.9')

train_op = comp.func_to_container_op(
    train, base_image='docker.io/stefanofioravanzo/kale-notebook:0.9')

eval_op = comp.func_to_container_op(
    eval, base_image='docker.io/stefanofioravanzo/kale-notebook:0.9')


@dsl.pipeline(
def create_tfjob_task(tfjob_name, tfjob_namespace, training_steps, katib_op,
                      model_volume_op):
    import json

    # Get parameters from the Katib Experiment.
    # Parameters are in the format
    # "--tf-learning-rate=0.01 --tf-batch-size=100"
    convert_katib_results_op = components.func_to_container_op(
        katib.convert_katib_results,)
    best_hp_op = convert_katib_results_op(katib_op.output)
    best_hps = str(best_hp_op.output)

    # Create the TFJob Chief and Worker specification with the best
    # Hyperparameters.
    # TODO (andreyvelich): Use community image for the mnist example.
    tfjob_chief_spec = {
        "replicas": 1,
        "restartPolicy": "OnFailure",
        "template": {
            "metadata": {
                "annotations": {
                    "sidecar.istio.io/inject": "false",
                },
            },
            "spec": {
                "containers": [
                    {
                        "name": "tensorflow",
                        "image": "docker.io/liuhougangxa/tf-estimator-mnist",
                        "command": [
                            "sh",
                            "-c",
                        ],
                        "args": [
                            "python /opt/model.py "
                            "--tf-export-dir=/mnt/export "
                            "--tf-train-steps={} {}".format(
                                training_steps, best_hps),
                        ],
                        "volumeMounts": [
                            {
                                "mountPath": "/mnt/export",
                                "name": "model-volume",
                            },
                        ],
                    },
                ],
                "volumes": [
                    {
                        "name": "model-volume",
                        "persistentVolumeClaim": {
                            "claimName": str(model_volume_op.outputs["name"]),
                        },
                    },
                ],
            },
        },
    }

    tfjob_worker_spec = {
        "replicas": 1,
        "restartPolicy": "OnFailure",
        "template": {
            "metadata": {
                "annotations": {
                    "sidecar.istio.io/inject": "false",
                },
            },
            "spec": {
                "containers": [
                    {
                        "name": "tensorflow",
                        "image": "docker.io/liuhougangxa/tf-estimator-mnist",
                        "command": [
                            "sh",
                            "-c",
                        ],
                        "args": [
                            "python /opt/model.py "
                            "--tf-export-dir=/mnt/export "
                            "--tf-train-steps={} {}".format(
                                training_steps, best_hps),
                        ],
                    },
                ],
            },
        },
    }

    # Create the KFP task for the TFJob.
    tfjob_launcher_op = components.load_component_from_url(TFJOB_URL)
    op = tfjob_launcher_op(
        name=tfjob_name,
        namespace=tfjob_namespace,
        chief_spec=json.dumps(tfjob_chief_spec),
        worker_spec=json.dumps(tfjob_worker_spec),
        tfjob_timeout_minutes=60,
        delete_finished_tfjob=False)
    return op
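# A hypothetical sketch of how create_tfjob_task() could be wired into a pipeline.
# create_katib_experiment_task is an assumed helper that is not shown in this snippet;
# the VolumeOp provides the PVC whose name the TFJob spec reads via model_volume_op.outputs["name"].
@dsl.pipeline(
    name="mnist-e2e-sketch",
    description="Illustrative wiring of the Katib, volume and TFJob tasks.")
def mnist_pipeline_sketch(name="mnist", namespace="kubeflow", training_steps="200"):
    katib_op = create_katib_experiment_task(name, namespace, training_steps)  # assumed helper
    model_volume_op = dsl.VolumeOp(
        name="model-volume",
        resource_name="model-volume",
        size="1Gi",
        modes=dsl.VOLUME_MODE_RWO,
    )
    tfjob_op = create_tfjob_task(name, namespace, training_steps, katib_op,
                                 model_volume_op)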
    true_label = test_labels[image_number]
    class_prediction = class_names[prediction]
    confidence = 100 * np.max(predictions)
    actual = class_names[true_label]

    with open(f'{data_path}/result.txt', 'w') as result:
        result.write(" Prediction: {} | Confidence: {:2.0f}% | Actual: {}".format(
            class_prediction, confidence, actual))

    print('Prediction has been saved successfully!')


# Glue the training and inference functions to the docker container
train_op = comp.func_to_container_op(
    train, base_image='tensorflow/tensorflow:latest-gpu-py3')
predict_op = comp.func_to_container_op(
    predict, base_image='tensorflow/tensorflow:latest-gpu-py3')


# Define pipeline metadata like name, description, etc.
@dsl.pipeline(
    name='MNIST Pipeline for train and prediction',
    description='Pipeline that trains MNIST models on GPU'
)
# Define the virtual disk space that the pipeline will use to run
def mnist_container_pipeline(data_path='/mnt',
                             model_file='mnist_model.h5',
                             IMAGE_NUMBER='0'):
    vop = dsl.VolumeOp(
        name='create_volume',
        resource_name='data-volume',
        size='1Gi',
        modes=dsl.VOLUME_MODE_RWM
EXPERIMENT_NAME = "add"


# Create component
@dsl.python_component(
    name='add_op',
    description='adds two numbers',
    base_image=BASE_IMAGE
)
def add(a: float, b: float) -> float:
    print("{} + {} = {}".format(a, b, a + b))
    return a + b


add_op = components.func_to_container_op(
    add,
    base_image=BASE_IMAGE
)


# Build a pipeline using the component
@dsl.pipeline(
    name='Calculation pipeline',
    description='simple pipeline'
)
def cal_pipeline(a: float, b: float):
    add_task = add_op(a, 4)
    add_task2 = add_op(a, b)
    add_task3 = add_op(add_task.output, add_task2.output)
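# The snippet above defines cal_pipeline but does not compile it. A minimal sketch of
# compiling it follows, assuming the kfp.compiler API already used by other snippets in
# this collection; the output file name is illustrative.
import kfp.compiler as compiler

compiler.Compiler().compile(cal_pipeline, 'cal_pipeline.pipeline.tar.gz')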