def test_conflicting_name_renaming_stability(self):
    # Checking that already pythonic input names are not renamed
    # Checking that renaming is deterministic
    component_text = textwrap.dedent('''\
        inputs:
        - {name: Input 1}
        - {name: Input_1}
        - {name: Input-1}
        - {name: input_1}  # Last in the list, but is pythonic, so it should not be renamed
        implementation:
          container:
            image: busybox
            command:
            - inputValue: Input 1
            - inputValue: Input_1
            - inputValue: Input-1
            - inputValue: input_1
        ''')
    task_factory1 = comp.load_component(text=component_text)
    task1 = task_factory1(
        input_1_2='value_1_2',
        input_1_3='value_1_3',
        input_1_4='value_1_4',
        input_1='value_1',  # Expecting this input not to be renamed
    )
    resolved_cmd = _resolve_command_line_and_paths(
        task1.component_ref.spec, task1.arguments)

    self.assertEqual(resolved_cmd.command,
                     ['value_1_2', 'value_1_3', 'value_1_4', 'value_1'])

def test_handle_training_xor(self):
    tests_root = os.path.abspath(os.path.dirname(__file__))
    component_root = os.path.abspath(os.path.join(tests_root, '..'))
    testdata_root = os.path.abspath(os.path.join(tests_root, 'testdata'))

    train_op = comp.load_component(
        os.path.join(component_root, 'component.yaml'))

    with tempfile.TemporaryDirectory() as temp_dir_name:
        with components_local_output_dir_context(temp_dir_name):
            train_task = train_op(
                training_set_features_path=os.path.join(
                    testdata_root, 'training_set_features.tsv'),
                training_set_labels_path=os.path.join(
                    testdata_root, 'training_set_labels.tsv'),
                output_model_uri=os.path.join(temp_dir_name,
                                              'outputs/output_model/data'),
                model_config=Path(testdata_root).joinpath(
                    'model_config.json').read_text(),
                number_of_classes=2,
                number_of_epochs=10,
                batch_size=32,
            )

        full_command = train_task.command + train_task.arguments
        full_command[0] = 'python'
        full_command[1] = os.path.join(component_root, 'src', 'train.py')

        process = subprocess.run(full_command)

        (output_model_uri_file, ) = (
            train_task.file_outputs['output-model-uri'], )
        output_model_uri = Path(output_model_uri_file).read_text()

def test_command_if_input_value_then(self):
    component_text = '''\
inputs:
- {name: Do test, type: Boolean, optional: true}
- {name: Test data, type: Integer, optional: true}
- {name: Test parameter 1, optional: true}
implementation:
  container:
    image: busybox
    args:
    - if:
        cond: {inputValue: Do test}
        then: [--test-data, {inputValue: Test data}, --test-param1, {inputValue: Test parameter 1}]
'''
    task_factory1 = comp.load_component(text=component_text)

    task_then = task_factory1(True, 'test_data.txt', '42')
    resolved_cmd_then = _resolve_command_line_and_paths(
        task_then.component_ref.spec, task_then.arguments)
    self.assertEqual(
        resolved_cmd_then.args,
        ['--test-data', 'test_data.txt', '--test-param1', '42'])

    task_else = task_factory1()
    resolved_cmd_else = _resolve_command_line_and_paths(
        task_else.component_ref.spec, task_else.arguments)
    self.assertEqual(resolved_cmd_else.args, [])

def test_command_yaml_types(self):
    component_text = '''\
implementation:
  container:
    image: busybox
    arguments:
    # Nulls:
    - null  # A null
    -       # Also a null
    # Strings:
    - ""  # empty string
    - "string"
    # Booleans
    - true
    - True
    - false
    - FALSE
    # Integers
    - 0
    - 0o7
    - 0x3A
    - -19
    # Floats
    - 0.
    - -0.0
    - .5
    - +12e03
    - -2E+05
    # Infinite floats
    - .inf
    - -.Inf
    - +.INF
    - .NAN
'''
    task_factory1 = comp.load_component(text=component_text)
    task = task_factory1()
    self.assertEqual(
        task.arguments,
        [
            # Nulls are skipped
            '',
            'string',
            'True',
            'True',
            'False',
            'False',
            '0',
            '0o7',
            '58',
            '-19',
            '0.0',
            '-0.0',
            '0.5',
            '+12e03',
            '-2E+05',
            'inf',
            '-inf',
            'inf',
            'nan',
        ])

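# A minimal sketch of the YAML scalar coercions exercised above, assuming
# PyYAML (a YAML 1.1 parser) is what backs load_component, as the expected
# values suggest: '0x3A' matches the YAML 1.1 hex-integer form, while '0o7'
# and '+12e03' match neither the integer nor the float resolver, so both
# stay strings.
import yaml

assert yaml.safe_load('0x3A') == 58          # YAML 1.1 hex integer
assert yaml.safe_load('0o7') == '0o7'        # YAML 1.1 octal is '07'; stays a string
assert yaml.safe_load('+12e03') == '+12e03'  # float resolver requires a '.'; stays a string
assert yaml.safe_load('.5') == 0.5           # leading-dot float
assert yaml.safe_load('FALSE') is False      # YAML 1.1 boolean
assert yaml.safe_load('null') is None        # nulls parse to None (then get skipped above)
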
def load_component(component_path):
    """Loads component definition from a .yaml file.

    :param component_path: Path to the component definition
    """
    with open(component_path, "r") as inp:
        return components.load_component(text=inp.read())

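# Minimal usage sketch for the helper above (the component definition here is
# a hypothetical stand-in): load_component returns a task factory whose call
# signature mirrors the component's declared inputs.
import tempfile

_demo_component_text = '''\
implementation:
  container:
    image: busybox
'''

with tempfile.NamedTemporaryFile('w', suffix='.yaml', delete=False) as f:
    f.write(_demo_component_text)
    _demo_path = f.name

demo_op = load_component(_demo_path)  # task factory
demo_task = demo_op()                 # no inputs declared, so the factory takes no arguments
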
def test_loading_minimal_component(self):
    component_text = '''\
implementation:
  container:
    image: busybox
'''
    component_dict = load_yaml(component_text)
    task_factory1 = comp.load_component(text=component_text)

    task1 = task_factory1()
    assert task1.image == component_dict['implementation']['container']['image']

def test_loading_minimal_component(self):
    component_text = '''\
implementation:
  container:
    image: busybox
'''
    component_dict = load_yaml(component_text)
    task_factory1 = comp.load_component(text=component_text)

    self.assertEqual(
        task_factory1.component_spec.implementation.container.image,
        component_dict['implementation']['container']['image'])

def test_command_if_true_string_then_else(self):
    component_text = '''\
implementation:
  container:
    image: busybox
    args:
    - if:
        cond: 'true'
        then: --true-arg
        else: --false-arg
'''
    task_factory1 = comp.load_component(text=component_text)
    task = task_factory1()
    self.assertEqual(task.arguments, ['--true-arg'])

def test_command_if_then_syntax1(self):
    component_text = '''\
inputs:
- {name: In, required: false}
implementation:
  dockerContainer:
    image: busybox
    arguments:
    - [if, [isPresent, In], [--in, [value, In]]]
'''
    task_factory1 = comp.load_component(text=component_text)
    task_then = task_factory1('data')
    self.assertEqual(task_then.arguments, ['--in', 'data'])

def test_input_value_resolving_syntax3(self):
    component_text = '''\
inputs:
- {name: Data}
implementation:
  dockerContainer:
    image: busybox
    arguments:
    - --data
    - value: Data
'''
    task_factory1 = comp.load_component(text=component_text)
    task1 = task_factory1('some-data')
    self.assertEqual(task1.arguments, ['--data', 'some-data'])

def test_command_concat(self):
    component_text = '''\
inputs:
- {name: In1}
- {name: In2}
implementation:
  dockerContainer:
    image: busybox
    arguments:
    - concat: [{value: In1}, {value: In2}]
'''
    task_factory1 = comp.load_component(text=component_text)
    task1 = task_factory1('some', 'data')
    self.assertEqual(task1.arguments, ['somedata'])

def test_output_resolving(self):
    component_text = '''\
outputs:
- {name: Data}
implementation:
  container:
    image: busybox
    arguments:
    - --output-data
    - output: Data
'''
    task_factory1 = comp.load_component(text=component_text)
    task1 = task_factory1(data='/outputs/some-data')
    self.assertEqual(task1.arguments,
                     ['--output-data', '/outputs/some-data'])

def test_automatic_output_resolving(self):
    component_text = '''\
outputs:
- {name: Data}
implementation:
  dockerContainer:
    image: busybox
    arguments:
    - --output-data
    - {output: Data}
'''
    task_factory1 = comp.load_component(text=component_text)
    task1 = task_factory1()
    self.assertEqual(len(task1.arguments), 2)

def test_command_if_true_string_then_else(self):
    component_text = '''\
implementation:
  container:
    image: busybox
    args:
    - if:
        cond: 'true'
        then: --true-arg
        else: --false-arg
'''
    task_factory1 = comp.load_component(text=component_text)
    task = task_factory1()
    resolved_cmd = _resolve_command_line_and_paths(task.component_ref.spec,
                                                   task.arguments)
    self.assertEqual(resolved_cmd.args, ['--true-arg'])

def test_command_if_then_else(self):
    component_text = '''\
inputs:
- {name: In, required: false}
implementation:
  dockerContainer:
    image: busybox
    arguments:
    - if:
        cond: {isPresent: In}
        then: [--in, {value: In}]
        else: --no-in
'''
    task_factory1 = comp.load_component(text=component_text)
    task_then = task_factory1('data')
    self.assertEqual(task_then.arguments, ['--in', 'data'])

def test_input_value_resolving(self):
    component_text = '''\
inputs:
- {name: Data}
implementation:
  container:
    image: busybox
    args:
    - --data
    - inputValue: Data
'''
    task_factory1 = comp.load_component(text=component_text)
    task1 = task_factory1('some-data')
    resolved_cmd = _resolve_command_line_and_paths(
        task1.component_ref.spec, task1.arguments)

    self.assertEqual(resolved_cmd.args, ['--data', 'some-data'])

def test_command_concat(self):
    component_text = '''\
inputs:
- {name: In1}
- {name: In2}
implementation:
  container:
    image: busybox
    args:
    - concat: [{inputValue: In1}, {inputValue: In2}]
'''
    task_factory1 = comp.load_component(text=component_text)
    task1 = task_factory1('some', 'data')
    resolved_cmd = _resolve_command_line_and_paths(
        task1.component_ref.spec, task1.arguments)

    self.assertEqual(resolved_cmd.args, ['somedata'])

def test_automatic_output_resolving(self):
    component_text = '''\
outputs:
- {name: Data}
implementation:
  container:
    image: busybox
    args:
    - --output-data
    - {outputPath: Data}
'''
    task_factory1 = comp.load_component(text=component_text)
    task1 = task_factory1()
    self.assertEqual(len(task1.arguments), 2)
    self.assertEqual(task1.arguments[0], '--output-data')
    self.assertTrue(task1.arguments[1].startswith('/'))

def test_command_if_is_present_then_else(self):
    component_text = '''\
inputs:
- {name: In, optional: true}
implementation:
  container:
    image: busybox
    args:
    - if:
        cond: {isPresent: In}
        then: [--in, {inputValue: In}]
        else: --no-in
'''
    task_factory1 = comp.load_component(text=component_text)

    task_then = task_factory1('data')
    self.assertEqual(task_then.arguments, ['--in', 'data'])

    task_else = task_factory1()
    self.assertEqual(task_else.arguments, ['--no-in'])

def test_command_if_input_value_then(self):
    component_text = '''\
inputs:
- {name: Do test, type: boolean, optional: true}
- {name: Test data, optional: true}
- {name: Test parameter 1, optional: true}
implementation:
  container:
    image: busybox
    args:
    - if:
        cond: {inputValue: Do test}
        then: [--test-data, {inputValue: Test data}, --test-param1, {inputValue: Test parameter 1}]
'''
    task_factory1 = comp.load_component(text=component_text)

    task_then = task_factory1(True, 'test_data.txt', 42)
    self.assertEqual(
        task_then.arguments,
        ['--test-data', 'test_data.txt', '--test-param1', '42'])

    task_else = task_factory1()
    self.assertEqual(task_else.arguments, [])

def test_1(self):
    tests_root = os.path.abspath(os.path.dirname(__file__))
    component_root = os.path.abspath(os.path.join(tests_root, '..'))
    testdata_root = os.path.abspath(os.path.join(tests_root, 'testdata'))

    train_op = comp.load_component(
        os.path.join(component_root, 'component.yaml'))

    input_path_list = [
        {'PipelineParam': {
            'name': 'out_path',
            'op_name': 'Spark Hash Preprocess',
            'value': None,
            'param_type': 'List',
            'pattern': '/tmp/automl_spark/7e4b8111-f20b-4991-a3b4-90d6a0ce3389/pipeline-hbbsb-1543833624-hash'}},
        {'PipelineParam': {
            'name': 'out_label_path',
            'op_name': 'Spark Label Preprocess',
            'value': None,
            'param_type': 'String',
            'pattern': '/tmp/automl_spark/7e4b8111-f20b-4991-a3b4-90d6a0ce3389/pipeline-hbbsb-1645838448-label'}},
        {'PipelineParam': {
            'name': 'out_timestamp_path',
            'op_name': 'Spark Timestamp Preprocess',
            'value': None,
            'param_type': 'String',
            'pattern': '/tmp/automl_spark/7e4b8111-f20b-4991-a3b4-90d6a0ce3389/pipeline-hbbsb-1323756077-timestamp'}},
    ]
    tmp = ["file://" + os.path.join(testdata_root, "label.parquet"),
           "file://" + os.path.join(testdata_root, "time.parquet")]

    with tempfile.TemporaryDirectory() as temp_dir_name:
        with components_local_output_dir_context(temp_dir_name):
            train_task = train_op(
                input_path_list=input_path_list,
                label_name='rating',
                test_size=0.8,
                out_train_path="file://" + os.path.join(testdata_root, "train.parquet"),
                out_test_path="file://" + os.path.join(testdata_root, "test.parquet"),
            )

        full_command = train_task.command + train_task.arguments
        full_command[0] = 'python'
        full_command[1] = os.path.join(component_root, 'src', 'bind_preprocess.py')

        process = subprocess.run(full_command)

        (output_model_uri_file, ) = (train_task.file_outputs['out_train_path'], )
        output_model_uri = Path(output_model_uri_file).read_text()

        (output_model_uri_file, ) = (train_task.file_outputs['out_test_path'], )
        output_model_uri = Path(output_model_uri_file).read_text()

def test_command_if_is_present_then(self):
    component_text = '''\
inputs:
- {name: In, optional: true}
implementation:
  container:
    image: busybox
    args:
    - if:
        cond: {isPresent: In}
        then: [--in, {inputValue: In}]
        #else: --no-in
'''
    task_factory1 = comp.load_component(text=component_text)

    task_then = task_factory1('data')
    resolved_cmd_then = _resolve_command_line_and_paths(
        task_then.component_ref.spec, task_then.arguments)
    self.assertEqual(resolved_cmd_then.args, ['--in', 'data'])

    task_else = task_factory1()
    resolved_cmd_else = _resolve_command_line_and_paths(
        task_else.component_ref.spec, task_else.arguments)
    self.assertEqual(resolved_cmd_else.args, [])

""" import os import os.path as op from kfp import dsl, components import kfp.gcp as gcp from kubernetes.client import V1Toleration ####################################### # Load custom components ####################################### ################### # Train Op comp_train_fname = op.join('components', 'od_train', 'component.yaml') train_component = components.load_component(filename=comp_train_fname) ################### # Export Op comp_export_fname = op.join('components', 'od_export', 'component.yaml') export_component = components.load_component(filename=comp_export_fname) ######################################## # Define a toleration to a ML node taint ml_tol = V1Toleration(effect='NoSchedule', key='mlUseOnly', operator='Equal', value='true') ml_tol2 = V1Toleration(effect='NoSchedule', key='nvidia.com/gpu', operator='Equal', value='present') @dsl.pipeline(name='OD API training/export', description='A pipeline to train/export an instance segmentation model.') def divot_detect_pipeline( pipeline_config_path,
def _cc_pipeline(self,
                 pipeline,
                 pipeline_name,
                 pipeline_version='',
                 experiment_name='',
                 cos_directory=None,
                 export=False):

    runtime_configuration = self._get_metadata_configuration(
        namespace=MetadataManager.NAMESPACE_RUNTIMES,
        name=pipeline.runtime_config)

    cos_endpoint = runtime_configuration.metadata['cos_endpoint']
    cos_username = runtime_configuration.metadata['cos_username']
    cos_password = runtime_configuration.metadata['cos_password']
    cos_secret = runtime_configuration.metadata.get('cos_secret')
    if cos_directory is None:
        cos_directory = pipeline_name
    cos_bucket = runtime_configuration.metadata['cos_bucket']

    self.log_pipeline_info(
        pipeline_name,
        f"processing pipeline dependencies to: {cos_endpoint} "
        f"bucket: {cos_bucket} folder: {cos_directory}")

    t0_all = time.time()

    emptydir_volume_size = ''
    container_runtime = bool(
        os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

    # Create dictionary that maps component Id to its ContainerOp instance
    notebook_ops = {}

    # Sort operations based on dependency graph (topological order)
    sorted_operations = PipelineProcessor._sort_operations(
        pipeline.operations)

    # All previous operation outputs should be propagated throughout the pipeline.
    # In order to process this recursively, the current operation's inputs should be combined
    # from its parent's inputs (which, themselves are derived from the outputs of their parent)
    # and its parent's outputs.
    PipelineProcessor._propagate_operation_inputs_outputs(
        pipeline, sorted_operations)

    for operation in sorted_operations:

        if container_runtime:
            # Volume size to create when using CRI-o, NOTE: IBM Cloud minimum is 20Gi
            emptydir_volume_size = '20Gi'

        # Collect env variables
        pipeline_envs = self._collect_envs(operation,
                                           cos_secret=cos_secret,
                                           cos_username=cos_username,
                                           cos_password=cos_password)

        sanitized_operation_name = self._sanitize_operation_name(
            operation.name)

        # Create pipeline operation
        # If operation is one of the "standard" set of NBs or scripts, construct custom NotebookOp
        if operation.classifier in [
                "execute-notebook-node", "execute-python-node",
                "execute-r-node"
        ]:
            operation_artifact_archive = self._get_dependency_archive_name(
                operation)

            self.log.debug(
                "Creating pipeline component :\n {op} archive : {archive}".
                format(op=operation, archive=operation_artifact_archive))

            notebook_ops[operation.id] = NotebookOp(
                name=sanitized_operation_name,
                pipeline_name=pipeline_name,
                experiment_name=experiment_name,
                notebook=operation.filename,
                cos_endpoint=cos_endpoint,
                cos_bucket=cos_bucket,
                cos_directory=cos_directory,
                cos_dependencies_archive=operation_artifact_archive,
                pipeline_version=pipeline_version,
                pipeline_source=pipeline.source,
                pipeline_inputs=operation.inputs,
                pipeline_outputs=operation.outputs,
                pipeline_envs=pipeline_envs,
                emptydir_volume_size=emptydir_volume_size,
                cpu_request=operation.cpu,
                mem_request=operation.memory,
                gpu_limit=operation.gpu,
                image=operation.runtime_image,
                file_outputs={
                    'mlpipeline-metrics':
                        '{}/mlpipeline-metrics.json'.format(
                            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                    'mlpipeline-ui-metadata':
                        '{}/mlpipeline-ui-metadata.json'.format(
                            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                })

            # TODO Can we move all of this to apply to non-standard components as well? Test when servers are up
            if cos_secret and not export:
                notebook_ops[operation.id].apply(
                    use_aws_secret(cos_secret))

            image_namespace = self._get_metadata_configuration(
                namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
            for image_instance in image_namespace:
                if image_instance.metadata['image_name'] == operation.runtime_image and \
                        image_instance.metadata.get('pull_policy'):
                    notebook_ops[operation.id].container. \
                        set_image_pull_policy(image_instance.metadata['pull_policy'])

            self.log_pipeline_info(
                pipeline_name,
                f"processing operation dependencies for id: {operation.id}",
                operation_name=operation.name)

            self._upload_dependencies_to_object_store(
                runtime_configuration, cos_directory, operation)

        # If operation is a "non-standard" component, load its spec and create operation with factory function
        else:
            component_source = {}
            component_source[
                operation.component_source_type] = operation.component_source

            # Build component task factory
            try:
                factory_function = components.load_component(
                    **component_source)
            except Exception:
                # TODO Fix error messaging and break exceptions down into categories
                self.log.error(
                    f"There was an error while loading component spec for {operation.name}."
                )
                raise RuntimeError(
                    f"There was an error while loading component spec for {operation.name}."
                )

            # Add factory function, which returns a ContainerOp task instance, to pipeline operation dict
            try:
                notebook_ops[operation.id] = factory_function(
                    **operation.component_params)
            except Exception:
                # TODO Fix error messaging and break exceptions down into categories
                self.log.error(
                    f"There was an error while constructing component {operation.name}."
                )
                raise RuntimeError(
                    f"There was an error while constructing component {operation.name}."
                )

    # Process dependencies after all the operations have been created
    for operation in pipeline.operations.values():
        op = notebook_ops[operation.id]
        for parent_operation_id in operation.parent_operations:
            parent_op = notebook_ops[parent_operation_id]  # Parent Operation
            op.after(parent_op)

    self.log_pipeline_info(pipeline_name,
                           "pipeline dependencies processed",
                           duration=(time.time() - t0_all))

    return notebook_ops

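# For reference (a hedged sketch; the values shown are hypothetical):
# kfp's load_component accepts exactly one of filename, url, or text (see the
# failure tests below), so component_source above must hold a single key
# matching the operation's source type, e.g.:
#
#   component_source = {'filename': 'components/my_op/component.yaml'}
#   component_source = {'url': 'https://example.com/component.yaml'}
#   component_source = {'text': inline_component_yaml}
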
def test_load_component_fail_on_no_sources(self):
    with self.assertRaises(ValueError):
        comp.load_component()

def test_load_component_fail_on_multiple_sources(self):
    with self.assertRaises(ValueError):
        comp.load_component(filename='', text='')

def test_load_component_fail_on_none_arguments(self):
    with self.assertRaises(ValueError):
        comp.load_component(filename=None, url=None, text=None)

def test_load_component_fail_on_none_arguments(self):
    with self.assertRaises(ValueError):
        comp.load_component(filename=None, url=None, text=None)

def test_load_component_fail_on_multiple_sources(self):
    with self.assertRaises(ValueError):
        comp.load_component(filename='', text='')

def test_load_component_fail_on_no_sources(self):
    with self.assertRaises(ValueError):
        comp.load_component()