Example #1
    def test_conflicting_name_renaming_stability(self):
        # Checking that already pythonic input names are not renamed
        # Checking that renaming is deterministic
        component_text = textwrap.dedent('''\
            inputs:
            - {name: Input 1}
            - {name: Input_1}
            - {name: Input-1}
            - {name: input_1}  # Last in the list, but is pythonic, so it should not be renamed
            implementation:
              container:
                image: busybox
                command:
                - inputValue: Input 1
                - inputValue: Input_1
                - inputValue: Input-1
                - inputValue: input_1
            ''')
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1(
            input_1_2='value_1_2',
            input_1_3='value_1_3',
            input_1_4='value_1_4',
            input_1='value_1',  # Expecting this input not to be renamed
        )
        resolved_cmd = _resolve_command_line_and_paths(
            task1.component_ref.spec, task1.arguments)

        self.assertEqual(resolved_cmd.command,
                         ['value_1_2', 'value_1_3', 'value_1_4', 'value_1'])
Example #2
    def test_handle_training_xor(self):
        tests_root = os.path.abspath(os.path.dirname(__file__))
        component_root = os.path.abspath(os.path.join(tests_root, '..'))
        testdata_root = os.path.abspath(os.path.join(tests_root, 'testdata'))

        train_op = comp.load_component(
            os.path.join(component_root, 'component.yaml'))

        with tempfile.TemporaryDirectory() as temp_dir_name:
            with components_local_output_dir_context(temp_dir_name):
                train_task = train_op(
                    training_set_features_path=os.path.join(
                        testdata_root, 'training_set_features.tsv'),
                    training_set_labels_path=os.path.join(
                        testdata_root, 'training_set_labels.tsv'),
                    output_model_uri=os.path.join(temp_dir_name,
                                                  'outputs/output_model/data'),
                    model_config=Path(testdata_root).joinpath(
                        'model_config.json').read_text(),
                    number_of_classes=2,
                    number_of_epochs=10,
                    batch_size=32,
                )

            full_command = train_task.command + train_task.arguments
            full_command[0] = 'python'
            full_command[1] = os.path.join(component_root, 'src', 'train.py')

            process = subprocess.run(full_command)

            (output_model_uri_file, ) = (
                train_task.file_outputs['output-model-uri'], )
            output_model_uri = Path(output_model_uri_file).read_text()
Example #3
    def test_command_if_input_value_then(self):
        component_text = '''\
inputs:
- {name: Do test, type: Boolean, optional: true}
- {name: Test data, type: Integer, optional: true}
- {name: Test parameter 1, optional: true}
implementation:
  container:
    image: busybox
    args:
      - if:
          cond: {inputValue: Do test}
          then: [--test-data, {inputValue: Test data}, --test-param1, {inputValue: Test parameter 1}]
'''
        task_factory1 = comp.load_component(text=component_text)

        task_then = task_factory1(True, 'test_data.txt', '42')
        resolved_cmd_then = _resolve_command_line_and_paths(
            task_then.component_ref.spec, task_then.arguments)
        self.assertEqual(
            resolved_cmd_then.args,
            ['--test-data', 'test_data.txt', '--test-param1', '42'])

        task_else = task_factory1()
        resolved_cmd_else = _resolve_command_line_and_paths(
            task_else.component_ref.spec, task_else.arguments)
        self.assertEqual(resolved_cmd_else.args, [])
Example #4
    def test_command_yaml_types(self):
        component_text = '''\
implementation:
  container:
    image: busybox
    arguments:
      # Nulls:
      - null #A null
      - #Also a null
      # Strings:
      - "" #empty string
      - "string"
      # Booleans
      - true
      - True
      - false
      - FALSE
      # Integers
      - 0
      - 0o7
      - 0x3A
      - -19
      # Floats
      - 0.
      - -0.0
      - .5
      - +12e03
      - -2E+05
      # Infinite floats
      - .inf
      - -.Inf
      - +.INF
      - .NAN
'''
        task_factory1 = comp.load_component(text=component_text)
        task = task_factory1()
        self.assertEqual(
            task.arguments,
            [
                #Nulls are skipped
                '',
                'string',
                'True',
                'True',
                'False',
                'False',
                '0',
                '0o7',
                '58',
                '-19',
                '0.0',
                '-0.0',
                '0.5',
                '+12e03',
                '-2E+05',
                'inf',
                '-inf',
                'inf',
                'nan',
            ])
Example #5
def load_component(component_path):
    """
    Loads a component definition from a .yaml file.
    :param component_path: Path to the component definition file.
    """
    with open(component_path, "r") as inp:
        return components.load_component(text=inp.read())
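A minimal usage sketch for the wrapper above; the component file name and the message input are hypothetical, chosen only for illustration:

# Hypothetical usage of the wrapper defined above.
# 'my_component.yaml' and its 'message' input are assumptions, not taken from a real component.
my_op = load_component('my_component.yaml')
my_task = my_op(message='hello')  # returns a task configured from the loaded component spec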
Example #6
    def test_loading_minimal_component(self):
        component_text = '''\
implementation:
  container:
    image: busybox
'''
        component_dict = load_yaml(component_text)
        task_factory1 = comp.load_component(text=component_text)

        task1 = task_factory1()
        assert task1.image == component_dict['implementation']['container']['image']
Example #7
    def test_loading_minimal_component(self):
        component_text = '''\
implementation:
  container:
    image: busybox
'''
        component_dict = load_yaml(component_text)
        task_factory1 = comp.load_component(text=component_text)

        self.assertEqual(
            task_factory1.component_spec.implementation.container.image,
            component_dict['implementation']['container']['image'])
Example #8
    def test_command_if_true_string_then_else(self):
        component_text = '''\
implementation:
  container:
    image: busybox
    args:
      - if:
          cond: 'true'
          then: --true-arg
          else: --false-arg
'''
        task_factory1 = comp.load_component(text=component_text)
        task = task_factory1()
        self.assertEqual(task.arguments, ['--true-arg'])
Example #9
    def test_command_if_then_syntax1(self):
        component_text = '''\
inputs:
- {name: In, required: false}
implementation:
  dockerContainer:
    image: busybox
    arguments:
      - [if, [isPresent, In], [--in, [value, In]]]
'''
        task_factory1 = comp.load_component(text=component_text)

        task_then = task_factory1('data')
        self.assertEqual(task_then.arguments, ['--in', 'data'])
Example #10
    def test_input_value_resolving_syntax3(self):
        component_text = '''\
inputs:
- {name: Data}
implementation:
  dockerContainer:
    image: busybox
    arguments:
      - --data
      - value: Data
'''
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1('some-data')

        self.assertEqual(task1.arguments, ['--data', 'some-data'])
Example #11
    def test_command_concat(self):
        component_text = '''\
inputs:
- {name: In1}
- {name: In2}
implementation:
  dockerContainer:
    image: busybox
    arguments:
      - concat: [{value: In1}, {value: In2}]
'''
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1('some', 'data')

        self.assertEqual(task1.arguments, ['somedata'])
Example #12
    def test_output_resolving(self):
        component_text = '''\
outputs:
- {name: Data}
implementation:
  container:
    image: busybox
    arguments:
      - --output-data
      - output: Data
'''
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1(data='/outputs/some-data')

        self.assertEqual(task1.arguments, ['--output-data', '/outputs/some-data'])
Example #13
    def test_automatic_output_resolving(self):
        component_text = '''\
outputs:
- {name: Data}
implementation:
  dockerContainer:
    image: busybox
    arguments:
      - --output-data
      - {output: Data}
'''
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1()

        self.assertEqual(len(task1.arguments), 2)
Example #14
    def test_command_if_true_string_then_else(self):
        component_text = '''\
implementation:
  container:
    image: busybox
    args:
      - if:
          cond: 'true'
          then: --true-arg
          else: --false-arg
'''
        task_factory1 = comp.load_component(text=component_text)
        task = task_factory1()
        resolved_cmd = _resolve_command_line_and_paths(task.component_ref.spec,
                                                       task.arguments)
        self.assertEqual(resolved_cmd.args, ['--true-arg'])
Example #15
    def test_command_if_then_else(self):
        component_text = '''\
inputs:
- {name: In, required: false}
implementation:
  dockerContainer:
    image: busybox
    arguments:
      - if:
          cond: {isPresent: In}
          then: [--in, {value: In}]
          else: --no-in
'''
        task_factory1 = comp.load_component(text=component_text)

        task_then = task_factory1('data')
        self.assertEqual(task_then.arguments, ['--in', 'data'])
Example #16
    def test_input_value_resolving(self):
        component_text = '''\
inputs:
- {name: Data}
implementation:
  container:
    image: busybox
    args:
      - --data
      - inputValue: Data
'''
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1('some-data')
        resolved_cmd = _resolve_command_line_and_paths(
            task1.component_ref.spec, task1.arguments)

        self.assertEqual(resolved_cmd.args, ['--data', 'some-data'])
Example #17
    def test_command_concat(self):
        component_text = '''\
inputs:
- {name: In1}
- {name: In2}
implementation:
  container:
    image: busybox
    args:
      - concat: [{inputValue: In1}, {inputValue: In2}]
'''
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1('some', 'data')
        resolved_cmd = _resolve_command_line_and_paths(
            task1.component_ref.spec, task1.arguments)

        self.assertEqual(resolved_cmd.args, ['somedata'])
Example #18
    def test_automatic_output_resolving(self):
        component_text = '''\
outputs:
- {name: Data}
implementation:
  container:
    image: busybox
    args:
      - --output-data
      - {outputPath: Data}
'''
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1()

        self.assertEqual(len(task1.arguments), 2)
        self.assertEqual(task1.arguments[0], '--output-data')
        self.assertTrue(task1.arguments[1].startswith('/'))
Example #19
    def test_command_if_is_present_then_else(self):
        component_text = '''\
inputs:
- {name: In, optional: true}
implementation:
  container:
    image: busybox
    args:
      - if:
          cond: {isPresent: In}
          then: [--in, {inputValue: In}]
          else: --no-in
'''
        task_factory1 = comp.load_component(text=component_text)

        task_then = task_factory1('data')
        self.assertEqual(task_then.arguments, ['--in', 'data'])

        task_else = task_factory1()
        self.assertEqual(task_else.arguments, ['--no-in'])
Example #20
    def test_command_if_input_value_then(self):
        component_text = '''\
inputs:
- {name: Do test, type: boolean, optional: true}
- {name: Test data, optional: true}
- {name: Test parameter 1, optional: true}
implementation:
  container:
    image: busybox
    args:
      - if:
          cond: {inputValue: Do test}
          then: [--test-data, {inputValue: Test data}, --test-param1, {inputValue: Test parameter 1}]
'''
        task_factory1 = comp.load_component(text=component_text)

        task_then = task_factory1(True, 'test_data.txt', 42)
        self.assertEqual(task_then.arguments, ['--test-data', 'test_data.txt', '--test-param1', '42'])
        
        task_else = task_factory1()
        self.assertEqual(task_else.arguments, [])
Example #21
    def test_1(self):
        tests_root = os.path.abspath(os.path.dirname(__file__))
        component_root = os.path.abspath(os.path.join(tests_root, '..'))
        testdata_root = os.path.abspath(os.path.join(tests_root, 'testdata'))
        
        train_op = comp.load_component(os.path.join(component_root, 'component.yaml'))

        input_path_list = [
            {'PipelineParam': {'name': 'out_path', 'op_name': 'Spark Hash Preprocess',
                               'value': None, 'param_type': 'List',
                               'pattern': '/tmp/automl_spark/7e4b8111-f20b-4991-a3b4-90d6a0ce3389/pipeline-hbbsb-1543833624-hash'}},
            {'PipelineParam': {'name': 'out_label_path', 'op_name': 'Spark Label Preprocess',
                               'value': None, 'param_type': 'String',
                               'pattern': '/tmp/automl_spark/7e4b8111-f20b-4991-a3b4-90d6a0ce3389/pipeline-hbbsb-1645838448-label'}},
            {'PipelineParam': {'name': 'out_timestamp_path', 'op_name': 'Spark Timestamp Prepeocess',
                               'value': None, 'param_type': 'String',
                               'pattern': '/tmp/automl_spark/7e4b8111-f20b-4991-a3b4-90d6a0ce3389/pipeline-hbbsb-1323756077-timestamp'}}]

        tmp = ["file://" + os.path.join(testdata_root, "label.parquet"),
               "file://" + os.path.join(testdata_root, "time.parquet")]

        with tempfile.TemporaryDirectory() as temp_dir_name:
            with components_local_output_dir_context(temp_dir_name):
                train_task = train_op(
                    input_path_list=input_path_list,
                    label_name='rating',
                    test_size=0.8,
                    out_train_path="file://" + os.path.join(testdata_root, "train.parquet"),
                    out_test_path="file://" + os.path.join(testdata_root, "test.parquet")
                )

            full_command = train_task.command + train_task.arguments
            full_command[0] = 'python'
            full_command[1] = os.path.join(component_root, 'src', 'bind_preprocess.py')

            process = subprocess.run(full_command)

            (output_model_uri_file, ) = (train_task.file_outputs['out_train_path'], )
            output_model_uri = Path(output_model_uri_file).read_text()

            (output_model_uri_file,) = (train_task.file_outputs['out_test_path'],)
            output_model_uri = Path(output_model_uri_file).read_text()
Example #22
    def test_command_if_is_present_then(self):
        component_text = '''\
inputs:
- {name: In, optional: true}
implementation:
  container:
    image: busybox
    args:
      - if:
          cond: {isPresent: In}
          then: [--in, {inputValue: In}]
          #else: --no-in
'''
        task_factory1 = comp.load_component(text=component_text)

        task_then = task_factory1('data')
        resolved_cmd_then = _resolve_command_line_and_paths(
            task_then.component_ref.spec, task_then.arguments)
        self.assertEqual(resolved_cmd_then.args, ['--in', 'data'])

        task_else = task_factory1()
        resolved_cmd_else = _resolve_command_line_and_paths(
            task_else.component_ref.spec, task_else.arguments)
        self.assertEqual(resolved_cmd_else.args, [])
"""
import os
import os.path as op

from kfp import dsl, components
import kfp.gcp as gcp
from kubernetes.client import V1Toleration

#######################################
# Load custom components
#######################################

###################
# Train Op
comp_train_fname = op.join('components', 'od_train', 'component.yaml')
train_component = components.load_component(filename=comp_train_fname)

###################
# Export Op
comp_export_fname = op.join('components', 'od_export', 'component.yaml')
export_component = components.load_component(filename=comp_export_fname)

########################################
# Define tolerations for the ML node taints
ml_tol = V1Toleration(effect='NoSchedule', key='mlUseOnly', operator='Equal', value='true')
ml_tol2 = V1Toleration(effect='NoSchedule', key='nvidia.com/gpu', operator='Equal', value='present')

@dsl.pipeline(name='OD API training/export',
              description='A pipeline to train/export an instance segmentation model.')
def divot_detect_pipeline(
        pipeline_config_path,
Example #24
    def _cc_pipeline(self,
                     pipeline,
                     pipeline_name,
                     pipeline_version='',
                     experiment_name='',
                     cos_directory=None,
                     export=False):

        runtime_configuration = self._get_metadata_configuration(
            namespace=MetadataManager.NAMESPACE_RUNTIMES,
            name=pipeline.runtime_config)

        cos_endpoint = runtime_configuration.metadata['cos_endpoint']
        cos_username = runtime_configuration.metadata['cos_username']
        cos_password = runtime_configuration.metadata['cos_password']
        cos_secret = runtime_configuration.metadata.get('cos_secret')

        if cos_directory is None:
            cos_directory = pipeline_name
        cos_bucket = runtime_configuration.metadata['cos_bucket']

        self.log_pipeline_info(
            pipeline_name,
            f"processing pipeline dependencies to: {cos_endpoint} "
            f"bucket: {cos_bucket} folder: {cos_directory}")
        t0_all = time.time()

        emptydir_volume_size = ''
        container_runtime = bool(
            os.getenv('CRIO_RUNTIME', 'False').lower() == 'true')

        # Create dictionary that maps component Id to its ContainerOp instance
        notebook_ops = {}

        # Sort operations based on dependency graph (topological order)
        sorted_operations = PipelineProcessor._sort_operations(
            pipeline.operations)

        # All previous operation outputs should be propagated throughout the pipeline.
        # To process this recursively, the current operation's inputs are combined from its
        # parents' inputs (which are themselves derived from the outputs of their parents)
        # and its parents' outputs.

        PipelineProcessor._propagate_operation_inputs_outputs(
            pipeline, sorted_operations)

        for operation in sorted_operations:

            if container_runtime:
                # Volume size to create when using CRI-O. NOTE: the IBM Cloud minimum is 20Gi
                emptydir_volume_size = '20Gi'

            # Collect env variables
            pipeline_envs = self._collect_envs(operation,
                                               cos_secret=cos_secret,
                                               cos_username=cos_username,
                                               cos_password=cos_password)

            sanitized_operation_name = self._sanitize_operation_name(
                operation.name)

            # Create the pipeline operation.
            # If the operation is one of the "standard" notebook or script nodes, construct a custom NotebookOp.
            if operation.classifier in [
                    "execute-notebook-node", "execute-python-node",
                    "execute-r-node"
            ]:

                operation_artifact_archive = self._get_dependency_archive_name(
                    operation)

                self.log.debug(
                    "Creating pipeline component :\n {op} archive : {archive}".
                    format(op=operation, archive=operation_artifact_archive))

                notebook_ops[operation.id] = NotebookOp(
                    name=sanitized_operation_name,
                    pipeline_name=pipeline_name,
                    experiment_name=experiment_name,
                    notebook=operation.filename,
                    cos_endpoint=cos_endpoint,
                    cos_bucket=cos_bucket,
                    cos_directory=cos_directory,
                    cos_dependencies_archive=operation_artifact_archive,
                    pipeline_version=pipeline_version,
                    pipeline_source=pipeline.source,
                    pipeline_inputs=operation.inputs,
                    pipeline_outputs=operation.outputs,
                    pipeline_envs=pipeline_envs,
                    emptydir_volume_size=emptydir_volume_size,
                    cpu_request=operation.cpu,
                    mem_request=operation.memory,
                    gpu_limit=operation.gpu,
                    image=operation.runtime_image,
                    file_outputs={
                        'mlpipeline-metrics':
                        '{}/mlpipeline-metrics.json'.format(
                            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR']),
                        'mlpipeline-ui-metadata':
                        '{}/mlpipeline-ui-metadata.json'.format(
                            pipeline_envs['ELYRA_WRITABLE_CONTAINER_DIR'])
                    })

                # TODO Can we move all of this to apply to non-standard components as well? Test when servers are up
                if cos_secret and not export:
                    notebook_ops[operation.id].apply(
                        use_aws_secret(cos_secret))

                image_namespace = self._get_metadata_configuration(
                    namespace=MetadataManager.NAMESPACE_RUNTIME_IMAGES)
                for image_instance in image_namespace:
                    if image_instance.metadata['image_name'] == operation.runtime_image and \
                            image_instance.metadata.get('pull_policy'):
                        notebook_ops[operation.id].container. \
                            set_image_pull_policy(image_instance.metadata['pull_policy'])

                self.log_pipeline_info(
                    pipeline_name,
                    f"processing operation dependencies for id: {operation.id}",
                    operation_name=operation.name)

                self._upload_dependencies_to_object_store(
                    runtime_configuration, cos_directory, operation)

            # If the operation is a "non-standard" component, load its spec and create the operation with a factory function
            else:
                component_source = {}
                component_source[
                    operation.
                    component_source_type] = operation.component_source

                # Build component task factory
                try:
                    factory_function = components.load_component(
                        **component_source)
                except Exception:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(
                        f"There was an error while loading component spec for {operation.name}."
                    )
                    raise RuntimeError(
                        f"There was an error while loading component spec for {operation.name}."
                    )

                # Add factory function, which returns a ContainerOp task instance, to pipeline operation dict
                try:
                    notebook_ops[operation.id] = factory_function(
                        **operation.component_params)
                except Exception:
                    # TODO Fix error messaging and break exceptions down into categories
                    self.log.error(
                        f"There was an error while constructing component {operation.name}."
                    )
                    raise RuntimeError(
                        f"There was an error while constructing component {operation.name}."
                    )

        # Process dependencies after all the operations have been created
        for operation in pipeline.operations.values():
            op = notebook_ops[operation.id]
            for parent_operation_id in operation.parent_operations:
                parent_op = notebook_ops[
                    parent_operation_id]  # Parent Operation
                op.after(parent_op)

        self.log_pipeline_info(pipeline_name,
                               "pipeline dependencies processed",
                               duration=(time.time() - t0_all))

        return notebook_ops
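For reference, a minimal sketch of the keyword-splat dispatch used for the "non-standard" components above; the source type and URL are hypothetical placeholders, and load_component accepts exactly one of filename, url, or text:

# Hypothetical illustration of the component_source dispatch shown above.
component_source_type = 'url'  # assumed to come from the operation metadata
component_source = {component_source_type: 'https://example.com/component.yaml'}  # hypothetical location
factory_function = components.load_component(**component_source)
task = factory_function()  # a real call would pass the component's declared inputs as keyword arguments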
Example #25
    def test_load_component_fail_on_no_sources(self):
        comp.load_component()
Example #26
    def test_load_component_fail_on_multiple_sources(self):
        comp.load_component(filename='', text='')
Example #27
    def test_load_component_fail_on_none_arguments(self):
        comp.load_component(filename=None, url=None, text=None)
Example #28
    def test_load_component_fail_on_none_arguments(self):
        with self.assertRaises(ValueError):
            comp.load_component(filename=None, url=None, text=None)
Example #29
    def test_load_component_fail_on_multiple_sources(self):
        with self.assertRaises(ValueError):
            comp.load_component(filename='', text='')
Example #30
    def test_load_component_fail_on_no_sources(self):
        with self.assertRaises(ValueError):
            comp.load_component()
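Taken together, the last six examples indicate that load_component requires exactly one source. A minimal sketch of the three mutually exclusive ways to supply it, using hypothetical locations and the comp alias from the tests above:

# Exactly one of filename, url, or text may be passed; the file path and URL below are hypothetical.
op_from_file = comp.load_component(filename='component.yaml')
op_from_url = comp.load_component(url='https://example.com/component.yaml')
op_from_text = comp.load_component(text='implementation:\n  container:\n    image: busybox\n')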