Esempio n. 1
0
    def test_command_if_input_value_then(self):
        component_text = '''\
inputs:
- {name: Do test, type: Boolean, optional: true}
- {name: Test data, type: Integer, optional: true}
- {name: Test parameter 1, optional: true}
implementation:
  container:
    image: busybox
    args:
      - if:
          cond: {inputValue: Do test}
          then: [--test-data, {inputValue: Test data}, --test-param1, {inputValue: Test parameter 1}]
'''
        task_factory1 = comp.load_component(text=component_text)

        task_then = task_factory1(True, 'test_data.txt', '42')
        resolved_cmd_then = _resolve_command_line_and_paths(
            task_then.component_ref.spec, task_then.arguments)
        self.assertEqual(
            resolved_cmd_then.args,
            ['--test-data', 'test_data.txt', '--test-param1', '42'])

        task_else = task_factory1()
        resolved_cmd_else = _resolve_command_line_and_paths(
            task_else.component_ref.spec, task_else.arguments)
        self.assertEqual(resolved_cmd_else.args, [])
    def helper_test_component_using_local_call(self, component_task_factory: Callable, arguments: dict = None, expected_output_values: dict = None):
        """Instantiates a component task, executes its resolved command line
        locally, and compares the produced output files with the expected
        values."""
        if arguments is None:
            arguments = {}
        if expected_output_values is None:
            expected_output_values = {}
        with tempfile.TemporaryDirectory() as temp_dir:
            # Redirect input/output file locations into the temp directory so
            # the locally-executed command reads and writes under our control.
            input_dir = Path(temp_dir) / 'inputs'
            output_dir = Path(temp_dir) / 'outputs'
            with components_override_input_output_dirs_context(str(input_dir), str(output_dir)):
                task = component_task_factory(**arguments)
                resolved_cmd = _resolve_command_line_and_paths(
                    task.component_ref.spec,
                    task.arguments,
                )

            # Materialize the input files the command expects to read.
            for name, file_path in (resolved_cmd.input_paths or {}).items():
                in_path = Path(file_path)
                in_path.parent.mkdir(parents=True, exist_ok=True)
                in_path.write_text(str(arguments[name]))

            # Execute the full resolved command line as a local subprocess.
            subprocess.run(resolved_cmd.command + resolved_cmd.args, check=True)

            actual_output_values_dict = {
                name: Path(path).read_text()
                for name, path in resolved_cmd.output_paths.items()
            }

        self.assertDictEqual(actual_output_values_dict, expected_output_values)
Esempio n. 3
0
    def test_conflicting_name_renaming_stability(self):
        """Already-pythonic input names must not be renamed, and the renaming
        of conflicting names must be deterministic."""
        component_text = textwrap.dedent('''\
            inputs:
            - {name: Input 1}
            - {name: Input_1}
            - {name: Input-1}
            - {name: input_1}  # Last in the list, but is pythonic, so it should not be renamed
            implementation:
              container:
                image: busybox
                command:
                - inputValue: Input 1
                - inputValue: Input_1
                - inputValue: Input-1
                - inputValue: input_1
            ''')
        factory = comp.load_component(text=component_text)
        # The three conflicting names get numeric suffixes; the pythonic
        # `input_1` keeps its original name even though it is declared last.
        task = factory(
            input_1_2='value_1_2',
            input_1_3='value_1_3',
            input_1_4='value_1_4',
            input_1='value_1',  # Expecting this input not to be renamed
        )
        resolved = _resolve_command_line_and_paths(
            task.component_ref.spec, task.arguments)

        self.assertEqual(resolved.command,
                         ['value_1_2', 'value_1_3', 'value_1_4', 'value_1'])
Esempio n. 4
0
    def helper_test_2_in_2_out_component_using_local_call(
            self, func, op, output_names):
        """Runs a two-input/two-output component locally and checks both
        outputs against the reference python function `func`."""
        first_arg = 3.0
        second_arg = 5.0

        expected_pair = func(first_arg, second_arg)
        expected1_str = str(expected_pair[0])
        expected2_str = str(expected_pair[1])

        with tempfile.TemporaryDirectory() as temp_dir:
            with components_local_output_dir_context(temp_dir):
                task = op(first_arg, second_arg)
                resolved_cmd = _resolve_command_line_and_paths(
                    task.component_ref.spec,
                    task.arguments,
                )

            # Execute the resolved command line as a local subprocess.
            subprocess.run(resolved_cmd.command + resolved_cmd.args, check=True)

            actual1_str, actual2_str = (
                Path(resolved_cmd.output_paths[name]).read_text()
                for name in (output_names[0], output_names[1]))

        # Compare numerically so string formatting differences don't matter.
        self.assertEqual(float(actual1_str), float(expected1_str))
        self.assertEqual(float(actual2_str), float(expected2_str))
Esempio n. 5
0
    def test_handle_default_values_in_task_factory(self):
        component_text = '''\
inputs:
- {name: Data, default: '123'}
implementation:
  container:
    image: busybox
    args:
      - {inputValue: Data}
'''
        task_factory1 = comp.load_component_from_text(text=component_text)

        task1 = task_factory1()
        resolved_cmd1 = _resolve_command_line_and_paths(
            task1.component_ref.spec, task1.arguments)
        self.assertEqual(resolved_cmd1.args, ['123'])

        task2 = task_factory1('456')
        resolved_cmd2 = _resolve_command_line_and_paths(
            task2.component_ref.spec, task2.arguments)
        self.assertEqual(resolved_cmd2.args, ['456'])
Esempio n. 6
0
    def test_command_if_is_present_then_else(self):
        component_text = '''\
inputs:
- {name: In, optional: true}
implementation:
  container:
    image: busybox
    args:
      - if:
          cond: {isPresent: In}
          then: [--in, {inputValue: In}]
          else: --no-in
'''
        task_factory1 = comp.load_component(text=component_text)

        task_then = task_factory1('data')
        resolved_cmd_then = _resolve_command_line_and_paths(
            task_then.component_ref.spec, task_then.arguments)
        self.assertEqual(resolved_cmd_then.args, ['--in', 'data'])

        task_else = task_factory1()
        resolved_cmd_else = _resolve_command_line_and_paths(
            task_else.component_ref.spec, task_else.arguments)
        self.assertEqual(resolved_cmd_else.args, ['--no-in'])
Esempio n. 7
0
    def test_handling_list_arguments_containing_pipelineparam(self):
        '''Checks that lists containing PipelineParam can be properly serialized'''
        def consume_list(list_param: list) -> int:
            pass

        import kfp
        factory = comp.func_to_container_op(consume_list)
        task = factory([1, 2, 3, kfp.dsl.PipelineParam("aaa"), 4, 5, 6])
        resolved = _resolve_command_line_and_paths(
            task.component_ref.spec,
            task.arguments,
        )
        # The serialized command line must not leak the raw PipelineParam
        # object representation anywhere.
        for part in resolved.command + resolved.args:
            self.assertNotIn('PipelineParam', part)
Esempio n. 8
0
    def test_command_if_true_string_then_else(self):
        component_text = '''\
implementation:
  container:
    image: busybox
    args:
      - if:
          cond: 'true'
          then: --true-arg
          else: --false-arg
'''
        task_factory1 = comp.load_component(text=component_text)
        task = task_factory1()
        resolved_cmd = _resolve_command_line_and_paths(task.component_ref.spec,
                                                       task.arguments)
        self.assertEqual(resolved_cmd.args, ['--true-arg'])
Esempio n. 9
0
    def test_command_concat(self):
        component_text = '''\
inputs:
- {name: In1}
- {name: In2}
implementation:
  container:
    image: busybox
    args:
      - concat: [{inputValue: In1}, {inputValue: In2}]
'''
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1('some', 'data')
        resolved_cmd = _resolve_command_line_and_paths(
            task1.component_ref.spec, task1.arguments)

        self.assertEqual(resolved_cmd.args, ['somedata'])
Esempio n. 10
0
    def test_input_value_resolving(self):
        component_text = '''\
inputs:
- {name: Data}
implementation:
  container:
    image: busybox
    args:
      - --data
      - inputValue: Data
'''
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1('some-data')
        resolved_cmd = _resolve_command_line_and_paths(
            task1.component_ref.spec, task1.arguments)

        self.assertEqual(resolved_cmd.args, ['--data', 'some-data'])
Esempio n. 11
0
    def test_input_path_placeholder_with_constant_argument(self):
        component_text = '''\
inputs:
- {name: input 1}
implementation:
  container:
    image: busybox
    command:
      - --input-data
      - {inputPath: input 1}
'''
        task_factory1 = comp.load_component_from_text(component_text)
        task1 = task_factory1('Text')
        resolved_cmd = _resolve_command_line_and_paths(
            task1.component_ref.spec, task1.arguments)

        self.assertEqual(resolved_cmd.command,
                         ['--input-data', resolved_cmd.input_paths['input 1']])
        self.assertEqual(task1.arguments, {'input 1': 'Text'})
Esempio n. 12
0
    def test_automatic_output_resolving(self):
        component_text = '''\
outputs:
- {name: Data}
implementation:
  container:
    image: busybox
    args:
      - --output-data
      - {outputPath: Data}
'''
        task_factory1 = comp.load_component(text=component_text)
        task1 = task_factory1()
        resolved_cmd = _resolve_command_line_and_paths(
            task1.component_ref.spec, task1.arguments)

        self.assertEqual(len(resolved_cmd.args), 2)
        self.assertEqual(resolved_cmd.args[0], '--output-data')
        self.assertTrue(resolved_cmd.args[1].startswith('/'))
Esempio n. 13
0
    def _test_load_component_from_file(self, component_path: str):
        """Loads the 'Add' component from `component_path` and checks the
        factory metadata and command-line argument resolution."""
        factory = comp.load_component_from_file(component_path)

        first, second = 3, 5
        task = factory(first, second)

        # Factory metadata is taken from the component definition.
        self.assertEqual(factory.__name__, 'Add')
        self.assertEqual(factory.__doc__.strip(),
                         'Add\nReturns sum of two arguments')

        self.assertEqual(
            task.component_ref.spec.implementation.container.image,
            'python:3.5')

        resolved = _resolve_command_line_and_paths(
            task.component_ref.spec, task.arguments)
        # Integer arguments are serialized to strings on the command line.
        self.assertEqual(resolved.args[0], str(first))
        self.assertEqual(resolved.args[1], str(second))
Esempio n. 14
0
    def test_missing_optional_input_file_argument(self):
        '''Missing optional inputs should resolve to nothing'''
        component_text = '''\
inputs:
- {name: input 1, optional: true}
implementation:
  container:
    image: busybox
    command:
      - a
      - {inputPath: input 1}
      - z
'''
        task_factory1 = comp.load_component_from_text(component_text)
        task1 = task_factory1()
        resolved_cmd = _resolve_command_line_and_paths(
            task1.component_ref.spec, task1.arguments)

        self.assertEqual(resolved_cmd.command, ['a', 'z'])
    def helper_test_2_in_1_out_component_using_local_call(self, func, op, arguments=(3., 5.)):
        """Runs a two-input/single-output component locally and compares the
        produced output with the reference python function `func`.

        Args:
            func: Reference python implementation producing the expected value.
            op: Component task factory under test.
            arguments: Pair of input values; defaults to (3.0, 5.0).
                Note: the default was changed from a mutable list to a tuple
                to avoid the shared-mutable-default pitfall; any sequence
                still works since it is only indexed.
        """
        expected = func(arguments[0], arguments[1])
        if isinstance(expected, tuple):
            # Multi-output reference function: only the first output is
            # compared by this helper.
            expected = expected[0]
        expected_str = str(expected)

        with tempfile.TemporaryDirectory() as temp_dir_name:
            with components_local_output_dir_context(temp_dir_name):
                task = op(arguments[0], arguments[1])
                resolved_cmd = _resolve_command_line_and_paths(
                    task.component_ref.spec,
                    task.arguments,
                )

            # Execute the resolved command line as a local subprocess.
            full_command = resolved_cmd.command + resolved_cmd.args
            subprocess.run(full_command, check=True)

            # Read back the single produced output file.
            output_path = next(iter(resolved_cmd.output_paths.values()))
            actual_str = Path(output_path).read_text()

        # Compare numerically so string formatting differences don't matter.
        self.assertEqual(float(actual_str), float(expected_str))
Esempio n. 16
0
    def test_load_component_from_url(self):
        """Loads a component from a URL and checks that the factory docstring,
        container image, and resolved arguments match the raw component YAML."""
        url = 'https://raw.githubusercontent.com/kubeflow/pipelines/e54fe675432cfef1d115a7a2909f08ed95ea8933/sdk/python/tests/components/test_data/python_add.component.yaml'

        import requests
        # Fetch the raw YAML independently to compare against the factory.
        component_dict = load_yaml(requests.get(url).content)
        factory = comp.load_component_from_url(url)
        self.assertEqual(
            factory.__doc__,
            component_dict['name'] + '\n' + component_dict['description'])

        first, second = 3, 5
        task = factory(first, second)
        self.assertEqual(
            task.component_ref.spec.implementation.container.image,
            component_dict['implementation']['container']['image'])

        resolved = _resolve_command_line_and_paths(
            task.component_ref.spec, task.arguments)
        self.assertEqual(resolved.args[0], str(first))
        self.assertEqual(resolved.args[1], str(second))
Esempio n. 17
0
import kfp.components as comp
from kfp.components._components import _resolve_command_line_and_paths

# Bug fix: `import kfp.components as comp` binds only the name `comp`, not
# `kfp`, so the original call `kfp.components.load_component_from_file(...)`
# raised a NameError. Use the `comp` alias instead.
my_op = comp.load_component_from_file('tarsan_gen_images.yaml')

print("**** LOADED COMPONENT:")
print(my_op.component_spec)

sample_count = 20

# Arguments for the component; keys must match the component's input names.
cmd_args = {
   "samples_count": sample_count,
}

# Resolve all command-line placeholders against the concrete arguments.
cmd = _resolve_command_line_and_paths(
         component_spec = my_op.component_spec,
         arguments = cmd_args,
      )

print("\nIMAGE:\n")
print(my_op.component_spec.implementation.container.image)

print("\nCOMMAND\n")
print(cmd.command)

print("\nARGS\n")
print(cmd.args)

import docker
docker_client = docker.from_env()

print("\nCOMMAND:")
Esempio n. 18
0
    def _resolve_commands_and_args_v2(
        component_spec: _structures.ComponentSpec,
        arguments: Mapping[str, Any],
    ) -> _components._ResolvedCommandLineAndPaths:
        """Resolves the command line argument placeholders for v2 (IR).

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.

    Returns:
      A named tuple: _components._ResolvedCommandLineAndPaths.
    """
        # NOTE(review): `is_compiling_for_v2` and `importer_specs` are not
        # defined in this block; presumably they are captured from an
        # enclosing scope — confirm against the surrounding function.
        # Index the declared input/output specs by name so the placeholder
        # generators below can look up each key's declared type.
        inputs_dict = {
            input_spec.name: input_spec
            for input_spec in component_spec.inputs or []
        }
        outputs_dict = {
            output_spec.name: output_spec
            for output_spec in component_spec.outputs or []
        }

        def _input_artifact_uri_placeholder(input_key: str) -> str:
            # Parameter-typed inputs cannot be consumed as artifact URIs when
            # compiling for v2.
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputUriPlaceholder.'.format(input_key,
                                                  inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

        def _input_artifact_path_placeholder(input_key: str) -> str:
            # Reject parameter-typed inputs, and inputs that will be fed by an
            # auto-injected importer node (importer-fed inputs only provide a
            # URI, not a local path).
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputPathPlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            elif is_compiling_for_v2 and input_key in importer_specs:
                raise TypeError(
                    'Input "{}" with type "{}" is not connected to any upstream output. '
                    'However it is used with InputPathPlaceholder. '
                    'If you want to import an existing artifact using a system-connected'
                    ' importer node, use InputUriPlaceholder instead. '
                    'Or if you just want to pass a string parameter, use string type and'
                    ' InputValuePlaceholder instead.'.format(
                        input_key, inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.artifacts['{}'].path}}}}".format(
                    input_key)

        def _input_parameter_placeholder(input_key: str) -> str:
            # Only parameter-typed inputs may be inlined by value.
            if is_compiling_for_v2 and not type_utils.is_parameter_type(
                    inputs_dict[input_key].type):
                raise TypeError(
                    'Input "{}" with type "{}" cannot be paired with '
                    'InputValuePlaceholder.'.format(
                        input_key, inputs_dict[input_key].type))
            else:
                return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

        def _output_artifact_uri_placeholder(output_key: str) -> str:
            # Parameter-typed outputs have no artifact URI.
            if is_compiling_for_v2 and type_utils.is_parameter_type(
                    outputs_dict[output_key].type):
                raise TypeError(
                    'Output "{}" with type "{}" cannot be paired with '
                    'OutputUriPlaceholder.'.format(
                        output_key, outputs_dict[output_key].type))
            else:
                return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(
                    output_key)

        def _output_artifact_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

        def _output_parameter_path_placeholder(output_key: str) -> str:
            return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
                output_key)

        def _resolve_output_path_placeholder(output_key: str) -> str:
            # Dispatch on the declared output type: parameters are written to
            # an output file; artifacts resolve to their local path.
            if type_utils.is_parameter_type(outputs_dict[output_key].type):
                return _output_parameter_path_placeholder(output_key)
            else:
                return _output_artifact_path_placeholder(output_key)

        # Delegate the actual placeholder substitution to the shared resolver,
        # supplying the v2 (IR) placeholder generators defined above.
        resolved_cmd = _components._resolve_command_line_and_paths(
            component_spec=component_spec,
            arguments=arguments,
            input_value_generator=_input_parameter_placeholder,
            input_uri_generator=_input_artifact_uri_placeholder,
            output_uri_generator=_output_artifact_uri_placeholder,
            input_path_generator=_input_artifact_path_placeholder,
            output_path_generator=_resolve_output_path_placeholder,
        )
        return resolved_cmd
Esempio n. 19
0
def create_container_op_from_component_and_arguments(
    component_spec: structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[structures.ComponentReference] = None,
) -> container_op.ContainerOp:
  """Instantiates ContainerOp object.

  Builds both the v1 ContainerOp and the v2 (IR) PipelineTaskSpec /
  PipelineContainerSpec for the task, attaching the latter to the returned
  ContainerOp as `task_spec`, `importer_spec` and `container_spec`.

  Args:
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
    component_ref: (not used in v2)

  Returns:
    A ContainerOp instance.
  """

  pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()

  # Keep track of auto-injected importer spec.
  importer_spec = {}

  # Check types of the reference arguments and serialize PipelineParams
  # (copy first so the caller's dict is not mutated).
  arguments = arguments.copy()
  for input_name, argument_value in arguments.items():
    if isinstance(argument_value, dsl.PipelineParam):
      input_type = component_spec._inputs_dict[input_name].type
      reference_type = argument_value.param_type
      types.verify_type_compatibility(
          reference_type, input_type,
          'Incompatible argument passed to the input "{}" of component "{}": '
          .format(input_name, component_spec.name))

      # Replace the PipelineParam with its serialized placeholder string.
      arguments[input_name] = str(argument_value)

      if type_utils.is_parameter_type(input_type):
        if argument_value.op_name:
          # Parameter produced by an upstream task.
          pipeline_task_spec.inputs.parameters[
              input_name].task_output_parameter.producer_task = (
                  argument_value.op_name)
          pipeline_task_spec.inputs.parameters[
              input_name].task_output_parameter.output_parameter_key = (
                  argument_value.name)
        else:
          # Parameter supplied at pipeline runtime.
          pipeline_task_spec.inputs.parameters[
              input_name].runtime_value.runtime_parameter = argument_value.name
      else:
        if argument_value.op_name:
          # Artifact produced by an upstream task.
          pipeline_task_spec.inputs.artifacts[input_name].producer_task = (
              argument_value.op_name)
          pipeline_task_spec.inputs.artifacts[
              input_name].output_artifact_key = (
                  argument_value.name)
        else:
          # argument_value.op_name could be none, in which case an importer node
          # will be inserted later.
          pipeline_task_spec.inputs.artifacts[input_name].producer_task = ''
          type_schema = type_utils.get_input_artifact_type_schema(
              input_name, component_spec.inputs)
          importer_spec[input_name] = importer_node.build_importer_spec(
              input_type_schema=type_schema,
              pipeline_param_name=argument_value.name)
    elif isinstance(argument_value, str):
      input_type = component_spec._inputs_dict[input_name].type
      if type_utils.is_parameter_type(input_type):
        # Constant string parameter value.
        pipeline_task_spec.inputs.parameters[
            input_name].runtime_value.constant_value.string_value = (
                argument_value)
      else:
        # An importer node with constant value artifact_uri will be inserted.
        pipeline_task_spec.inputs.artifacts[input_name].producer_task = ''
        type_schema = type_utils.get_input_artifact_type_schema(
            input_name, component_spec.inputs)
        importer_spec[input_name] = importer_node.build_importer_spec(
            input_type_schema=type_schema, constant_value=argument_value)
    elif isinstance(argument_value, int):
      pipeline_task_spec.inputs.parameters[
          input_name].runtime_value.constant_value.int_value = argument_value
    elif isinstance(argument_value, float):
      pipeline_task_spec.inputs.parameters[
          input_name].runtime_value.constant_value.double_value = argument_value
    elif isinstance(argument_value, dsl.ContainerOp):
      raise TypeError(
          'ContainerOp object {} was passed to component as an input argument. '
          'Pass a single output instead.'.format(input_name))
    else:
      raise NotImplementedError(
          'Input argument supports only the following types: PipelineParam'
          ', str, int, float. Got: "{}".'.format(argument_value))

  # Declare the task's outputs in the IR, split by parameter vs artifact.
  for output in component_spec.outputs or []:
    if type_utils.is_parameter_type(output.type):
      pipeline_task_spec.outputs.parameters[
          output.name].type = type_utils.get_parameter_type(output.type)
    else:
      pipeline_task_spec.outputs.artifacts[
          output.name].artifact_type.instance_schema = (
              type_utils.get_artifact_type_schema(output.type))

  # Index input/output specs by name for the placeholder generators below.
  inputs_dict = {
      input_spec.name: input_spec for input_spec in component_spec.inputs or []
  }
  outputs_dict = {
      output_spec.name: output_spec
      for output_spec in component_spec.outputs or []
  }

  def _input_artifact_uri_placeholder(input_key: str) -> str:
    # Parameter-typed inputs cannot be consumed as artifact URIs.
    if type_utils.is_parameter_type(inputs_dict[input_key].type):
      raise TypeError(
          'Input "{}" with type "{}" cannot be paired with InputUriPlaceholder.'
          .format(input_key, inputs_dict[input_key].type))
    else:
      return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

  def _input_artifact_path_placeholder(input_key: str) -> str:
    # Reject parameter-typed inputs, and importer-fed inputs (those only
    # provide a URI, not a local path).
    if type_utils.is_parameter_type(inputs_dict[input_key].type):
      raise TypeError(
          'Input "{}" with type "{}" cannot be paired with InputPathPlaceholder.'
          .format(input_key, inputs_dict[input_key].type))
    elif input_key in importer_spec:
      raise TypeError(
          'Input "{}" with type "{}" is not connected to any upstream output. '
          'However it is used with InputPathPlaceholder. '
          'If you want to import an existing artifact using a system-connected '
          'importer node, use InputUriPlaceholder instead. '
          'Or if you just want to pass a string parameter, use string type and '
          'InputValuePlaceholder instead.'
          .format(input_key, inputs_dict[input_key].type))
    else:
      return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

  def _input_parameter_placeholder(input_key: str) -> str:
    # Only parameter-typed inputs may be inlined by value.
    if type_utils.is_parameter_type(inputs_dict[input_key].type):
      return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)
    else:
      raise TypeError(
          'Input "{}" with type "{}" cannot be paired with InputValuePlaceholder.'
          .format(input_key, inputs_dict[input_key].type))

  def _output_artifact_uri_placeholder(output_key: str) -> str:
    # Parameter-typed outputs have no artifact URI.
    if type_utils.is_parameter_type(outputs_dict[output_key].type):
      raise TypeError(
          'Output "{}" with type "{}" cannot be paired with OutputUriPlaceholder.'
          .format(output_key, outputs_dict[output_key].type))
    else:
      return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

  def _output_artifact_path_placeholder(output_key: str) -> str:
    return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

  def _output_parameter_path_placeholder(output_key: str) -> str:
    return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(output_key)

  def _resolve_output_path_placeholder(output_key: str) -> str:
    # Dispatch on the declared output type: parameter file vs artifact path.
    if type_utils.is_parameter_type(outputs_dict[output_key].type):
      return _output_parameter_path_placeholder(output_key)
    else:
      return _output_artifact_path_placeholder(output_key)

  # Substitute all placeholders in the component's command/args.
  resolved_cmd = _resolve_command_line_and_paths(
      component_spec=component_spec,
      arguments=arguments,
      input_value_generator=_input_parameter_placeholder,
      input_uri_generator=_input_artifact_uri_placeholder,
      output_uri_generator=_output_artifact_uri_placeholder,
      input_path_generator=_input_artifact_path_placeholder,
      output_path_generator=_resolve_output_path_placeholder,
  )

  container_spec = component_spec.implementation.container

  # Build the v2 container spec from the resolved command line.
  pipeline_container_spec = (
      pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec())
  pipeline_container_spec.image = container_spec.image
  pipeline_container_spec.command.extend(resolved_cmd.command)
  pipeline_container_spec.args.extend(resolved_cmd.args)

  # Merge URI- and path-based locations for the ContainerOp constructor.
  output_uris_and_paths = resolved_cmd.output_uris.copy()
  output_uris_and_paths.update(resolved_cmd.output_paths)
  input_uris_and_paths = resolved_cmd.input_uris.copy()
  input_uris_and_paths.update(resolved_cmd.input_paths)

  # Temporarily silence the "reusable component" warning while constructing
  # the ContainerOp directly; the previous value is restored below.
  old_warn_value = dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
  dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
  task = container_op.ContainerOp(
      name=component_spec.name or _default_component_name,
      image=container_spec.image,
      command=resolved_cmd.command,
      arguments=resolved_cmd.args,
      file_outputs=output_uris_and_paths,
      artifact_argument_paths=[
          dsl.InputArgumentPath(
              argument=arguments[input_name],
              input=input_name,
              path=path,
          ) for input_name, path in input_uris_and_paths.items()
      ],
  )

  # task.name is unique at this point.
  pipeline_task_spec.task_info.name = task.name
  pipeline_task_spec.executor_label = task.name

  # Attach the v2 specs to the ContainerOp for later compilation.
  task.task_spec = pipeline_task_spec
  task.importer_spec = importer_spec
  task.container_spec = pipeline_container_spec
  dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

  component_meta = copy.copy(component_spec)
  task._set_metadata(component_meta)

  # Previously, ContainerOp had strict requirements for the output names, so we
  # had to convert all the names before passing them to the ContainerOp
  # constructor. Outputs with non-pythonic names could not be accessed using
  # their original names. Now ContainerOp supports any output names, so we're
  # now using the original output names. However to support legacy pipelines,
  # we're also adding output references with pythonic names.
  # TODO: Add warning when people use the legacy output names.
  output_names = [
      output_spec.name for output_spec in component_spec.outputs or []
  ]  # Stabilizing the ordering
  output_name_to_python = generate_unique_name_conversion_table(
      output_names, _sanitize_python_function_name)
  for output_name in output_names:
    pythonic_output_name = output_name_to_python[output_name]
    # Note: Some component outputs are currently missing from task.outputs
    # (e.g. MLPipeline UI Metadata)
    if pythonic_output_name not in task.outputs and output_name in task.outputs:
      task.outputs[pythonic_output_name] = task.outputs[output_name]

  # Propagate component metadata annotations/labels onto the pod.
  if component_spec.metadata:
    annotations = component_spec.metadata.annotations or {}
    for key, value in annotations.items():
      task.add_pod_annotation(key, value)
    for key, value in (component_spec.metadata.labels or {}).items():
      task.add_pod_label(key, value)
      # Disabling the caching for the volatile components by default
    if annotations.get('volatile_component', 'false') == 'true':
      task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

  return task
Esempio n. 20
0
def create_container_op_from_component_and_arguments(
    component_spec: structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: structures.ComponentReference = None,
) -> dsl.ContainerOp:
    """Instantiates a ContainerOp carrying both v1 and v2 (IR) task specs.

  Args:
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
    component_ref: The component reference. Optional; when omitted, the
      task's component reference is left untouched.

  Returns:
    A ContainerOp instance.

  Raises:
    TypeError: If a ContainerOp object is passed as an input argument.
    NotImplementedError: If an input or output uses an unsupported type.
  """

    pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
    pipeline_task_spec.task_info.name = component_spec.name
    # TODO: might need to append a suffix to executor_label to ensure its
    # uniqueness within the pipeline.
    pipeline_task_spec.executor_label = component_spec.name

    # Check types of the reference arguments and serialize PipelineParams.
    # Copy first so the caller's mapping is never mutated.
    arguments = arguments.copy()
    for input_name, argument_value in arguments.items():
        if isinstance(argument_value, dsl.PipelineParam):
            input_type = component_spec._inputs_dict[input_name].type
            reference_type = argument_value.param_type
            types.verify_type_compatibility(
                reference_type, input_type,
                'Incompatible argument passed to the input "{}" of component "{}": '
                .format(input_name, component_spec.name))

            # Replacing an existing key while iterating is safe in CPython
            # (the dict's size does not change).
            arguments[input_name] = str(argument_value)

            if type_utils.is_artifact_type(input_type):
                # argument_value.op_name could be none, in which case an importer node
                # will be inserted later. Use output_artifact_key to preserve the name
                # of pipeline parameter which is needed by importer.
                pipeline_task_spec.inputs.artifacts[
                    input_name].producer_task = (argument_value.op_name or '')
                pipeline_task_spec.inputs.artifacts[
                    input_name].output_artifact_key = (argument_value.name)
            elif type_utils.is_parameter_type(input_type):
                if argument_value.op_name:
                    # Wire this input to the producing task's output parameter.
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.producer_task = (
                            argument_value.op_name)
                    pipeline_task_spec.inputs.parameters[
                        input_name].task_output_parameter.output_parameter_key = (
                            argument_value.name)
                else:
                    # No producer task: the value comes from a pipeline-level
                    # runtime parameter.
                    pipeline_task_spec.inputs.parameters[
                        input_name].runtime_value.runtime_parameter = argument_value.name
            else:
                raise NotImplementedError(
                    'Unsupported input type: "{}". The type must be one of the following: {}.'
                    .format(input_type, type_utils.all_types()))
        elif isinstance(argument_value, str):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.string_value = argument_value
        elif isinstance(argument_value, int):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.int_value = argument_value
        elif isinstance(argument_value, float):
            pipeline_task_spec.inputs.parameters[
                input_name].runtime_value.constant_value.double_value = argument_value
        elif isinstance(argument_value, dsl.ContainerOp):
            raise TypeError(
                'ContainerOp object {} was passed to component as an input argument. '
                'Pass a single output instead.'.format(input_name))
        else:
            raise NotImplementedError(
                'Input argument supports only the following types: PipelineParam'
                ', str, int, float. Got: "{}".'.format(argument_value))

    # Declare the task's outputs in the IR spec, routed to the artifact or
    # parameter channel based on the declared output type.
    for output in component_spec.outputs or []:
        if type_utils.is_artifact_type(output.type):
            pipeline_task_spec.outputs.artifacts[
                output.name].artifact_type.instance_schema = (
                    type_utils.get_artifact_type_schema(output.type))
        elif type_utils.is_parameter_type(output.type):
            pipeline_task_spec.outputs.parameters[
                output.name].type = type_utils.get_parameter_type(output.type)
        else:
            raise NotImplementedError(
                'Unsupported output type: "{}". The type must be one of the following: {}.'
                .format(output.type, type_utils.all_types()))

    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    # IR placeholder generators for the v2 command line.
    def _input_artifact_placeholder(input_key: str) -> str:
        return "{{{{$.inputs.artifacts['{}'].uri}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
        return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

    def _output_artifact_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.artifacts['{}'].uri}}}}".format(output_key)

    def _output_parameter_placeholder(output_key: str) -> str:
        return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(
            output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
        # Choose the parameter vs artifact placeholder from the declared type.
        if type_utils.is_parameter_type(outputs_dict[output_key].type):
            return _output_parameter_placeholder(output_key)
        else:
            return _output_artifact_placeholder(output_key)

    # IR placeholders are decided merely based on the declared type of the input.
    # It doesn't matter whether it's InputValuePlaceholder or InputPathPlaceholder
    # from component_spec.
    placeholder_arguments = {
        input_spec.name: _input_artifact_placeholder(input_spec.name)
        if type_utils.is_artifact_type(input_spec.type) else
        _input_parameter_placeholder(input_spec.name)
        for input_spec in component_spec.inputs or []
    }

    # Resolve twice: once with IR placeholders (for the v2 container spec) and
    # once with the real arguments (for the v1 ContainerOp).
    resolved_cmd_ir = _resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=placeholder_arguments,
        input_path_generator=_input_artifact_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
    )

    resolved_cmd = _resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
    )

    container_spec = component_spec.implementation.container

    pipeline_container_spec = (
        pipeline_spec_pb2.PipelineDeploymentConfig.PipelineContainerSpec())
    pipeline_container_spec.image = container_spec.image
    pipeline_container_spec.command.extend(resolved_cmd_ir.command)
    pipeline_container_spec.args.extend(resolved_cmd_ir.args)

    # Temporarily silence the reusable-component warning while constructing
    # the ContainerOp; restore the previous value right after.
    old_warn_value = dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    task = dsl.ContainerOp(
        name=component_spec.name or _default_component_name,
        image=container_spec.image,
        command=resolved_cmd.command,
        arguments=resolved_cmd.args,
        file_outputs=resolved_cmd.output_paths,
        artifact_argument_paths=[
            dsl.InputArgumentPath(
                argument=arguments[input_name],
                input=input_name,
                path=path,
            ) for input_name, path in resolved_cmd.input_paths.items()
        ],
    )

    task.task_spec = pipeline_task_spec
    task.container_spec = pipeline_container_spec
    dsl.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    component_meta = copy.copy(component_spec)
    task._set_metadata(component_meta)
    # Strip the (potentially large) spec from the reference before attaching
    # it to the task. Guarded: component_ref defaults to None, and
    # copy.copy(None).spec would raise AttributeError.
    if component_ref:
        component_ref_without_spec = copy.copy(component_ref)
        component_ref_without_spec.spec = None
        task._component_ref = component_ref_without_spec

    # Previously, ContainerOp had strict requirements for the output names, so we
    # had to convert all the names before passing them to the ContainerOp
    # constructor. Outputs with non-pythonic names could not be accessed using
    # their original names. Now ContainerOp supports any output names, so we're
    # now using the original output names. However to support legacy pipelines,
    # we're also adding output references with pythonic names.
    # TODO: Add warning when people use the legacy output names.
    output_names = [
        output_spec.name for output_spec in component_spec.outputs or []
    ]  # Stabilizing the ordering
    output_name_to_python = generate_unique_name_conversion_table(
        output_names, _sanitize_python_function_name)
    for output_name in output_names:
        pythonic_output_name = output_name_to_python[output_name]
        # Note: Some component outputs are currently missing from task.outputs
        # (e.g. MLPipeline UI Metadata)
        if pythonic_output_name not in task.outputs and output_name in task.outputs:
            task.outputs[pythonic_output_name] = task.outputs[output_name]

    if component_spec.metadata:
        annotations = component_spec.metadata.annotations or {}
        for key, value in annotations.items():
            task.add_pod_annotation(key, value)
        for key, value in (component_spec.metadata.labels or {}).items():
            task.add_pod_label(key, value)
        # Disabling the caching for the volatile components by default
        if annotations.get('volatile_component', 'false') == 'true':
            task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

    return task
# Esempio n. 21
# 0
def _create_container_op_from_component_and_arguments(
    component_spec: _structures.ComponentSpec,
    arguments: Mapping[str, Any],
    component_ref: Optional[_structures.ComponentReference] = None,
) -> _container_op.ContainerOp:
  """Instantiates ContainerOp object.

  Args:
    component_spec: The component spec object.
    arguments: The dictionary of component arguments.
    component_ref: (only for v1) The component references.

  Returns:
    A ContainerOp instance.

  Raises:
    TypeError: If a ContainerOp object is passed as an input argument.
  """

  # Add component inputs with default value to the arguments dict if they are not
  # in the arguments dict already.
  arguments = arguments.copy()
  for input_spec in component_spec.inputs or []:
    if input_spec.name not in arguments and input_spec.default is not None:
      default_value = input_spec.default
      # Defaults in the component spec are coerced here when the declared type
      # is a numeric one, so downstream serialization sees real numbers.
      if input_spec.type == 'Integer':
        default_value = int(default_value)
      elif input_spec.type == 'Float':
        default_value = float(default_value)
      arguments[input_spec.name] = default_value

  # Check types of the reference arguments and serialize PipelineParams
  # Keep a pre-serialization snapshot: _attach_v2_specs (called at the end)
  # receives the original values, not their stringified forms.
  original_arguments = arguments
  arguments = arguments.copy()
  for input_name, argument_value in arguments.items():
    if isinstance(argument_value, _pipeline_param.PipelineParam):
      input_type = component_spec._inputs_dict[input_name].type
      argument_type = argument_value.param_type
      types.verify_type_compatibility(
          argument_type, input_type,
          'Incompatible argument passed to the input "{}" of component "{}": '
          .format(input_name, component_spec.name))

      # Overwriting an existing key while iterating is safe in CPython
      # (the dict's size does not change).
      arguments[input_name] = str(argument_value)
    # NOTE(review): not `elif` — harmless, since a value cannot be both a
    # PipelineParam and a ContainerOp.
    if isinstance(argument_value, _container_op.ContainerOp):
      raise TypeError(
          'ContainerOp object was passed to component as an input argument. '
          'Pass a single output instead.')
  # The resolver accumulates any extra input/output paths it discovers while
  # resolving non-standard placeholders; they are merged in below.
  placeholder_resolver = ExtraPlaceholderResolver()
  resolved_cmd = _components._resolve_command_line_and_paths(
      component_spec=component_spec,
      arguments=arguments,
      placeholder_resolver=placeholder_resolver.resolve_placeholder,
  )

  container_spec = component_spec.implementation.container

  # Temporarily silence the reusable-component warning while constructing the
  # ContainerOp; the previous value is restored right after construction.
  old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True

  # Merge paths from the command-line resolution with any extra paths the
  # placeholder resolver recorded, preserving insertion order.
  output_paths = collections.OrderedDict(resolved_cmd.output_paths or {})
  output_paths.update(placeholder_resolver.output_paths)
  input_paths = collections.OrderedDict(resolved_cmd.input_paths or {})
  input_paths.update(placeholder_resolver.input_paths)

  artifact_argument_paths = [
      dsl.InputArgumentPath(
          argument=arguments[input_name],
          input=input_name,
          path=path,
      ) for input_name, path in input_paths.items()
  ]

  task = _container_op.ContainerOp(
      name=component_spec.name or _components._default_component_name,
      image=container_spec.image,
      command=resolved_cmd.command,
      arguments=resolved_cmd.args,
      file_outputs=output_paths,
      artifact_argument_paths=artifact_argument_paths,
  )
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

  component_meta = copy.copy(component_spec)
  task._set_metadata(component_meta)
  # Attach a spec-stripped copy of the reference (guarded: ref is optional).
  if component_ref:
    component_ref_without_spec = copy.copy(component_ref)
    component_ref_without_spec.spec = None
    task._component_ref = component_ref_without_spec

  task._parameter_arguments = resolved_cmd.inputs_consumed_by_value

  # Previously, ContainerOp had strict requirements for the output names, so we
  # had to convert all the names before passing them to the ContainerOp
  # constructor.
  # Outputs with non-pythonic names could not be accessed using their original
  # names. Now ContainerOp supports any output names, so we're now using the
  # original output names. However to support legacy pipelines, we're also
  # adding output references with pythonic names.
  # TODO: Add warning when people use the legacy output names.
  output_names = [
      output_spec.name for output_spec in component_spec.outputs or []
  ]  # Stabilizing the ordering
  output_name_to_python = _naming.generate_unique_name_conversion_table(
      output_names, _naming._sanitize_python_function_name)
  for output_name in output_names:
    pythonic_output_name = output_name_to_python[output_name]
    # Note: Some component outputs are currently missing from task.outputs
    # (e.g. MLPipeline UI Metadata)
    if pythonic_output_name not in task.outputs and output_name in task.outputs:
      task.outputs[pythonic_output_name] = task.outputs[output_name]

  # Forward any env vars declared in the component spec to the container.
  # Local import keeps the kubernetes dependency optional until needed.
  if container_spec.env:
    from kubernetes import client as k8s_client
    for name, value in container_spec.env.items():
      task.container.add_env_variable(
          k8s_client.V1EnvVar(name=name, value=value))

  if component_spec.metadata:
    annotations = component_spec.metadata.annotations or {}
    for key, value in annotations.items():
      task.add_pod_annotation(key, value)
    for key, value in (component_spec.metadata.labels or {}).items():
      task.add_pod_label(key, value)
    # Disabling the caching for the volatile components by default
    if annotations.get('volatile_component', 'false') == 'true':
      task.execution_options.caching_strategy.max_cache_staleness = 'P0D'

  # Build and attach the v2 (IR) specs using the pre-serialization arguments.
  _attach_v2_specs(task, component_spec, original_arguments)

  return task
# Esempio n. 22
# 0
  def _resolve_commands_and_args_v2(
      component_spec: _structures.ComponentSpec,
      arguments: Mapping[str, Any],
  ) -> _components._ResolvedCommandLineAndPaths:
    """Resolves the command line argument placeholders for v2 (IR).

    Each placeholder in the component's command line is replaced by an IR
    runtime expression (e.g. ``$.inputs.parameters['x']``). When compiling
    for v2, placeholder/type pairings are validated and a TypeError is raised
    for invalid combinations (e.g. a parameter input used with
    InputUriPlaceholder).

    Args:
      component_spec: The component spec object.
      arguments: The dictionary of component arguments.

    Returns:
      A named tuple: _components._ResolvedCommandLineAndPaths.

    Raises:
      TypeError: When compiling for v2 and a placeholder is paired with an
        incompatible input/output type.
      ValueError: When a required (non-optional) input has no value.
    """
    # Index inputs/outputs by name for the type checks below.
    inputs_dict = {
        input_spec.name: input_spec
        for input_spec in component_spec.inputs or []
    }
    outputs_dict = {
        output_spec.name: output_spec
        for output_spec in component_spec.outputs or []
    }

    def _input_artifact_uri_placeholder(input_key: str) -> str:
      # URI placeholders only make sense for artifacts; reject parameters
      # when compiling for v2.
      if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError('Input "{}" with type "{}" cannot be paired with '
                        'InputUriPlaceholder.'.format(
                            input_key, inputs_dict[input_key].type))
      else:
        return _generate_input_uri_placeholder(input_key)

    def _input_artifact_path_placeholder(input_key: str) -> str:
      # Path placeholders are artifact-only as well.
      if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError('Input "{}" with type "{}" cannot be paired with '
                        'InputPathPlaceholder.'.format(
                            input_key, inputs_dict[input_key].type))
      else:
        return "{{{{$.inputs.artifacts['{}'].path}}}}".format(input_key)

    def _input_parameter_placeholder(input_key: str) -> str:
      # Conversely, value placeholders require a parameter-typed input.
      if kfp.COMPILING_FOR_V2 and not type_utils.is_parameter_type(
          inputs_dict[input_key].type):
        raise TypeError('Input "{}" with type "{}" cannot be paired with '
                        'InputValuePlaceholder.'.format(
                            input_key, inputs_dict[input_key].type))
      else:
        return "{{{{$.inputs.parameters['{}']}}}}".format(input_key)

    def _output_artifact_uri_placeholder(output_key: str) -> str:
      if kfp.COMPILING_FOR_V2 and type_utils.is_parameter_type(
          outputs_dict[output_key].type):
        raise TypeError('Output "{}" with type "{}" cannot be paired with '
                        'OutputUriPlaceholder.'.format(
                            output_key, outputs_dict[output_key].type))
      else:
        return _generate_output_uri_placeholder(output_key)

    def _output_artifact_path_placeholder(output_key: str) -> str:
      return "{{{{$.outputs.artifacts['{}'].path}}}}".format(output_key)

    def _output_parameter_path_placeholder(output_key: str) -> str:
      return "{{{{$.outputs.parameters['{}'].output_file}}}}".format(output_key)

    def _resolve_output_path_placeholder(output_key: str) -> str:
      # Choose the parameter vs artifact form from the declared output type.
      if type_utils.is_parameter_type(outputs_dict[output_key].type):
        return _output_parameter_path_placeholder(output_key)
      else:
        return _output_artifact_path_placeholder(output_key)

    placeholder_resolver = ExtraPlaceholderResolver()
    def _resolve_ir_placeholders_v2(
        arg,
        component_spec: _structures.ComponentSpec,
        arguments: dict,
    ) -> str:
      # NOTE(review): this rebuilds inputs_dict from its own component_spec
      # parameter, shadowing the outer inputs_dict; with the same spec the
      # contents are identical.
      inputs_dict = {input_spec.name: input_spec for input_spec in component_spec.inputs or []}
      if isinstance(arg, _structures.InputValuePlaceholder):
        input_name = arg.input_name
        input_value = arguments.get(input_name, None)
        if input_value is not None:
          return _input_parameter_placeholder(input_name)
        else:
          # Missing value: optional inputs resolve to nothing, required
          # inputs are an error.
          input_spec = inputs_dict[input_name]
          if input_spec.optional:
            return None
          else:
            raise ValueError('No value provided for input {}'.format(input_name))

      elif isinstance(arg, _structures.InputUriPlaceholder):
        input_name = arg.input_name
        if input_name in arguments:
          input_uri = _input_artifact_uri_placeholder(input_name)
          return input_uri
        else:
          input_spec = inputs_dict[input_name]
          if input_spec.optional:
            return None
          else:
            raise ValueError('No value provided for input {}'.format(input_name))

      elif isinstance(arg, _structures.OutputUriPlaceholder):
        output_name = arg.output_name
        output_uri = _output_artifact_uri_placeholder(output_name)
        return output_uri

      # Fall through to the extra resolver for any other placeholder kinds.
      return placeholder_resolver.resolve_placeholder(
        arg=arg,
        component_spec=component_spec,
        arguments=arguments,
      )

    resolved_cmd = _components._resolve_command_line_and_paths(
        component_spec=component_spec,
        arguments=arguments,
        input_path_generator=_input_artifact_path_placeholder,
        output_path_generator=_resolve_output_path_placeholder,
        placeholder_resolver=_resolve_ir_placeholders_v2,
    )
    return resolved_cmd