def test_build_runtime_config_spec(self):
        expected_dict = {
            'gcsOutputDirectory': 'gs://path',
            'parameterValues': {
                'input1': 'test',
                'input2': 2,
                'input3': [1, 2, 3]
            }
        }
        expected_spec = pipeline_spec_pb2.PipelineJob.RuntimeConfig()
        json_format.ParseDict(expected_dict, expected_spec)

        runtime_config = compiler_utils.build_runtime_config_spec(
            'gs://path', {
                'input1':
                _pipeline_param.PipelineParam(
                    name='input1', param_type='String', value='test'),
                'input2':
                _pipeline_param.PipelineParam(
                    name='input2', param_type='Integer', value=2),
                'input3':
                _pipeline_param.PipelineParam(
                    name='input3', param_type='List', value=[1, 2, 3]),
                'input4':
                _pipeline_param.PipelineParam(
                    name='input4', param_type='Double', value=None)
            })
        self.assertEqual(expected_spec, runtime_config)
    def test_additional_input_name_for_pipelineparam(self):
        self.assertEqual(
            'pipelineparam--op1-param1',
            dsl_component_spec.additional_input_name_for_pipelineparam(
                _pipeline_param.PipelineParam(name='param1', op_name='op1')))
        self.assertEqual(
            'pipelineparam--param2',
            dsl_component_spec.additional_input_name_for_pipelineparam(
                _pipeline_param.PipelineParam(name='param2')))
        self.assertEqual(
            'pipelineparam--param3',
            dsl_component_spec.additional_input_name_for_pipelineparam(
                'param3'))
Example #3
    def test_build_task_inputs_spec(self):
        pipeline_params = [
            _pipeline_param.PipelineParam(name='output1',
                                          param_type='Dataset',
                                          op_name='op-1'),
            _pipeline_param.PipelineParam(name='output2',
                                          param_type='Integer',
                                          op_name='op-2'),
            _pipeline_param.PipelineParam(name='output3',
                                          param_type='Model',
                                          op_name='op-3'),
            _pipeline_param.PipelineParam(name='output4',
                                          param_type='Double',
                                          op_name='op-4'),
        ]
        tasks_in_current_dag = ['op-1', 'op-2']
        expected_dict = {
            'inputs': {
                'artifacts': {
                    'op-1-output1': {
                        'taskOutputArtifact': {
                            'producerTask': 'task-op-1',
                            'outputArtifactKey': 'output1'
                        }
                    },
                    'op-3-output3': {
                        'componentInputArtifact': 'op-3-output3'
                    }
                },
                'parameters': {
                    'op-2-output2': {
                        'taskOutputParameter': {
                            'producerTask': 'task-op-2',
                            'outputParameterKey': 'output2'
                        }
                    },
                    'op-4-output4': {
                        'componentInputParameter': 'op-4-output4'
                    }
                }
            }
        }
        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_dict, expected_spec)

        task_spec = pipeline_spec_pb2.PipelineTaskSpec()
        dsl_component_spec.build_task_inputs_spec(task_spec, pipeline_params,
                                                  tasks_in_current_dag)

        self.assertEqual(expected_spec, task_spec)
Example #4
    def test_build_component_inputs_spec(self, is_root_component,
                                         expected_result):
        pipeline_params = [
            _pipeline_param.PipelineParam(name='input1', param_type='Dataset'),
            _pipeline_param.PipelineParam(name='input2', param_type='Integer'),
            _pipeline_param.PipelineParam(name='input3', param_type='String'),
            _pipeline_param.PipelineParam(name='input4', param_type='Float'),
        ]
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_result, expected_spec)

        component_spec = pipeline_spec_pb2.ComponentSpec()
        dsl_component_spec.build_component_inputs_spec(component_spec,
                                                       pipeline_params,
                                                       is_root_component)

        self.assertEqual(expected_spec, component_spec)
    def test_build_component_outputs_spec(self):
        pipeline_params = [
            _pipeline_param.PipelineParam(name='output1',
                                          param_type='Dataset'),
            _pipeline_param.PipelineParam(name='output2',
                                          param_type='Integer'),
            _pipeline_param.PipelineParam(name='output3', param_type='String'),
            _pipeline_param.PipelineParam(name='output4', param_type='Float'),
        ]
        expected_dict = {
            'outputDefinitions': {
                'artifacts': {
                    'output1': {
                        'artifactType': {
                            'instanceSchema':
                            'title: kfp.Dataset\ntype: object\nproperties:\n  '
                            'payload_format:\n    type: string\n  '
                            'container_format:\n    type: string\n'
                        }
                    }
                },
                'parameters': {
                    'output2': {
                        'type': 'INT'
                    },
                    'output3': {
                        'type': 'STRING'
                    },
                    'output4': {
                        'type': 'DOUBLE'
                    }
                }
            }
        }
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)

        component_spec = pipeline_spec_pb2.ComponentSpec()
        dsl_component_spec.build_component_outputs_spec(
            component_spec, pipeline_params)

        self.assertEqual(expected_spec, component_spec)
Example #6
def importer(artifact_uri: Union[_pipeline_param.PipelineParam, str],
             artifact_class: Type[io_types.Artifact],
             reimport: bool = False) -> _container_op.ContainerOp:
    """dsl.importer for importing an existing artifact. Only for v2 pipeline.

  Args:
    artifact_uri: The artifact uri to import from.
    artifact_class: The artifact class (a subclass of io_types.Artifact) of the
      artifact to be imported.
    reimport: Whether to reimport the artifact. Defaults to False.

  Returns:
    A ContainerOp instance.

  Raises:
    ValueError: if the passed-in artifact_uri is neither a PipelineParam nor a
      constant string value.
  """

    if isinstance(artifact_uri, _pipeline_param.PipelineParam):
        input_param = artifact_uri
    elif isinstance(artifact_uri, str):
        input_param = _pipeline_param.PipelineParam(name='uri',
                                                    value=artifact_uri,
                                                    param_type='String')
    else:
        raise ValueError(
            'Importer got unexpected artifact_uri: {} of type: {}.'.format(
                artifact_uri, type(artifact_uri)))

    old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True

    task = _container_op.ContainerOp(
        name='importer',
        image='importer_image',  # TODO: need a v1 implementation of importer.
        file_outputs={
            OUTPUT_KEY:
            "{{{{$.outputs.artifacts['{}'].uri}}}}".format(OUTPUT_KEY)
        },
    )
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    artifact_type_schema = type_utils.get_artifact_type_schema(artifact_class)
    task.importer_spec = _build_importer_spec(
        artifact_uri=artifact_uri, artifact_type_schema=artifact_type_schema)
    task.task_spec = _build_importer_task_spec(importer_base_name=task.name,
                                               artifact_uri=artifact_uri)
    task.component_spec = _build_importer_component_spec(
        importer_base_name=task.name,
        artifact_type_schema=artifact_type_schema)
    task.inputs = [input_param]

    return task
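
A minimal usage sketch for the importer function above (not part of the original source): it assumes `Dataset` is importable from kfp.dsl.io_types, as in the last test of this listing, and the pipeline function name and bucket path are purely hypothetical.

from kfp.dsl.io_types import Dataset

def my_pipeline():
    # Import an existing artifact from a constant URI (hypothetical path).
    imported_dataset = importer(
        artifact_uri='gs://my-bucket/path/to/data.csv',
        artifact_class=Dataset,
        reimport=False)
    # `imported_dataset` is a ContainerOp; its single output (keyed by
    # OUTPUT_KEY) can be fed to downstream tasks as an input artifact.
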
    def test_build_component_outputs_spec(self):
        pipeline_params = [
            _pipeline_param.PipelineParam(name='output1',
                                          param_type='Dataset'),
            _pipeline_param.PipelineParam(name='output2',
                                          param_type='Integer'),
            _pipeline_param.PipelineParam(name='output3', param_type='String'),
            _pipeline_param.PipelineParam(name='output4', param_type='Float'),
        ]
        expected_dict = {
            'outputDefinitions': {
                'artifacts': {
                    'output1': {
                        'artifactType': {
                            'schemaTitle': 'system.Dataset',
                            'schemaVersion': '0.0.1'
                        }
                    }
                },
                'parameters': {
                    'output2': {
                        'parameterType': 'NUMBER_INTEGER'
                    },
                    'output3': {
                        'parameterType': 'STRING'
                    },
                    'output4': {
                        'parameterType': 'NUMBER_DOUBLE'
                    }
                }
            }
        }
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)

        component_spec = pipeline_spec_pb2.ComponentSpec()
        dsl_component_spec.build_component_outputs_spec(
            component_spec, pipeline_params)

        self.assertEqual(expected_spec, component_spec)
Example #8
def update_task_inputs_spec(
    task_spec: pipeline_spec_pb2.PipelineTaskSpec,
    parent_component_inputs: pipeline_spec_pb2.ComponentInputsSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    tasks_in_current_dag: List[str],
    input_parameters_in_current_dag: List[str],
    input_artifacts_in_current_dag: List[str],
) -> None:
    """Updates task inputs spec.

  A task input may reference an output outside its immediate DAG.
  For instance::

    random_num = random_num_op(...)
    with dsl.Condition(random_num.output > 5):
      print_op('%s > 5' % random_num.output)

  In this example, `dsl.Condition` forms a sub-DAG containing the `print_op`
  task. That task references the output of the `random_num` task, which lives
  outside the sub-DAG. When compiling to IR, such cross-DAG references are
  disallowed, so we need to "punch a hole" in the sub-DAG and expose the input
  in the sub-DAG's component inputs if it is not already there. Then we can
  call this method to fix the tasks inside the sub-DAG so that they reference
  the component inputs instead of directly referencing the original producer
  task.

  Args:
    task_spec: The task spec to fill in its inputs spec.
    parent_component_inputs: The input spec of the task's parent component.
    pipeline_params: The list of pipeline params.
    tasks_in_current_dag: The list of task names for tasks in the same DAG.
    input_parameters_in_current_dag: The list of input parameters in the DAG
      component.
    input_artifacts_in_current_dag: The list of input artifacts in the DAG
      component.
  """
    if not hasattr(task_spec, 'inputs'):
        return

    for input_name in getattr(task_spec.inputs, 'parameters', []):

        if task_spec.inputs.parameters[input_name].WhichOneof(
                'kind') == 'task_output_parameter' and (
                    task_spec.inputs.parameters[input_name].
                    task_output_parameter.producer_task
                    not in tasks_in_current_dag):

            param = _pipeline_param.PipelineParam(
                name=task_spec.inputs.parameters[input_name].
                task_output_parameter.output_parameter_key,
                op_name=dsl_utils.remove_task_name_prefix(
                    task_spec.inputs.parameters[input_name].
                    task_output_parameter.producer_task))
            component_input_parameter = (
                additional_input_name_for_pipelineparam(param))
            assert component_input_parameter in parent_component_inputs.parameters

            task_spec.inputs.parameters[
                input_name].component_input_parameter = component_input_parameter

        elif task_spec.inputs.parameters[input_name].WhichOneof(
                'kind') == 'component_input_parameter':

            component_input_parameter = (
                task_spec.inputs.parameters[input_name].
                component_input_parameter)

            if component_input_parameter not in input_parameters_in_current_dag:
                component_input_parameter = (
                    additional_input_name_for_pipelineparam(
                        task_spec.inputs.parameters[input_name].
                        component_input_parameter))
                assert component_input_parameter in parent_component_inputs.parameters

                task_spec.inputs.parameters[
                    input_name].component_input_parameter = component_input_parameter

    for input_name in getattr(task_spec.inputs, 'artifacts', []):

        if task_spec.inputs.artifacts[input_name].WhichOneof(
                'kind') == 'task_output_artifact' and (
                    task_spec.inputs.artifacts[input_name].task_output_artifact
                    .producer_task not in tasks_in_current_dag):

            param = _pipeline_param.PipelineParam(
                name=task_spec.inputs.artifacts[input_name].
                task_output_artifact.output_artifact_key,
                op_name=dsl_utils.remove_task_name_prefix(
                    task_spec.inputs.artifacts[input_name].
                    task_output_artifact.producer_task))
            component_input_artifact = (
                additional_input_name_for_pipelineparam(param))
            assert component_input_artifact in parent_component_inputs.artifacts

            task_spec.inputs.artifacts[
                input_name].component_input_artifact = component_input_artifact

        elif task_spec.inputs.artifacts[input_name].WhichOneof(
                'kind') == 'component_input_artifact':

            component_input_artifact = (task_spec.inputs.artifacts[input_name].
                                        component_input_artifact)

            if component_input_artifact not in input_artifacts_in_current_dag:
                component_input_artifact = (
                    additional_input_name_for_pipelineparam(
                        task_spec.inputs.artifacts[input_name].
                        component_input_artifact))
                assert component_input_artifact in parent_component_inputs.artifacts

                task_spec.inputs.artifacts[
                    input_name].component_input_artifact = component_input_artifact
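
To make the "punch a hole" behavior described in the docstring concrete, here is a hedged sketch (not from the original source) that rewrites a task input whose producer task lives outside the current DAG. The task and parameter names follow the conventions used by the tests in this listing, and pipeline_spec_pb2 is assumed to come from kfp.pipeline_spec.

from google.protobuf import json_format
from kfp.pipeline_spec import pipeline_spec_pb2  # assumed import path

# A task input fed by 'task-op-2', which is not among the tasks in the
# current (sub-)DAG.
task_spec = pipeline_spec_pb2.PipelineTaskSpec()
json_format.ParseDict(
    {
        'inputs': {
            'parameters': {
                'pipelineparam--op-2-output2': {
                    'taskOutputParameter': {
                        'producerTask': 'task-op-2',
                        'outputParameterKey': 'output2'
                    }
                }
            }
        }
    }, task_spec)

# The parent component already exposes the "hole" as one of its inputs.
parent_inputs = pipeline_spec_pb2.ComponentInputsSpec()
json_format.ParseDict(
    {'parameters': {
        'pipelineparam--op-2-output2': {
            'type': 'INT'
        }
    }}, parent_inputs)

update_task_inputs_spec(
    task_spec=task_spec,
    parent_component_inputs=parent_inputs,
    pipeline_params=[],
    tasks_in_current_dag=['task-op-3'],
    input_parameters_in_current_dag=[],
    input_artifacts_in_current_dag=[])

# The task now reads from the parent component input instead of the
# out-of-DAG producer task:
#   task_spec.inputs.parameters['pipelineparam--op-2-output2']
#       .component_input_parameter == 'pipelineparam--op-2-output2'
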
Example #9
class ComponentSpecTest(parameterized.TestCase):

    TEST_PIPELINE_PARAMS = [
        _pipeline_param.PipelineParam(name='output1',
                                      param_type='Dataset',
                                      op_name='op-1'),
        _pipeline_param.PipelineParam(name='output2',
                                      param_type='Integer',
                                      op_name='op-2'),
        _pipeline_param.PipelineParam(name='output3',
                                      param_type='Model',
                                      op_name='op-3'),
        _pipeline_param.PipelineParam(name='output4',
                                      param_type='Double',
                                      op_name='op-4'),
        _pipeline_param.PipelineParam(name='arg_input',
                                      param_type='String',
                                      op_name=None),
    ]

    def setUp(self):
        self.maxDiff = None

    def test_build_component_spec_from_structure(self):
        structure_component_spec = structures.ComponentSpec(
            name='component1',
            description='component1 desc',
            inputs=[
                structures.InputSpec(name='input1',
                                     description='input1 desc',
                                     type='Dataset'),
                structures.InputSpec(name='input2',
                                     description='input2 desc',
                                     type='String'),
                structures.InputSpec(name='input3',
                                     description='input3 desc',
                                     type='Integer'),
                structures.InputSpec(name='input4',
                                     description='optional inputs',
                                     optional=True),
            ],
            outputs=[
                structures.OutputSpec(name='output1',
                                      description='output1 desc',
                                      type='Model')
            ])
        expected_dict = {
            'inputDefinitions': {
                'artifacts': {
                    'input1': {
                        'artifactType': {
                            'schemaTitle': 'system.Dataset'
                        }
                    }
                },
                'parameters': {
                    'input2': {
                        'type': 'STRING'
                    },
                    'input3': {
                        'type': 'INT'
                    }
                }
            },
            'outputDefinitions': {
                'artifacts': {
                    'output1': {
                        'artifactType': {
                            'schemaTitle': 'system.Model'
                        }
                    }
                }
            },
            'executorLabel': 'exec-component1'
        }
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)

        component_spec = (
            dsl_component_spec.build_component_spec_from_structure(
                component_spec=structure_component_spec,
                executor_label='exec-component1',
                actual_inputs=['input1', 'input2', 'input3'],
            ))

        self.assertEqual(expected_spec, component_spec)

    @parameterized.parameters(
        {
            'is_root_component': True,
            'expected_result': {
                'inputDefinitions': {
                    'artifacts': {
                        'input1': {
                            'artifactType': {
                                'schemaTitle': 'system.Dataset'
                            }
                        }
                    },
                    'parameters': {
                        'input2': {
                            'type': 'INT'
                        },
                        'input3': {
                            'type': 'STRING'
                        },
                        'input4': {
                            'type': 'DOUBLE'
                        }
                    }
                }
            }
        },
        {
            'is_root_component': False,
            'expected_result': {
                'inputDefinitions': {
                    'artifacts': {
                        'pipelineparam--input1': {
                            'artifactType': {
                                'schemaTitle': 'system.Dataset'
                            }
                        }
                    },
                    'parameters': {
                        'pipelineparam--input2': {
                            'type': 'INT'
                        },
                        'pipelineparam--input3': {
                            'type': 'STRING'
                        },
                        'pipelineparam--input4': {
                            'type': 'DOUBLE'
                        }
                    }
                }
            }
        },
    )
    def test_build_component_inputs_spec(self, is_root_component,
                                         expected_result):
        pipeline_params = [
            _pipeline_param.PipelineParam(name='input1', param_type='Dataset'),
            _pipeline_param.PipelineParam(name='input2', param_type='Integer'),
            _pipeline_param.PipelineParam(name='input3', param_type='String'),
            _pipeline_param.PipelineParam(name='input4', param_type='Float'),
        ]
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_result, expected_spec)

        component_spec = pipeline_spec_pb2.ComponentSpec()
        dsl_component_spec.build_component_inputs_spec(component_spec,
                                                       pipeline_params,
                                                       is_root_component)

        self.assertEqual(expected_spec, component_spec)

    def test_build_component_outputs_spec(self):
        pipeline_params = [
            _pipeline_param.PipelineParam(name='output1',
                                          param_type='Dataset'),
            _pipeline_param.PipelineParam(name='output2',
                                          param_type='Integer'),
            _pipeline_param.PipelineParam(name='output3', param_type='String'),
            _pipeline_param.PipelineParam(name='output4', param_type='Float'),
        ]
        expected_dict = {
            'outputDefinitions': {
                'artifacts': {
                    'output1': {
                        'artifactType': {
                            'schemaTitle': 'system.Dataset'
                        }
                    }
                },
                'parameters': {
                    'output2': {
                        'type': 'INT'
                    },
                    'output3': {
                        'type': 'STRING'
                    },
                    'output4': {
                        'type': 'DOUBLE'
                    }
                }
            }
        }
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)

        component_spec = pipeline_spec_pb2.ComponentSpec()
        dsl_component_spec.build_component_outputs_spec(
            component_spec, pipeline_params)

        self.assertEqual(expected_spec, component_spec)

    @parameterized.parameters(
        {
            'is_parent_component_root': True,
            'expected_result': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'taskOutputArtifact': {
                                'producerTask': 'op-1',
                                'outputArtifactKey': 'output1'
                            }
                        },
                        'pipelineparam--op-3-output3': {
                            'componentInputArtifact': 'op-3-output3'
                        }
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'taskOutputParameter': {
                                'producerTask': 'op-2',
                                'outputParameterKey': 'output2'
                            }
                        },
                        'pipelineparam--op-4-output4': {
                            'componentInputParameter': 'op-4-output4'
                        },
                        'pipelineparam--arg_input': {
                            'componentInputParameter': 'arg_input'
                        }
                    }
                }
            }
        },
        {
            'is_parent_component_root': False,
            'expected_result': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'taskOutputArtifact': {
                                'producerTask': 'op-1',
                                'outputArtifactKey': 'output1'
                            }
                        },
                        'pipelineparam--op-3-output3': {
                            'componentInputArtifact':
                            'pipelineparam--op-3-output3'
                        }
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'taskOutputParameter': {
                                'producerTask': 'op-2',
                                'outputParameterKey': 'output2'
                            }
                        },
                        'pipelineparam--op-4-output4': {
                            'componentInputParameter':
                            'pipelineparam--op-4-output4'
                        },
                        'pipelineparam--arg_input': {
                            'componentInputParameter':
                            'pipelineparam--arg_input'
                        }
                    }
                }
            }
        },
    )
    def test_build_task_inputs_spec(self, is_parent_component_root,
                                    expected_result):
        pipeline_params = self.TEST_PIPELINE_PARAMS
        tasks_in_current_dag = ['op-1', 'op-2']
        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_result, expected_spec)

        task_spec = pipeline_spec_pb2.PipelineTaskSpec()
        dsl_component_spec.build_task_inputs_spec(task_spec, pipeline_params,
                                                  tasks_in_current_dag,
                                                  is_parent_component_root)

        self.assertEqual(expected_spec, task_spec)

    @parameterized.parameters(
        {
            'original_task_spec': {},
            'parent_component_inputs': {},
            'tasks_in_current_dag': [],
            'input_parameters_in_current_dag': [],
            'input_artifacts_in_current_dag': [],
            'expected_result': {},
        },
        { # Depending on tasks & inputs within the current DAG.
            'original_task_spec': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'taskOutputArtifact': {
                                'producerTask': 'op-1',
                                'outputArtifactKey': 'output1'
                            }
                        },
                        'artifact1': {
                          'componentInputArtifact': 'artifact1'
                        },
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'taskOutputParameter': {
                                'producerTask': 'op-2',
                                'outputParameterKey': 'output2'
                            }
                        },
                        'param1': {
                          'componentInputParameter': 'param1'
                        },
                    }
                }
            },
            'parent_component_inputs': {
              'artifacts': {
                'artifact1': {
                  'artifactType': {
                    'instanceSchema': 'dummy_schema'
                  }
                },
              },
              'parameters': {
                'param1': {
                  'type': 'STRING'
                },
              }
            },
            'tasks_in_current_dag': ['op-1', 'op-2'],
            'input_parameters_in_current_dag': ['param1'],
            'input_artifacts_in_current_dag': ['artifact1'],
            'expected_result': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'taskOutputArtifact': {
                                'producerTask': 'op-1',
                                'outputArtifactKey': 'output1'
                            }
                        },
                        'artifact1': {
                          'componentInputArtifact': 'artifact1'
                        },
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'taskOutputParameter': {
                                'producerTask': 'op-2',
                                'outputParameterKey': 'output2'
                            }
                        },
                        'param1': {
                          'componentInputParameter': 'param1'
                        },
                    }
                }
            },
        },
        { # Depending on tasks and inputs not available in the current DAG.
            'original_task_spec': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'taskOutputArtifact': {
                                'producerTask': 'op-1',
                                'outputArtifactKey': 'output1'
                            }
                        },
                        'artifact1': {
                          'componentInputArtifact': 'artifact1'
                        },
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'taskOutputParameter': {
                                'producerTask': 'op-2',
                                'outputParameterKey': 'output2'
                            }
                        },
                        'param1': {
                          'componentInputParameter': 'param1'
                        },
                    }
                }
            },
            'parent_component_inputs': {
                'artifacts': {
                    'pipelineparam--op-1-output1': {
                        'artifactType': {
                            'instanceSchema': 'dummy_schema'
                        }
                    },
                    'pipelineparam--artifact1': {
                      'artifactType': {
                        'instanceSchema': 'dummy_schema'
                      }
                    },
                },
                'parameters': {
                  'pipelineparam--op-2-output2' : {
                    'type': 'INT'
                  },
                  'pipelineparam--param1': {
                    'type': 'STRING'
                  },
                }
            },
            'tasks_in_current_dag': ['op-3'],
            'input_parameters_in_current_dag': ['pipelineparam--op-2-output2', 'pipelineparam--param1'],
            'input_artifacts_in_current_dag': ['pipelineparam--op-1-output1', 'pipelineparam--artifact1'],
            'expected_result': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'componentInputArtifact':
                                'pipelineparam--op-1-output1'
                        },
                        'artifact1': {
                          'componentInputArtifact': 'pipelineparam--artifact1'
                        },
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'componentInputParameter':
                                'pipelineparam--op-2-output2'
                        },
                        'param1': {
                          'componentInputParameter': 'pipelineparam--param1'
                        },
                    }
                }
            },
        },
    )
    def test_update_task_inputs_spec(self, original_task_spec,
                                     parent_component_inputs,
                                     tasks_in_current_dag,
                                     input_parameters_in_current_dag,
                                     input_artifacts_in_current_dag,
                                     expected_result):
        pipeline_params = self.TEST_PIPELINE_PARAMS

        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_result, expected_spec)

        task_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(original_task_spec, task_spec)
        parent_component_inputs_spec = pipeline_spec_pb2.ComponentInputsSpec()
        json_format.ParseDict(parent_component_inputs,
                              parent_component_inputs_spec)
        dsl_component_spec.update_task_inputs_spec(
            task_spec, parent_component_inputs_spec, pipeline_params,
            tasks_in_current_dag, input_parameters_in_current_dag,
            input_artifacts_in_current_dag)

        self.assertEqual(expected_spec, task_spec)

    def test_pop_input_from_component_spec(self):
        component_spec = pipeline_spec_pb2.ComponentSpec(
            executor_label='exec-component1')

        component_spec.input_definitions.artifacts[
            'input1'].artifact_type.schema_title = 'system.Dataset'
        component_spec.input_definitions.parameters[
            'input2'].type = pipeline_spec_pb2.PrimitiveType.STRING
        component_spec.input_definitions.parameters[
            'input3'].type = pipeline_spec_pb2.PrimitiveType.DOUBLE

        # pop an artifact; there are other inputs left
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input1')
        expected_dict = {
            'inputDefinitions': {
                'parameters': {
                    'input2': {
                        'type': 'STRING'
                    },
                    'input3': {
                        'type': 'DOUBLE'
                    }
                }
            },
            'executorLabel': 'exec-component1'
        }
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, component_spec)

        # pop a parameter; there are other inputs left
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input2')
        expected_dict = {
            'inputDefinitions': {
                'parameters': {
                    'input3': {
                        'type': 'DOUBLE'
                    }
                }
            },
            'executorLabel': 'exec-component1'
        }
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, component_spec)

        # pop the last input, expect no inputDefinitions
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input3')
        expected_dict = {'executorLabel': 'exec-component1'}
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, component_spec)

        # pop an input that doesn't exist, expect no-op.
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input4')
        self.assertEqual(expected_spec, component_spec)

    def test_pop_input_from_task_spec(self):
        task_spec = pipeline_spec_pb2.PipelineTaskSpec()
        task_spec.component_ref.name = 'comp-component1'
        task_spec.inputs.artifacts[
            'input1'].task_output_artifact.producer_task = 'op-1'
        task_spec.inputs.artifacts[
            'input1'].task_output_artifact.output_artifact_key = 'output1'
        task_spec.inputs.parameters[
            'input2'].task_output_parameter.producer_task = 'op-2'
        task_spec.inputs.parameters[
            'input2'].task_output_parameter.output_parameter_key = 'output2'
        task_spec.inputs.parameters[
            'input3'].component_input_parameter = 'op3-output3'

        # pop a parameter; there are other inputs left
        dsl_component_spec.pop_input_from_task_spec(task_spec, 'input3')
        expected_dict = {
            'inputs': {
                'artifacts': {
                    'input1': {
                        'taskOutputArtifact': {
                            'producerTask': 'op-1',
                            'outputArtifactKey': 'output1'
                        }
                    }
                },
                'parameters': {
                    'input2': {
                        'taskOutputParameter': {
                            'producerTask': 'op-2',
                            'outputParameterKey': 'output2'
                        }
                    }
                }
            },
            'component_ref': {
                'name': 'comp-component1'
            }
        }
        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, task_spec)

        # pop an artifact; there are other inputs left
        dsl_component_spec.pop_input_from_task_spec(task_spec, 'input1')
        expected_dict = {
            'inputs': {
                'parameters': {
                    'input2': {
                        'taskOutputParameter': {
                            'producerTask': 'op-2',
                            'outputParameterKey': 'output2'
                        }
                    }
                }
            },
            'component_ref': {
                'name': 'comp-component1'
            }
        }
        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, task_spec)

        # pop the last input, expect no inputs left
        dsl_component_spec.pop_input_from_task_spec(task_spec, 'input2')
        expected_dict = {'component_ref': {'name': 'comp-component1'}}
        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, task_spec)

        # pop an input that doesn't exist, expect no-op.
        dsl_component_spec.pop_input_from_task_spec(task_spec, 'input4')
        self.assertEqual(expected_spec, task_spec)

    def test_additional_input_name_for_pipelineparam(self):
        self.assertEqual(
            'pipelineparam--op1-param1',
            dsl_component_spec.additional_input_name_for_pipelineparam(
                _pipeline_param.PipelineParam(name='param1', op_name='op1')))
        self.assertEqual(
            'pipelineparam--param2',
            dsl_component_spec.additional_input_name_for_pipelineparam(
                _pipeline_param.PipelineParam(name='param2')))
        self.assertEqual(
            'pipelineparam--param3',
            dsl_component_spec.additional_input_name_for_pipelineparam(
                'param3'))
Example #10
def update_task_inputs_spec(
    task_spec: pipeline_spec_pb2.PipelineTaskSpec,
    parent_component_inputs: pipeline_spec_pb2.ComponentInputsSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    tasks_in_current_dag: List[str],
    input_parameters_in_current_dag: List[str],
    input_artifacts_in_current_dag: List[str],
) -> None:
    """Updates task inputs spec.

  A task input may reference an output outside its immediate DAG.
  For instance::

    random_num = random_num_op(...)
    with dsl.Condition(random_num.output > 5):
      print_op('%s > 5' % random_num.output)

  In this example, `dsl.Condition` forms a sub-DAG containing the `print_op`
  task. That task references the output of the `random_num` task, which lives
  outside the sub-DAG. When compiling to IR, such cross-DAG references are
  disallowed, so we need to "punch a hole" in the sub-DAG and expose the input
  in the sub-DAG's component inputs if it is not already there. Then we can
  call this method to fix the tasks inside the sub-DAG so that they reference
  the component inputs instead of directly referencing the original producer
  task.

  Args:
    task_spec: The task spec to fill in its inputs spec.
    parent_component_inputs: The input spec of the task's parent component.
    pipeline_params: The list of pipeline params.
    tasks_in_current_dag: The list of task names for tasks in the same DAG.
    input_parameters_in_current_dag: The list of input parameters in the DAG
      component.
    input_artifacts_in_current_dag: The list of input artifacts in the DAG
      component.
  """
    if not hasattr(task_spec, 'inputs'):
        return

    for input_name in getattr(task_spec.inputs, 'parameters', []):

        if task_spec.inputs.parameters[input_name].WhichOneof(
                'kind') == 'task_output_parameter' and (
                    task_spec.inputs.parameters[input_name].
                    task_output_parameter.producer_task
                    not in tasks_in_current_dag):

            param = _pipeline_param.PipelineParam(
                name=task_spec.inputs.parameters[input_name].
                task_output_parameter.output_parameter_key,
                op_name=task_spec.inputs.parameters[input_name].
                task_output_parameter.producer_task)

            component_input_parameter = (
                additional_input_name_for_pipelineparam(param.full_name))

            if component_input_parameter in parent_component_inputs.parameters:
                task_spec.inputs.parameters[
                    input_name].component_input_parameter = component_input_parameter
                continue

            # The input is not found in the parent's component input
            # definitions. This can happen with loop-argument variables.
            param_name, subvar_name = _exclude_loop_arguments_variables(param)
            if subvar_name:
                task_spec.inputs.parameters[
                    input_name].parameter_expression_selector = (
                        'parseJson(string_value)["{}"]'.format(subvar_name))

            component_input_parameter = (
                additional_input_name_for_pipelineparam(param_name))

            assert component_input_parameter in parent_component_inputs.parameters, \
                'component_input_parameter: {} not found. All inputs: {}'.format(
                    component_input_parameter, parent_component_inputs)

            task_spec.inputs.parameters[
                input_name].component_input_parameter = component_input_parameter

        elif task_spec.inputs.parameters[input_name].WhichOneof(
                'kind') == 'component_input_parameter':

            component_input_parameter = (
                task_spec.inputs.parameters[input_name].
                component_input_parameter)

            if component_input_parameter in parent_component_inputs.parameters:
                continue

            if additional_input_name_for_pipelineparam(
                    component_input_parameter
            ) in parent_component_inputs.parameters:
                task_spec.inputs.parameters[
                    input_name].component_input_parameter = (
                        additional_input_name_for_pipelineparam(
                            component_input_parameter))
                continue

            # The input is not found in the parent's component input
            # definitions. This can happen with loop-argument variables.
            component_input_parameter, subvar_name = _exclude_loop_arguments_variables(
                component_input_parameter)

            if subvar_name:
                task_spec.inputs.parameters[
                    input_name].parameter_expression_selector = (
                        'parseJson(string_value)["{}"]'.format(subvar_name))

            if component_input_parameter not in input_parameters_in_current_dag:
                component_input_parameter = (
                    additional_input_name_for_pipelineparam(
                        component_input_parameter))

            if component_input_parameter not in parent_component_inputs.parameters:
                component_input_parameter = (
                    additional_input_name_for_pipelineparam(
                        component_input_parameter))

            assert component_input_parameter in parent_component_inputs.parameters, \
              'component_input_parameter: {} not found. All inputs: {}'.format(
                  component_input_parameter, parent_component_inputs)

            task_spec.inputs.parameters[
                input_name].component_input_parameter = component_input_parameter

    for input_name in getattr(task_spec.inputs, 'artifacts', []):

        if task_spec.inputs.artifacts[input_name].WhichOneof(
                'kind') == 'task_output_artifact' and (
                    task_spec.inputs.artifacts[input_name].task_output_artifact
                    .producer_task not in tasks_in_current_dag):

            param = _pipeline_param.PipelineParam(
                name=task_spec.inputs.artifacts[input_name].
                task_output_artifact.output_artifact_key,
                op_name=task_spec.inputs.artifacts[input_name].
                task_output_artifact.producer_task)
            component_input_artifact = (
                additional_input_name_for_pipelineparam(param))
            assert component_input_artifact in parent_component_inputs.artifacts, \
              'component_input_artifact: {} not found. All inputs: {}'.format(
                  component_input_artifact, parent_component_inputs)

            task_spec.inputs.artifacts[
                input_name].component_input_artifact = component_input_artifact

        elif task_spec.inputs.artifacts[input_name].WhichOneof(
                'kind') == 'component_input_artifact':

            component_input_artifact = (task_spec.inputs.artifacts[input_name].
                                        component_input_artifact)

            if component_input_artifact not in input_artifacts_in_current_dag:
                component_input_artifact = (
                    additional_input_name_for_pipelineparam(
                        task_spec.inputs.artifacts[input_name].
                        component_input_artifact))
                assert component_input_artifact in parent_component_inputs.artifacts, \
                'component_input_artifact: {} not found. All inputs: {}'.format(
                    component_input_artifact, parent_component_inputs)

                task_spec.inputs.artifacts[
                    input_name].component_input_artifact = component_input_artifact
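
Unlike the first copy of this function earlier in the listing, this variant also handles loop-argument subvariables by attaching a parameter_expression_selector to the rewritten input. As an illustration only (the parameter and subvariable names are hypothetical, and _exclude_loop_arguments_variables is not shown in this listing), a rewritten input for a subfield 'a' of a loop item would end up shaped roughly like this:

from google.protobuf import json_format
from kfp.pipeline_spec import pipeline_spec_pb2  # assumed import path

task_spec = pipeline_spec_pb2.PipelineTaskSpec()
json_format.ParseDict(
    {
        'inputs': {
            'parameters': {
                # Hypothetical loop-item subvariable input after the rewrite:
                # the whole loop item flows in through the component input and
                # the selector extracts the 'a' field at runtime.
                'pipelineparam--loop-item-param-subvar-a': {
                    'componentInputParameter': 'pipelineparam--loop-item-param',
                    'parameterExpressionSelector':
                        'parseJson(string_value)["a"]'
                }
            }
        }
    }, task_spec)
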
Example #11
class ImporterNodeTest(parameterized.TestCase):

  @parameterized.parameters(
      {
          # artifact_uri is a constant value
          'input_uri':
              'gs://artifact',
          'artifact_type_schema':
              pb.ArtifactTypeSchema(schema_title='system.Dataset'),
          'expected_result': {
              'artifactUri': {
                  'constantValue': {
                      'stringValue': 'gs://artifact'
                  }
              },
              'typeSchema': {
                  'schemaTitle': 'system.Dataset'
              }
          }
      },
      {
          # artifact_uri is from PipelineParam
          'input_uri':
              _pipeline_param.PipelineParam(name='uri_to_import'),
          'artifact_type_schema':
              pb.ArtifactTypeSchema(schema_title='system.Model'),
          'expected_result': {
              'artifactUri': {
                  'runtimeParameter': 'uri'
              },
              'typeSchema': {
                  'schemaTitle': 'system.Model'
              }
          },
      })
  def test_build_importer_spec(self, input_uri, artifact_type_schema,
                               expected_result):
    expected_importer_spec = pb.PipelineDeploymentConfig.ImporterSpec()
    json_format.ParseDict(expected_result, expected_importer_spec)
    importer_spec = importer_node._build_importer_spec(
        artifact_uri=input_uri, artifact_type_schema=artifact_type_schema)

    self.maxDiff = None
    self.assertEqual(expected_importer_spec, importer_spec)

  @parameterized.parameters(
      {
          # artifact_uri is a constant value
          'importer_name': 'importer-1',
          'input_uri': 'gs://artifact',
          'expected_result': {
              'taskInfo': {
                  'name': 'importer-1'
              },
              'inputs': {
                  'parameters': {
                      'uri': {
                          'runtimeValue': {
                              'constantValue': {
                                  'stringValue': 'gs://artifact'
                              }
                          }
                      }
                  }
              },
              'componentRef': {
                  'name': 'comp-importer-1'
              },
          }
      },
      {
          # artifact_uri is from PipelineParam
          'importer_name': 'importer-2',
          'input_uri': _pipeline_param.PipelineParam(name='uri_to_import'),
          'expected_result': {
              'taskInfo': {
                  'name': 'importer-2'
              },
              'inputs': {
                  'parameters': {
                      'uri': {
                          'componentInputParameter': 'uri_to_import'
                      }
                  }
              },
              'componentRef': {
                  'name': 'comp-importer-2'
              },
          },
      })
  def test_build_importer_task_spec(self, importer_name, input_uri,
                                    expected_result):
    expected_task_spec = pb.PipelineTaskSpec()
    json_format.ParseDict(expected_result, expected_task_spec)

    task_spec = importer_node._build_importer_task_spec(
        importer_base_name=importer_name, artifact_uri=input_uri)

    self.maxDiff = None
    self.assertEqual(expected_task_spec, task_spec)

  def test_build_importer_component_spec(self):
    expected_importer_component = {
        'inputDefinitions': {
            'parameters': {
                'uri': {
                    'type': 'STRING'
                }
            }
        },
        'outputDefinitions': {
            'artifacts': {
                'artifact': {
                    'artifactType': {
                        'schemaTitle': 'system.Artifact'
                    }
                }
            }
        },
        'executorLabel': 'exec-importer-1'
    }
    expected_importer_comp_spec = pb.ComponentSpec()
    json_format.ParseDict(expected_importer_component,
                          expected_importer_comp_spec)
    importer_comp_spec = importer_node._build_importer_component_spec(
        importer_base_name='importer-1',
        artifact_type_schema=pb.ArtifactTypeSchema(
            schema_title='system.Artifact'))

    self.maxDiff = None
    self.assertEqual(expected_importer_comp_spec, importer_comp_spec)

  def test_import_with_invalid_artifact_uri_value_should_fail(self):
    from kfp.dsl.io_types import Dataset
    with self.assertRaisesRegex(
        ValueError,
        "Importer got unexpected artifact_uri: 123 of type: <class 'int'>."):
      importer_node.importer(artifact_uri=123, artifact_class=Dataset)