def test_build_runtime_config_spec(self):
    """build_runtime_config_spec keeps valued params and drops None-valued ones."""
    # 'input4' carries value=None, so it must not show up in parameterValues.
    supplied_params = {
        'input1':
            _pipeline_param.PipelineParam(
                name='input1', param_type='String', value='test'),
        'input2':
            _pipeline_param.PipelineParam(
                name='input2', param_type='Integer', value=2),
        'input3':
            _pipeline_param.PipelineParam(
                name='input3', param_type='List', value=[1, 2, 3]),
        'input4':
            _pipeline_param.PipelineParam(
                name='input4', param_type='Double', value=None),
    }

    want = pipeline_spec_pb2.PipelineJob.RuntimeConfig()
    json_format.ParseDict(
        {
            'gcsOutputDirectory': 'gs://path',
            'parameterValues': {
                'input1': 'test',
                'input2': 2,
                'input3': [1, 2, 3]
            }
        }, want)

    got = compiler_utils.build_runtime_config_spec('gs://path',
                                                   supplied_params)
    self.assertEqual(want, got)
def test_additional_input_name_for_pipelineparam(self):
    """The helper accepts both PipelineParam objects and plain strings."""
    cases = [
        ('pipelineparam--op1-param1',
         _pipeline_param.PipelineParam(name='param1', op_name='op1')),
        ('pipelineparam--param2',
         _pipeline_param.PipelineParam(name='param2')),
        ('pipelineparam--param3', 'param3'),
    ]
    for expected_name, argument in cases:
        self.assertEqual(
            expected_name,
            dsl_component_spec.additional_input_name_for_pipelineparam(
                argument))
def test_build_task_inputs_spec(self):
    """In-DAG producers become task references; others become component inputs."""
    param_specs = [
        ('output1', 'Dataset', 'op-1'),
        ('output2', 'Integer', 'op-2'),
        ('output3', 'Model', 'op-3'),
        ('output4', 'Double', 'op-4'),
    ]
    params = [
        _pipeline_param.PipelineParam(name=n, param_type=t, op_name=op)
        for n, t, op in param_specs
    ]

    want = pipeline_spec_pb2.PipelineTaskSpec()
    json_format.ParseDict(
        {
            'inputs': {
                'artifacts': {
                    'op-1-output1': {
                        'taskOutputArtifact': {
                            'producerTask': 'task-op-1',
                            'outputArtifactKey': 'output1'
                        }
                    },
                    'op-3-output3': {
                        'componentInputArtifact': 'op-3-output3'
                    }
                },
                'parameters': {
                    'op-2-output2': {
                        'taskOutputParameter': {
                            'producerTask': 'task-op-2',
                            'outputParameterKey': 'output2'
                        }
                    },
                    'op-4-output4': {
                        'componentInputParameter': 'op-4-output4'
                    }
                }
            }
        }, want)

    got = pipeline_spec_pb2.PipelineTaskSpec()
    dsl_component_spec.build_task_inputs_spec(got, params, ['op-1', 'op-2'])
    self.assertEqual(want, got)
def test_build_component_inputs_spec(self, is_root_component, expected_result):
    """build_component_inputs_spec emits the expected input definitions."""
    type_by_index = ['Dataset', 'Integer', 'String', 'Float']
    params = [
        _pipeline_param.PipelineParam(name='input%d' % i, param_type=t)
        for i, t in enumerate(type_by_index, start=1)
    ]

    want = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(expected_result, want)

    got = pipeline_spec_pb2.ComponentSpec()
    dsl_component_spec.build_component_inputs_spec(got, params,
                                                   is_root_component)
    self.assertEqual(want, got)
def test_build_component_outputs_spec(self):
    """Artifact-typed outputs get a schema; the rest become typed parameters."""
    params = [
        _pipeline_param.PipelineParam(name=name, param_type=ptype)
        for name, ptype in [('output1', 'Dataset'), ('output2', 'Integer'),
                            ('output3', 'String'), ('output4', 'Float')]
    ]

    want = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(
        {
            'outputDefinitions': {
                'artifacts': {
                    'output1': {
                        'artifactType': {
                            'instanceSchema':
                                'title: kfp.Dataset\ntype: object\nproperties:\n '
                                'payload_format:\n type: string\n '
                                'container_format:\n type: string\n'
                        }
                    }
                },
                'parameters': {
                    'output2': {
                        'type': 'INT'
                    },
                    'output3': {
                        'type': 'STRING'
                    },
                    'output4': {
                        'type': 'DOUBLE'
                    }
                }
            }
        }, want)

    got = pipeline_spec_pb2.ComponentSpec()
    dsl_component_spec.build_component_outputs_spec(got, params)
    self.assertEqual(want, got)
def importer(artifact_uri: Union[_pipeline_param.PipelineParam, str],
             artifact_class: Type[io_types.Artifact],
             reimport: bool = False) -> _container_op.ContainerOp:
    """dsl.importer for importing an existing artifact. Only for v2 pipeline.

    Args:
      artifact_uri: The artifact uri to import from.
      artifact_class: The user-specified artifact class of the artifact to be
        imported.
      reimport: Whether to reimport the artifact. Defaults to False.
        NOTE(review): this flag is not forwarded to the importer spec in this
        implementation -- confirm intended behavior.

    Returns:
      A ContainerOp instance.

    Raises:
      ValueError if the passed in artifact_uri is neither a PipelineParam nor a
        constant string value.
    """
    # Normalize the uri into a PipelineParam so it can be wired as a task input.
    if isinstance(artifact_uri, _pipeline_param.PipelineParam):
        input_param = artifact_uri
    elif isinstance(artifact_uri, str):
        # A constant uri is wrapped in a String param named 'uri'.
        input_param = _pipeline_param.PipelineParam(
            name='uri', value=artifact_uri, param_type='String')
    else:
        raise ValueError(
            'Importer got unexpected artifact_uri: {} of type: {}.'.format(
                artifact_uri, type(artifact_uri)))

    # Temporarily silence the reusable-component warning while building the
    # placeholder ContainerOp. try/finally guarantees the class-level flag is
    # restored even if ContainerOp construction raises (the original code
    # leaked the flipped flag on exception).
    old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
    _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
    try:
        task = _container_op.ContainerOp(
            name='importer',
            image='importer_image',  # TODO: need a v1 implementation of importer.
            file_outputs={
                OUTPUT_KEY:
                    "{{{{$.outputs.artifacts['{}'].uri}}}}".format(OUTPUT_KEY)
            },
        )
    finally:
        _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

    # Attach the v2 IR specs used by the compiler.
    artifact_type_schema = type_utils.get_artifact_type_schema(artifact_class)
    task.importer_spec = _build_importer_spec(
        artifact_uri=artifact_uri, artifact_type_schema=artifact_type_schema)
    task.task_spec = _build_importer_task_spec(
        importer_base_name=task.name, artifact_uri=artifact_uri)
    task.component_spec = _build_importer_component_spec(
        importer_base_name=task.name, artifact_type_schema=artifact_type_schema)
    task.inputs = [input_param]

    return task
def test_build_component_outputs_spec(self):
    """v2 IR: outputs use schemaTitle/schemaVersion and parameterType enums."""
    params = [
        _pipeline_param.PipelineParam(name=name, param_type=ptype)
        for name, ptype in [('output1', 'Dataset'), ('output2', 'Integer'),
                            ('output3', 'String'), ('output4', 'Float')]
    ]

    want = pipeline_spec_pb2.ComponentSpec()
    json_format.ParseDict(
        {
            'outputDefinitions': {
                'artifacts': {
                    'output1': {
                        'artifactType': {
                            'schemaTitle': 'system.Dataset',
                            'schemaVersion': '0.0.1'
                        }
                    }
                },
                'parameters': {
                    'output2': {
                        'parameterType': 'NUMBER_INTEGER'
                    },
                    'output3': {
                        'parameterType': 'STRING'
                    },
                    'output4': {
                        'parameterType': 'NUMBER_DOUBLE'
                    }
                }
            }
        }, want)

    got = pipeline_spec_pb2.ComponentSpec()
    dsl_component_spec.build_component_outputs_spec(got, params)
    self.assertEqual(want, got)
def update_task_inputs_spec(
    task_spec: pipeline_spec_pb2.PipelineTaskSpec,
    parent_component_inputs: pipeline_spec_pb2.ComponentInputsSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    tasks_in_current_dag: List[str],
    input_parameters_in_current_dag: List[str],
    input_artifacts_in_current_dag: List[str],
) -> None:
    """Updates task inputs spec.

    A task input may reference an output outside its immediate DAG.
    For instance::

      random_num = random_num_op(...)
      with dsl.Condition(random_num.output > 5):
        print_op('%s > 5' % random_num.output)

    In this example, `dsl.Condition` forms a sub-DAG with one task from `print_op`
    inside the sub-DAG. The task of `print_op` references output from `random_num`
    task, which is outside the sub-DAG. When compiling to IR, such cross DAG
    reference is disallowed. So we need to "punch a hole" in the sub-DAG to make
    the input available in the sub-DAG component inputs if it's not already there,
    Next, we can call this method to fix the tasks inside the sub-DAG to make them
    reference the component inputs instead of directly referencing the original
    producer task.

    Args:
      task_spec: The task spec to fill in its inputs spec.
      parent_component_inputs: The input spec of the task's parent component.
      pipeline_params: The list of pipeline params.
      tasks_in_current_dag: The list of tasks names for tasks in the same dag.
      input_parameters_in_current_dag: The list of input parameters in the DAG
        component.
      input_artifacts_in_current_dag: The list of input artifacts in the DAG
        component.
    """
    # NOTE(review): pipeline_params is not read in this revision of the
    # function -- presumably kept for signature compatibility; verify.
    if not hasattr(task_spec, 'inputs'):
        return

    # --- Parameters: rewire any reference that crosses the DAG boundary. ---
    for input_name in getattr(task_spec.inputs, 'parameters', []):
        if task_spec.inputs.parameters[input_name].WhichOneof(
                'kind') == 'task_output_parameter' and (
                    task_spec.inputs.parameters[input_name].
                    task_output_parameter.producer_task not in
                    tasks_in_current_dag):
            # Producer task lives outside this DAG: replace the task-output
            # reference with the parent's punched-through component input.
            param = _pipeline_param.PipelineParam(
                name=task_spec.inputs.parameters[input_name].
                task_output_parameter.output_parameter_key,
                op_name=dsl_utils.remove_task_name_prefix(
                    task_spec.inputs.parameters[input_name].
                    task_output_parameter.producer_task))
            component_input_parameter = (
                additional_input_name_for_pipelineparam(param))
            # The hole must already have been punched by the caller.
            assert component_input_parameter in parent_component_inputs.parameters
            task_spec.inputs.parameters[
                input_name].component_input_parameter = component_input_parameter
        elif task_spec.inputs.parameters[input_name].WhichOneof(
                'kind') == 'component_input_parameter':
            component_input_parameter = (
                task_spec.inputs.parameters[input_name].
                component_input_parameter)
            if component_input_parameter not in input_parameters_in_current_dag:
                # Not declared on this DAG component: rename to the prefixed
                # ('pipelineparam--') name exposed by the parent.
                component_input_parameter = (
                    additional_input_name_for_pipelineparam(
                        task_spec.inputs.parameters[input_name].
                        component_input_parameter))
                assert component_input_parameter in parent_component_inputs.parameters
                task_spec.inputs.parameters[
                    input_name].component_input_parameter = component_input_parameter

    # --- Artifacts: same rewiring logic as parameters above. ---
    for input_name in getattr(task_spec.inputs, 'artifacts', []):
        if task_spec.inputs.artifacts[input_name].WhichOneof(
                'kind') == 'task_output_artifact' and (
                    task_spec.inputs.artifacts[input_name].task_output_artifact
                    .producer_task not in tasks_in_current_dag):
            # Producer task lives outside this DAG: fall back to the parent's
            # component input artifact.
            param = _pipeline_param.PipelineParam(
                name=task_spec.inputs.artifacts[input_name].
                task_output_artifact.output_artifact_key,
                op_name=dsl_utils.remove_task_name_prefix(
                    task_spec.inputs.artifacts[input_name].
                    task_output_artifact.producer_task))
            component_input_artifact = (
                additional_input_name_for_pipelineparam(param))
            assert component_input_artifact in parent_component_inputs.artifacts
            task_spec.inputs.artifacts[
                input_name].component_input_artifact = component_input_artifact
        elif task_spec.inputs.artifacts[input_name].WhichOneof(
                'kind') == 'component_input_artifact':
            component_input_artifact = (
                task_spec.inputs.artifacts[input_name].
                component_input_artifact)
            if component_input_artifact not in input_artifacts_in_current_dag:
                # Not declared on this DAG component: use the prefixed name.
                component_input_artifact = (
                    additional_input_name_for_pipelineparam(
                        task_spec.inputs.artifacts[input_name].
                        component_input_artifact))
                assert component_input_artifact in parent_component_inputs.artifacts
                task_spec.inputs.artifacts[
                    input_name].component_input_artifact = component_input_artifact
class ComponentSpecTest(parameterized.TestCase):
    """Tests for the dsl_component_spec helpers that build and patch IR specs."""

    # Shared fixture: four task outputs (produced by op-1..op-4) plus one
    # pipeline-level argument ('arg_input', op_name=None).
    TEST_PIPELINE_PARAMS = [
        _pipeline_param.PipelineParam(
            name='output1', param_type='Dataset', op_name='op-1'),
        _pipeline_param.PipelineParam(
            name='output2', param_type='Integer', op_name='op-2'),
        _pipeline_param.PipelineParam(
            name='output3', param_type='Model', op_name='op-3'),
        _pipeline_param.PipelineParam(
            name='output4', param_type='Double', op_name='op-4'),
        _pipeline_param.PipelineParam(
            name='arg_input', param_type='String', op_name=None),
    ]

    def setUp(self):
        # Show full diffs on assertion failures; proto diffs can be large.
        self.maxDiff = None

    def test_build_component_spec_from_structure(self):
        # Only inputs listed in actual_inputs are emitted; 'input4' (optional,
        # untyped) is dropped from the result.
        structure_component_spec = structures.ComponentSpec(
            name='component1',
            description='component1 desc',
            inputs=[
                structures.InputSpec(
                    name='input1', description='input1 desc', type='Dataset'),
                structures.InputSpec(
                    name='input2', description='input2 desc', type='String'),
                structures.InputSpec(
                    name='input3', description='input3 desc', type='Integer'),
                structures.InputSpec(
                    name='input4', description='optional inputs',
                    optional=True),
            ],
            outputs=[
                structures.OutputSpec(
                    name='output1', description='output1 desc', type='Model')
            ])
        expected_dict = {
            'inputDefinitions': {
                'artifacts': {
                    'input1': {
                        'artifactType': {
                            'schemaTitle': 'system.Dataset'
                        }
                    }
                },
                'parameters': {
                    'input2': {
                        'type': 'STRING'
                    },
                    'input3': {
                        'type': 'INT'
                    }
                }
            },
            'outputDefinitions': {
                'artifacts': {
                    'output1': {
                        'artifactType': {
                            'schemaTitle': 'system.Model'
                        }
                    }
                }
            },
            'executorLabel': 'exec-component1'
        }
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        component_spec = (
            dsl_component_spec.build_component_spec_from_structure(
                component_spec=structure_component_spec,
                executor_label='exec-component1',
                actual_inputs=['input1', 'input2', 'input3'],
            ))
        self.assertEqual(expected_spec, component_spec)

    @parameterized.parameters(
        {
            'is_root_component': True,
            'expected_result': {
                'inputDefinitions': {
                    'artifacts': {
                        'input1': {
                            'artifactType': {
                                'schemaTitle': 'system.Dataset'
                            }
                        }
                    },
                    'parameters': {
                        'input2': {
                            'type': 'INT'
                        },
                        'input3': {
                            'type': 'STRING'
                        },
                        'input4': {
                            'type': 'DOUBLE'
                        }
                    }
                }
            }
        },
        {
            'is_root_component': False,
            'expected_result': {
                'inputDefinitions': {
                    'artifacts': {
                        'pipelineparam--input1': {
                            'artifactType': {
                                'schemaTitle': 'system.Dataset'
                            }
                        }
                    },
                    'parameters': {
                        'pipelineparam--input2': {
                            'type': 'INT'
                        },
                        'pipelineparam--input3': {
                            'type': 'STRING'
                        },
                        'pipelineparam--input4': {
                            'type': 'DOUBLE'
                        }
                    }
                }
            }
        },
    )
    def test_build_component_inputs_spec(self, is_root_component,
                                         expected_result):
        # Non-root components get the 'pipelineparam--' prefix on input names.
        pipeline_params = [
            _pipeline_param.PipelineParam(name='input1', param_type='Dataset'),
            _pipeline_param.PipelineParam(name='input2', param_type='Integer'),
            _pipeline_param.PipelineParam(name='input3', param_type='String'),
            _pipeline_param.PipelineParam(name='input4', param_type='Float'),
        ]
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_result, expected_spec)
        component_spec = pipeline_spec_pb2.ComponentSpec()
        dsl_component_spec.build_component_inputs_spec(component_spec,
                                                       pipeline_params,
                                                       is_root_component)
        self.assertEqual(expected_spec, component_spec)

    def test_build_component_outputs_spec(self):
        # Artifact-typed params become output artifacts; primitive-typed
        # params become output parameters with IR primitive types.
        pipeline_params = [
            _pipeline_param.PipelineParam(name='output1', param_type='Dataset'),
            _pipeline_param.PipelineParam(name='output2', param_type='Integer'),
            _pipeline_param.PipelineParam(name='output3', param_type='String'),
            _pipeline_param.PipelineParam(name='output4', param_type='Float'),
        ]
        expected_dict = {
            'outputDefinitions': {
                'artifacts': {
                    'output1': {
                        'artifactType': {
                            'schemaTitle': 'system.Dataset'
                        }
                    }
                },
                'parameters': {
                    'output2': {
                        'type': 'INT'
                    },
                    'output3': {
                        'type': 'STRING'
                    },
                    'output4': {
                        'type': 'DOUBLE'
                    }
                }
            }
        }
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        component_spec = pipeline_spec_pb2.ComponentSpec()
        dsl_component_spec.build_component_outputs_spec(
            component_spec, pipeline_params)
        self.assertEqual(expected_spec, component_spec)

    @parameterized.parameters(
        {
            'is_parent_component_root': True,
            'expected_result': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'taskOutputArtifact': {
                                'producerTask': 'op-1',
                                'outputArtifactKey': 'output1'
                            }
                        },
                        'pipelineparam--op-3-output3': {
                            'componentInputArtifact': 'op-3-output3'
                        }
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'taskOutputParameter': {
                                'producerTask': 'op-2',
                                'outputParameterKey': 'output2'
                            }
                        },
                        'pipelineparam--op-4-output4': {
                            'componentInputParameter': 'op-4-output4'
                        },
                        'pipelineparam--arg_input': {
                            'componentInputParameter': 'arg_input'
                        }
                    }
                }
            }
        },
        {
            'is_parent_component_root': False,
            'expected_result': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'taskOutputArtifact': {
                                'producerTask': 'op-1',
                                'outputArtifactKey': 'output1'
                            }
                        },
                        'pipelineparam--op-3-output3': {
                            'componentInputArtifact':
                                'pipelineparam--op-3-output3'
                        }
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'taskOutputParameter': {
                                'producerTask': 'op-2',
                                'outputParameterKey': 'output2'
                            }
                        },
                        'pipelineparam--op-4-output4': {
                            'componentInputParameter':
                                'pipelineparam--op-4-output4'
                        },
                        'pipelineparam--arg_input': {
                            'componentInputParameter':
                                'pipelineparam--arg_input'
                        }
                    }
                }
            }
        },
    )
    def test_build_task_inputs_spec(self, is_parent_component_root,
                                    expected_result):
        # Outputs produced inside the current DAG reference their producer
        # task; everything else falls back to component inputs (prefixed
        # when the parent component is not the root).
        pipeline_params = self.TEST_PIPELINE_PARAMS
        tasks_in_current_dag = ['op-1', 'op-2']
        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_result, expected_spec)
        task_spec = pipeline_spec_pb2.PipelineTaskSpec()
        dsl_component_spec.build_task_inputs_spec(task_spec, pipeline_params,
                                                  tasks_in_current_dag,
                                                  is_parent_component_root)
        self.assertEqual(expected_spec, task_spec)

    @parameterized.parameters(
        {
            # Empty task spec: update is a no-op.
            'original_task_spec': {},
            'parent_component_inputs': {},
            'tasks_in_current_dag': [],
            'input_parameters_in_current_dag': [],
            'input_artifacts_in_current_dag': [],
            'expected_result': {},
        },
        {
            # Depending on tasks & inputs within the current DAG.
            'original_task_spec': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'taskOutputArtifact': {
                                'producerTask': 'op-1',
                                'outputArtifactKey': 'output1'
                            }
                        },
                        'artifact1': {
                            'componentInputArtifact': 'artifact1'
                        },
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'taskOutputParameter': {
                                'producerTask': 'op-2',
                                'outputParameterKey': 'output2'
                            }
                        },
                        'param1': {
                            'componentInputParameter': 'param1'
                        },
                    }
                }
            },
            'parent_component_inputs': {
                'artifacts': {
                    'artifact1': {
                        'artifactType': {
                            'instanceSchema': 'dummy_schema'
                        }
                    },
                },
                'parameters': {
                    'param1': {
                        'type': 'STRING'
                    },
                }
            },
            'tasks_in_current_dag': ['op-1', 'op-2'],
            'input_parameters_in_current_dag': ['param1'],
            'input_artifacts_in_current_dag': ['artifact1'],
            # Everything resolves inside the DAG, so the spec is unchanged.
            'expected_result': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'taskOutputArtifact': {
                                'producerTask': 'op-1',
                                'outputArtifactKey': 'output1'
                            }
                        },
                        'artifact1': {
                            'componentInputArtifact': 'artifact1'
                        },
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'taskOutputParameter': {
                                'producerTask': 'op-2',
                                'outputParameterKey': 'output2'
                            }
                        },
                        'param1': {
                            'componentInputParameter': 'param1'
                        },
                    }
                }
            },
        },
        {
            # Depending on tasks and inputs not available in the current DAG.
            'original_task_spec': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'taskOutputArtifact': {
                                'producerTask': 'op-1',
                                'outputArtifactKey': 'output1'
                            }
                        },
                        'artifact1': {
                            'componentInputArtifact': 'artifact1'
                        },
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'taskOutputParameter': {
                                'producerTask': 'op-2',
                                'outputParameterKey': 'output2'
                            }
                        },
                        'param1': {
                            'componentInputParameter': 'param1'
                        },
                    }
                }
            },
            'parent_component_inputs': {
                'artifacts': {
                    'pipelineparam--op-1-output1': {
                        'artifactType': {
                            'instanceSchema': 'dummy_schema'
                        }
                    },
                    'pipelineparam--artifact1': {
                        'artifactType': {
                            'instanceSchema': 'dummy_schema'
                        }
                    },
                },
                'parameters': {
                    'pipelineparam--op-2-output2': {
                        'type': 'INT'
                    },
                    'pipelineparam--param1': {
                        'type': 'STRING'
                    },
                }
            },
            'tasks_in_current_dag': ['op-3'],
            'input_parameters_in_current_dag': [
                'pipelineparam--op-2-output2', 'pipelineparam--param1'
            ],
            'input_artifacts_in_current_dag': [
                'pipelineparam--op-1-output1', 'pipelineparam--artifact1'
            ],
            # All references are rewritten to the parent's component inputs.
            'expected_result': {
                'inputs': {
                    'artifacts': {
                        'pipelineparam--op-1-output1': {
                            'componentInputArtifact':
                                'pipelineparam--op-1-output1'
                        },
                        'artifact1': {
                            'componentInputArtifact': 'pipelineparam--artifact1'
                        },
                    },
                    'parameters': {
                        'pipelineparam--op-2-output2': {
                            'componentInputParameter':
                                'pipelineparam--op-2-output2'
                        },
                        'param1': {
                            'componentInputParameter': 'pipelineparam--param1'
                        },
                    }
                }
            },
        },
    )
    def test_update_task_inputs_spec(self, original_task_spec,
                                     parent_component_inputs,
                                     tasks_in_current_dag,
                                     input_parameters_in_current_dag,
                                     input_artifacts_in_current_dag,
                                     expected_result):
        pipeline_params = self.TEST_PIPELINE_PARAMS
        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_result, expected_spec)
        task_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(original_task_spec, task_spec)
        parent_component_inputs_spec = pipeline_spec_pb2.ComponentInputsSpec()
        json_format.ParseDict(parent_component_inputs,
                              parent_component_inputs_spec)
        dsl_component_spec.update_task_inputs_spec(
            task_spec, parent_component_inputs_spec, pipeline_params,
            tasks_in_current_dag, input_parameters_in_current_dag,
            input_artifacts_in_current_dag)
        self.assertEqual(expected_spec, task_spec)

    def test_pop_input_from_component_spec(self):
        # Start with one artifact input and two parameter inputs, then pop
        # them one by one.
        component_spec = pipeline_spec_pb2.ComponentSpec(
            executor_label='exec-component1')
        component_spec.input_definitions.artifacts[
            'input1'].artifact_type.instance_schema = 'dummy_schema'
        component_spec.input_definitions.parameters[
            'input2'].type = pipeline_spec_pb2.PrimitiveType.STRING
        component_spec.input_definitions.parameters[
            'input3'].type = pipeline_spec_pb2.PrimitiveType.DOUBLE

        # pop an artifact, and there're other inputs left
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input1')
        expected_dict = {
            'inputDefinitions': {
                'parameters': {
                    'input2': {
                        'type': 'STRING'
                    },
                    'input3': {
                        'type': 'DOUBLE'
                    }
                }
            },
            'executorLabel': 'exec-component1'
        }
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, component_spec)

        # pop an parameter, and there're other inputs left
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input2')
        expected_dict = {
            'inputDefinitions': {
                'parameters': {
                    'input3': {
                        'type': 'DOUBLE'
                    }
                }
            },
            'executorLabel': 'exec-component1'
        }
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, component_spec)

        # pop the last input, expect no inputDefinitions
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input3')
        expected_dict = {'executorLabel': 'exec-component1'}
        expected_spec = pipeline_spec_pb2.ComponentSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, component_spec)

        # pop an input that doesn't exist, expect no-op.
        dsl_component_spec.pop_input_from_component_spec(
            component_spec, 'input4')
        self.assertEqual(expected_spec, component_spec)

    def test_pop_input_from_task_spec(self):
        # Start with one artifact input and two parameter inputs, then pop
        # them one by one.
        task_spec = pipeline_spec_pb2.PipelineTaskSpec()
        task_spec.component_ref.name = 'comp-component1'
        task_spec.inputs.artifacts[
            'input1'].task_output_artifact.producer_task = 'op-1'
        task_spec.inputs.artifacts[
            'input1'].task_output_artifact.output_artifact_key = 'output1'
        task_spec.inputs.parameters[
            'input2'].task_output_parameter.producer_task = 'op-2'
        task_spec.inputs.parameters[
            'input2'].task_output_parameter.output_parameter_key = 'output2'
        task_spec.inputs.parameters[
            'input3'].component_input_parameter = 'op3-output3'

        # pop an parameter, and there're other inputs left
        dsl_component_spec.pop_input_from_task_spec(task_spec, 'input3')
        expected_dict = {
            'inputs': {
                'artifacts': {
                    'input1': {
                        'taskOutputArtifact': {
                            'producerTask': 'op-1',
                            'outputArtifactKey': 'output1'
                        }
                    }
                },
                'parameters': {
                    'input2': {
                        'taskOutputParameter': {
                            'producerTask': 'op-2',
                            'outputParameterKey': 'output2'
                        }
                    }
                }
            },
            'component_ref': {
                'name': 'comp-component1'
            }
        }
        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, task_spec)

        # pop an artifact, and there're other inputs left
        dsl_component_spec.pop_input_from_task_spec(task_spec, 'input1')
        expected_dict = {
            'inputs': {
                'parameters': {
                    'input2': {
                        'taskOutputParameter': {
                            'producerTask': 'op-2',
                            'outputParameterKey': 'output2'
                        }
                    }
                }
            },
            'component_ref': {
                'name': 'comp-component1'
            }
        }
        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, task_spec)

        # pop the last input, expect no inputDefinitions
        dsl_component_spec.pop_input_from_task_spec(task_spec, 'input2')
        expected_dict = {'component_ref': {'name': 'comp-component1'}}
        expected_spec = pipeline_spec_pb2.PipelineTaskSpec()
        json_format.ParseDict(expected_dict, expected_spec)
        self.assertEqual(expected_spec, task_spec)

        # pop an input that doesn't exist, expect no-op.
        dsl_component_spec.pop_input_from_task_spec(task_spec, 'input4')
        self.assertEqual(expected_spec, task_spec)

    def test_additional_input_name_for_pipelineparam(self):
        # Accepts both PipelineParam objects and plain strings.
        self.assertEqual(
            'pipelineparam--op1-param1',
            dsl_component_spec.additional_input_name_for_pipelineparam(
                _pipeline_param.PipelineParam(name='param1', op_name='op1')))
        self.assertEqual(
            'pipelineparam--param2',
            dsl_component_spec.additional_input_name_for_pipelineparam(
                _pipeline_param.PipelineParam(name='param2')))
        self.assertEqual(
            'pipelineparam--param3',
            dsl_component_spec.additional_input_name_for_pipelineparam(
                'param3'))
def update_task_inputs_spec(
    task_spec: pipeline_spec_pb2.PipelineTaskSpec,
    parent_component_inputs: pipeline_spec_pb2.ComponentInputsSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    tasks_in_current_dag: List[str],
    input_parameters_in_current_dag: List[str],
    input_artifacts_in_current_dag: List[str],
) -> None:
    """Updates task inputs spec.

    A task input may reference an output outside its immediate DAG.
    For instance::

      random_num = random_num_op(...)
      with dsl.Condition(random_num.output > 5):
        print_op('%s > 5' % random_num.output)

    In this example, `dsl.Condition` forms a sub-DAG with one task from `print_op`
    inside the sub-DAG. The task of `print_op` references output from `random_num`
    task, which is outside the sub-DAG. When compiling to IR, such cross DAG
    reference is disallowed. So we need to "punch a hole" in the sub-DAG to make
    the input available in the sub-DAG component inputs if it's not already there,
    Next, we can call this method to fix the tasks inside the sub-DAG to make them
    reference the component inputs instead of directly referencing the original
    producer task.

    Args:
      task_spec: The task spec to fill in its inputs spec.
      parent_component_inputs: The input spec of the task's parent component.
      pipeline_params: The list of pipeline params.
      tasks_in_current_dag: The list of tasks names for tasks in the same dag.
      input_parameters_in_current_dag: The list of input parameters in the DAG
        component.
      input_artifacts_in_current_dag: The list of input artifacts in the DAG
        component.
    """
    if not hasattr(task_spec, 'inputs'):
        return

    # --- Parameters: rewire references that cross the DAG boundary. ---
    for input_name in getattr(task_spec.inputs, 'parameters', []):
        if task_spec.inputs.parameters[input_name].WhichOneof(
                'kind') == 'task_output_parameter' and (
                    task_spec.inputs.parameters[input_name].
                    task_output_parameter.producer_task not in
                    tasks_in_current_dag):
            # Producer task is outside this DAG: replace the task-output
            # reference with the parent's punched-through component input.
            param = _pipeline_param.PipelineParam(
                name=task_spec.inputs.parameters[input_name].
                task_output_parameter.output_parameter_key,
                op_name=task_spec.inputs.parameters[input_name].
                task_output_parameter.producer_task)
            component_input_parameter = (
                additional_input_name_for_pipelineparam(param.full_name))
            if component_input_parameter in parent_component_inputs.parameters:
                task_spec.inputs.parameters[
                    input_name].component_input_parameter = component_input_parameter
                continue
            # The input not found in parent's component input definitions
            # This could happen because of loop arguments variables
            param_name, subvar_name = _exclude_loop_arguments_variables(param)
            if subvar_name:
                # Select the subvariable out of the JSON-serialized loop item.
                task_spec.inputs.parameters[
                    input_name].parameter_expression_selector = (
                        'parseJson(string_value)["{}"]'.format(subvar_name))
            component_input_parameter = (
                additional_input_name_for_pipelineparam(param_name))
            assert component_input_parameter in parent_component_inputs.parameters, \
                'component_input_parameter: {} not found. All inputs: {}'.format(
                    component_input_parameter, parent_component_inputs)
            task_spec.inputs.parameters[
                input_name].component_input_parameter = component_input_parameter
        elif task_spec.inputs.parameters[input_name].WhichOneof(
                'kind') == 'component_input_parameter':
            component_input_parameter = (
                task_spec.inputs.parameters[input_name].
                component_input_parameter)
            # Already declared on the parent as-is: nothing to do.
            if component_input_parameter in parent_component_inputs.parameters:
                continue
            # Declared under the 'pipelineparam--' prefixed name: rename.
            if additional_input_name_for_pipelineparam(
                    component_input_parameter
            ) in parent_component_inputs.parameters:
                task_spec.inputs.parameters[
                    input_name].component_input_parameter = (
                        additional_input_name_for_pipelineparam(
                            component_input_parameter))
                continue
            # The input not found in parent's component input definitions
            # This could happen because of loop arguments variables
            component_input_parameter, subvar_name = _exclude_loop_arguments_variables(
                component_input_parameter)
            if subvar_name:
                task_spec.inputs.parameters[
                    input_name].parameter_expression_selector = (
                        'parseJson(string_value)["{}"]'.format(subvar_name))
            # NOTE(review): the two prefix attempts below can apply the
            # 'pipelineparam--' prefix twice when the first rename still
            # misses parent_component_inputs -- confirm this is intended.
            if component_input_parameter not in input_parameters_in_current_dag:
                component_input_parameter = (
                    additional_input_name_for_pipelineparam(
                        component_input_parameter))
            if component_input_parameter not in parent_component_inputs.parameters:
                component_input_parameter = (
                    additional_input_name_for_pipelineparam(
                        component_input_parameter))
            assert component_input_parameter in parent_component_inputs.parameters, \
                'component_input_parameter: {} not found. All inputs: {}'.format(
                    component_input_parameter, parent_component_inputs)
            task_spec.inputs.parameters[
                input_name].component_input_parameter = component_input_parameter

    # --- Artifacts: same rewiring as parameters, minus loop-argument logic. ---
    for input_name in getattr(task_spec.inputs, 'artifacts', []):
        if task_spec.inputs.artifacts[input_name].WhichOneof(
                'kind') == 'task_output_artifact' and (
                    task_spec.inputs.artifacts[input_name].task_output_artifact
                    .producer_task not in tasks_in_current_dag):
            # Producer task is outside this DAG: fall back to the parent's
            # component input artifact.
            param = _pipeline_param.PipelineParam(
                name=task_spec.inputs.artifacts[input_name].
                task_output_artifact.output_artifact_key,
                op_name=task_spec.inputs.artifacts[input_name].
                task_output_artifact.producer_task)
            component_input_artifact = (
                additional_input_name_for_pipelineparam(param))
            assert component_input_artifact in parent_component_inputs.artifacts, \
                'component_input_artifact: {} not found. All inputs: {}'.format(
                    component_input_artifact, parent_component_inputs)
            task_spec.inputs.artifacts[
                input_name].component_input_artifact = component_input_artifact
        elif task_spec.inputs.artifacts[input_name].WhichOneof(
                'kind') == 'component_input_artifact':
            component_input_artifact = (
                task_spec.inputs.artifacts[input_name].
                component_input_artifact)
            if component_input_artifact not in input_artifacts_in_current_dag:
                # Not declared on this DAG component: use the prefixed name.
                component_input_artifact = (
                    additional_input_name_for_pipelineparam(
                        task_spec.inputs.artifacts[input_name].
                        component_input_artifact))
                assert component_input_artifact in parent_component_inputs.artifacts, \
                    'component_input_artifact: {} not found. All inputs: {}'.format(
                        component_input_artifact, parent_component_inputs)
                task_spec.inputs.artifacts[
                    input_name].component_input_artifact = component_input_artifact
class ImporterNodeTest(parameterized.TestCase):
    """Tests for the importer_node helpers that build importer IR specs."""

    @parameterized.parameters(
        {
            # artifact_uri is a constant value
            'input_uri':
                'gs://artifact',
            'artifact_type_schema':
                pb.ArtifactTypeSchema(schema_title='system.Dataset'),
            'expected_result': {
                'artifactUri': {
                    'constantValue': {
                        'stringValue': 'gs://artifact'
                    }
                },
                'typeSchema': {
                    'schemaTitle': 'system.Dataset'
                }
            }
        },
        {
            # artifact_uri is from PipelineParam
            'input_uri':
                _pipeline_param.PipelineParam(name='uri_to_import'),
            'artifact_type_schema':
                pb.ArtifactTypeSchema(schema_title='system.Model'),
            'expected_result': {
                'artifactUri': {
                    'runtimeParameter': 'uri'
                },
                'typeSchema': {
                    'schemaTitle': 'system.Model'
                }
            },
        })
    def test_build_importer_spec(self, input_uri, artifact_type_schema,
                                 expected_result):
        # Constant uris become constantValue; params become runtimeParameter.
        expected_importer_spec = pb.PipelineDeploymentConfig.ImporterSpec()
        json_format.ParseDict(expected_result, expected_importer_spec)
        importer_spec = importer_node._build_importer_spec(
            artifact_uri=input_uri, artifact_type_schema=artifact_type_schema)
        self.maxDiff = None
        self.assertEqual(expected_importer_spec, importer_spec)

    @parameterized.parameters(
        {
            # artifact_uri is a constant value
            'importer_name': 'importer-1',
            'input_uri': 'gs://artifact',
            'expected_result': {
                'taskInfo': {
                    'name': 'importer-1'
                },
                'inputs': {
                    'parameters': {
                        'uri': {
                            'runtimeValue': {
                                'constantValue': {
                                    'stringValue': 'gs://artifact'
                                }
                            }
                        }
                    }
                },
                'componentRef': {
                    'name': 'comp-importer-1'
                },
            }
        },
        {
            # artifact_uri is from PipelineParam
            'importer_name': 'importer-2',
            'input_uri': _pipeline_param.PipelineParam(name='uri_to_import'),
            'expected_result': {
                'taskInfo': {
                    'name': 'importer-2'
                },
                'inputs': {
                    'parameters': {
                        'uri': {
                            'componentInputParameter': 'uri_to_import'
                        }
                    }
                },
                'componentRef': {
                    'name': 'comp-importer-2'
                },
            },
        })
    def test_build_importer_task_spec(self, importer_name, input_uri,
                                      expected_result):
        # The task references its component as 'comp-<importer_base_name>'.
        expected_task_spec = pb.PipelineTaskSpec()
        json_format.ParseDict(expected_result, expected_task_spec)
        task_spec = importer_node._build_importer_task_spec(
            importer_base_name=importer_name, artifact_uri=input_uri)
        self.maxDiff = None
        self.assertEqual(expected_task_spec, task_spec)

    def test_build_importer_component_spec(self):
        # The importer component has a single 'uri' parameter input and a
        # single 'artifact' output of the requested type.
        expected_importer_component = {
            'inputDefinitions': {
                'parameters': {
                    'uri': {
                        'type': 'STRING'
                    }
                }
            },
            'outputDefinitions': {
                'artifacts': {
                    'artifact': {
                        'artifactType': {
                            'schemaTitle': 'system.Artifact'
                        }
                    }
                }
            },
            'executorLabel': 'exec-importer-1'
        }
        expected_importer_comp_spec = pb.ComponentSpec()
        json_format.ParseDict(expected_importer_component,
                              expected_importer_comp_spec)
        importer_comp_spec = importer_node._build_importer_component_spec(
            importer_base_name='importer-1',
            artifact_type_schema=pb.ArtifactTypeSchema(
                schema_title='system.Artifact'))
        self.maxDiff = None
        self.assertEqual(expected_importer_comp_spec, importer_comp_spec)

    def test_import_with_invalid_artifact_uri_value_should_fail(self):
        # Anything other than a str or PipelineParam uri is rejected.
        from kfp.dsl.io_types import Dataset
        with self.assertRaisesRegex(
                ValueError,
                "Importer got unexpected artifact_uri: 123 of type: <class 'int'>."
        ):
            importer_node.importer(artifact_uri=123, artifact_class=Dataset)