def test_component_metadata(self):
    """Test component decorator metadata."""

    class MockContainerOp:
        # Stand-in for a ContainerOp; the decorator calls _set_metadata on it.
        def _set_metadata(self, component_meta):
            self._metadata = component_meta

    @component
    def componentA(
            a: {'ArtifactA': {
                'file_type': 'csv'
            }},
            b: Integer() = 12,
            c: {'ArtifactB': {
                'path_type': 'file',
                'file_type': 'tsv'
            }} = 'gs://hello/world') -> {'model': Integer()}:
        return MockContainerOp()

    op = componentA(1, 2, c=3)

    # Serialized form that Integer() is expected to produce.
    integer_type = {
        'Integer': {
            'openapi_schema_validator': {
                "type": "integer"
            }
        }
    }
    expected_inputs = [
        InputSpec(name='a', type={'ArtifactA': {
            'file_type': 'csv'
        }}),
        InputSpec(name='b', type=integer_type, default="12", optional=True),
        InputSpec(
            name='c',
            type={'ArtifactB': {
                'path_type': 'file',
                'file_type': 'tsv'
            }},
            default='gs://hello/world',
            optional=True),
    ]
    expected_outputs = [OutputSpec(name='model', type=integer_type)]
    golden_meta = ComponentSpec(
        name='ComponentA', inputs=expected_inputs, outputs=expected_outputs)

    self.assertEqual(op._metadata, golden_meta)
def test_handle_constructing_graph_component(self):
    """Constructs a graph ComponentSpec that wires three tasks together."""
    # Task with only a constant argument.
    first_task = TaskSpec(
        component_ref=ComponentReference(name='comp 1'),
        arguments={'in1 1': 11})
    # Consumes a constant plus an output of the first task.
    second_task = TaskSpec(
        component_ref=ComponentReference(name='comp 2'),
        arguments={
            'in2 1': 21,
            'in2 2': TaskOutputArgument.construct(
                task_id='task 1', output_name='out1 1'),
        })
    # Consumes the second task's output and one of the graph's own inputs.
    third_task = TaskSpec(
        component_ref=ComponentReference(name='comp 3'),
        arguments={
            'in3 1': TaskOutputArgument.construct(
                task_id='task 2', output_name='out2 1'),
            'in3 2': GraphInputReference(input_name='graph in 1').as_argument(),
        })

    # Graph outputs are forwarded task outputs.
    graph_output_values = {
        'graph out 1': TaskOutputArgument.construct(
            task_id='task 3', output_name='out3 1'),
        'graph out 2': TaskOutputArgument.construct(
            task_id='task 1', output_name='out1 2'),
    }

    graph_component1 = ComponentSpec(
        inputs=[
            InputSpec(name='graph in 1'),
            InputSpec(name='graph in 2'),
        ],
        outputs=[
            OutputSpec(name='graph out 1'),
            OutputSpec(name='graph out 2'),
        ],
        implementation=GraphImplementation(graph=GraphSpec(
            tasks={
                'task 1': first_task,
                'task 2': second_task,
                'task 3': third_task,
            },
            output_values=graph_output_values,
        )))
def _create_io_from_component_spec(
        spec: Type[SageMakerComponentSpec]) -> IOArgs:
    """Parses the set of inputs and outputs from a component spec into the
    YAML spec form.

    Args:
        spec: A component specification definition.

    Returns:
        IOArgs: The IO arguments object filled with the fields from the
            component spec definition.
    """
    inputs = []
    outputs = []
    args = []

    # Turn each declared input into a KFP InputSpec plus a CLI argument pair.
    for name, raw_input in spec.INPUTS.__dict__.items():
        # We know all of these values are validators as we have validated
        # the spec.
        validator = cast(SageMakerComponentInputValidator, raw_input)

        # Map from argsparser to KFP component; unknown types fall back to
        # "String".
        kfp_input = InputSpec(
            name=name,
            description=validator.description,
            type=SageMakerComponentCompiler.KFP_TYPE_FROM_ARGS.get(
                validator.input_type, "String"),
        )

        # Optional default: use the declared one when present; an optional
        # input without a declared default gets the empty string.
        if validator.default is not None:
            kfp_input.__dict__["default"] = str(validator.default)
        elif not validator.required:
            kfp_input.__dict__["default"] = ""

        inputs.append(kfp_input)
        args.extend([f"--{name}", InputValuePlaceholder(input_name=name)])

    # Outputs become OutputSpecs plus a path-placeholder argument pair.
    for name, raw_output in spec.OUTPUTS.__dict__.items():
        validator = cast(SageMakerComponentOutputValidator, raw_output)
        outputs.append(
            OutputSpec(name=name, description=validator.description))
        args.extend([
            f"--{name}{SageMakerComponentSpec.OUTPUT_ARGUMENT_SUFFIX}",
            OutputPathPlaceholder(output_name=name),
        ])

    return IOArgs(inputs=inputs, outputs=outputs, args=args)
def convert_method_to_component(
    cls: aiplatform.base.VertexAiResourceNoun, method: Callable
) -> Callable:
    """Converts a MB SDK Method to a Component wrapper.

    The wrapper enforces the correct signature w.r.t the MB SDK. The
    signature is also available to inspect.

    For example:

    aiplatform.Model.deploy is converted to ModelDeployOp

    Which can be called:
        model_deploy_step = ModelDeployOp(
            project=project,  # Pipeline parameter
            endpoint=endpoint_create_step.outputs['endpoint'],
            model=model_upload_step.outputs['model'],
            deployed_model_display_name='my-deployed-model',
            machine_type='n1-standard-4',
        )

    Generates and invokes the following Component:

    name: Model-deploy
    inputs:
    - {name: project, type: String}
    - {name: endpoint, type: Artifact}
    - {name: model, type: Model}
    outputs:
    - {name: endpoint, type: Artifact}
    implementation:
      container:
        image: gcr.io/sashaproject-1/mb_sdk_component:latest
        command:
        - python3
        - remote_runner.py
        - --cls_name=Model
        - --method_name=deploy
        - --method.deployed_model_display_name=my-deployed-model
        - --method.machine_type=n1-standard-4
        args:
        - --resource_name_output_artifact_path
        - {outputPath: endpoint}
        - --init.project
        - {inputValue: project}
        - --method.endpoint
        - {inputPath: endpoint}
        - --init.model_name
        - {inputPath: model}

    Args:
        cls (aiplatform.base.VertexAiResourceNoun): The MB SDK class that
            owns the method.
        method (Callable): A MB SDK Method. If it is an unbound function,
            the constructor parameters are also serialized into the
            component.

    Returns:
        A Component wrapper that accepts the MB SDK params and returns a Task.
    """
    method_name = method.__name__
    method_signature = inspect.signature(method)

    cls_name = cls.__name__
    init_method = cls.__init__
    init_signature = inspect.signature(init_method)

    # An unbound function means the resource must be constructed first, so
    # the constructor params are serialized into the component as well.
    should_serialize_init = inspect.isfunction(method)

    # map to store parameter names that are changed in components
    # this is generally used for constructor where the mb sdk takes
    # a resource name but the component takes a metadata entry
    # ie: model: system.Model -> model_name: str
    component_param_name_to_mb_sdk_param_name = {}

    # remove unused parameters
    method_signature = filter_signature(method_signature)
    init_signature = filter_signature(
        init_signature,
        is_init_signature=True,
        self_type=cls,
        component_param_name_to_mb_sdk_param_name=
        component_param_name_to_mb_sdk_param_name
    )

    # use this to partition args to method or constructor
    init_arg_names = set(
        init_signature.parameters.keys()
    ) if should_serialize_init else set()

    # determines outputs for this component
    output_type = resolve_annotation(method_signature.return_annotation)
    output_specs = []
    output_args = []
    if output_type:
        output_metadata_name, output_metadata_type = map_resource_to_metadata_type(
            output_type
        )
        output_specs.append(
            OutputSpec(
                name=output_metadata_name,
                type=output_metadata_type,
            )
        )
        output_args = [
            '--executor_input',
            '{{$}}',
            '--resource_name_output_artifact_uri',
            OutputUriPlaceholder(output_name=output_metadata_name),
        ]

    def make_args(args_to_serialize: Dict[str, Dict[str, Any]]) -> List[str]:
        """Takes the args dictionary and returns command-line args.

        Args:
            args_to_serialize: Dictionary of format
                {'init': {'param_name_1': param_1},
                 'method': {'param_name_2': param_name_2}}

        Returns:
            Serialized args compatible with Component YAML
        """
        additional_args = []
        for key, args in args_to_serialize.items():
            for arg_key, value in args.items():
                additional_args.append(f'--{key}.{arg_key}')
                additional_args.append(value)
        return additional_args

    def component_yaml_generator(**kwargs):
        # Builds the ComponentSpec for the concrete arguments supplied at
        # pipeline-definition time, saves it as YAML and loads it back as a
        # KFP component, then invokes it to produce a Task.
        input_specs = []
        input_args = []
        input_kwargs = {}

        serialized_args = {INIT_KEY: {}, METHOD_KEY: {}}

        init_kwargs = {}
        method_kwargs = {}

        for key, value in kwargs.items():
            if key in init_arg_names:
                prefix_key = INIT_KEY
                init_kwargs[key] = value
                signature = init_signature
            else:
                prefix_key = METHOD_KEY
                method_kwargs[key] = value
                signature = method_signature

            # no need to add this argument because it's optional
            # this param is validated against the signature because
            # of init_kwargs, method_kwargs
            if value is None:
                continue

            param_type = signature.parameters[key].annotation
            param_type = resolve_annotation(param_type)
            serializer = get_serializer(param_type)
            if serializer:
                param_type = str
                value = serializer(value)

            # TODO remove PipelineParam check when Metadata Importer
            # component available.
            # If we serialize we need to include the argument as input;
            # perhaps another option is to embed in yaml as json serialized
            # list.
            component_param_name = component_param_name_to_mb_sdk_param_name.get(
                key, key
            )
            if isinstance(value,
                          kfp.dsl._pipeline_param.PipelineParam) or serializer:
                if is_mb_sdk_resource_noun_type(param_type):
                    metadata_type = map_resource_to_metadata_type(param_type)[1]
                    component_param_type = metadata_type
                else:
                    component_param_type = 'String'

                input_specs.append(
                    InputSpec(
                        name=key,
                        type=component_param_type,
                    )
                )
                input_args.append(f'--{prefix_key}.{component_param_name}')
                if is_mb_sdk_resource_noun_type(param_type):
                    input_args.append(InputUriPlaceholder(input_name=key))
                else:
                    input_args.append(InputValuePlaceholder(input_name=key))

                input_kwargs[key] = value
            else:
                # Serialized arguments must always be strings
                value = str(value)
                serialized_args[prefix_key][component_param_name] = value

        # validate parameters
        if should_serialize_init:
            init_signature.bind(**init_kwargs)
        method_signature.bind(**method_kwargs)

        component_spec = ComponentSpec(
            name=f'{cls_name}-{method_name}',
            inputs=input_specs,
            outputs=output_specs,
            implementation=ContainerImplementation(
                container=ContainerSpec(
                    image=DEFAULT_CONTAINER_IMAGE,
                    command=[
                        'python3',
                        '-m',
                        'google_cloud_pipeline_components.aiplatform.remote_runner',
                        '--cls_name',
                        cls_name,
                        '--method_name',
                        method_name,
                    ],
                    args=make_args(serialized_args) + output_args + input_args,
                )
            )
        )
        # NOTE: tempfile.mktemp() is deprecated and racy (the file may be
        # created by another process between name generation and use);
        # NamedTemporaryFile(delete=False) atomically creates the file and
        # hands us its name.
        with tempfile.NamedTemporaryFile(
                mode='w', suffix='.yaml', delete=False) as temp_file:
            component_path = temp_file.name
        component_spec.save(component_path)

        return components.load_component_from_file(component_path)(
            **input_kwargs
        )

    component_yaml_generator.__signature__ = signatures_union(
        init_signature, method_signature
    ) if should_serialize_init else method_signature

    # Create a docstring based on the new signature.
    new_args_dict = {}
    new_args_dict.update(
        filter_docstring_args(
            signature=method_signature,
            docstring=inspect.getdoc(method),
            is_init_signature=False
        )
    )
    if should_serialize_init:
        new_args_dict.update(
            filter_docstring_args(
                signature=init_signature,
                docstring=inspect.getdoc(init_method),
                is_init_signature=True
            )
        )
    component_yaml_generator.__doc__ = generate_docstring(
        args_dict=new_args_dict,
        signature=component_yaml_generator.__signature__,
        method_docstring=inspect.getdoc(method)
    )

    # TODO Possibly rename method
    return component_yaml_generator
class ComponentCompilerTestCase(unittest.TestCase):
    """Tests for SageMakerComponentCompiler's spec-to-YAML translation."""

    # These should always match the dummy spec
    DUMMY_IO_ARGS = IOArgs(
        inputs=[
            InputSpec(
                name="input1",
                description="The first input.",
                type="String",
                default="input1-default",
            ),
            InputSpec(name="input2", description="The second input.", type="Integer"),
        ],
        outputs=[
            OutputSpec(name="output1", description="The first output."),
            OutputSpec(name="output2", description="The second output."),
        ],
        args=[
            "--input1",
            InputValuePlaceholder(input_name="input1"),
            "--input2",
            InputValuePlaceholder(input_name="input2"),
            "--output1_output_path",
            OutputPathPlaceholder(output_name="output1"),
            "--output2_output_path",
            OutputPathPlaceholder(output_name="output2"),
        ],
    )

    # Expected full ComponentSpec produced from the dummy spec above.
    DUMMY_COMPONENT_SPEC = ComponentSpec(
        name="Dummy component",
        description="Dummy description",
        inputs=DUMMY_IO_ARGS.inputs,
        outputs=DUMMY_IO_ARGS.outputs,
        implementation=ContainerImplementation(container=ContainerSpec(
            image="my-image:my-tag",
            command=["python3"],
            args=[
                "fake-path",
                "--input1",
                InputValuePlaceholder(input_name="input1"),
                "--input2",
                InputValuePlaceholder(input_name="input2"),
                "--output1_output_path",
                OutputPathPlaceholder(output_name="output1"),
                "--output2_output_path",
                OutputPathPlaceholder(output_name="output2"),
            ],
        )),
    )

    # Expected IO arguments covering every supported input type, plus
    # optional inputs with and without a declared default.
    EXTRA_IO_ARGS = IOArgs(
        inputs=[
            InputSpec(name="inputStr", description="str", type="String"),
            InputSpec(name="inputInt", description="int", type="Integer"),
            InputSpec(name="inputBool", description="bool", type="Bool"),
            InputSpec(name="inputDict", description="dict", type="JsonObject"),
            InputSpec(name="inputList", description="list", type="JsonArray"),
            InputSpec(
                name="inputOptional",
                description="optional",
                type="String",
                default="default-string",
            ),
            InputSpec(
                name="inputOptionalNoDefault",
                description="optional",
                type="String",
                default="",
            ),
        ],
        outputs=[],
        args=[
            "--inputStr",
            InputValuePlaceholder(input_name="inputStr"),
            "--inputInt",
            InputValuePlaceholder(input_name="inputInt"),
            "--inputBool",
            InputValuePlaceholder(input_name="inputBool"),
            "--inputDict",
            InputValuePlaceholder(input_name="inputDict"),
            "--inputList",
            InputValuePlaceholder(input_name="inputList"),
            "--inputOptional",
            InputValuePlaceholder(input_name="inputOptional"),
            "--inputOptionalNoDefault",
            InputValuePlaceholder(input_name="inputOptionalNoDefault"),
        ],
    )

    @classmethod
    def setUpClass(cls):
        cls.compiler = SageMakerComponentCompiler()

    def test_create_io_from_component_spec(self):
        """The dummy spec should translate to DUMMY_IO_ARGS."""
        response = SageMakerComponentCompiler._create_io_from_component_spec(
            DummySpec)  # type: ignore

        self.assertEqual(self.DUMMY_IO_ARGS, response)

    def test_create_io_from_component_spec_extra_types(self):
        """Every supported input type should translate to EXTRA_IO_ARGS."""
        response = SageMakerComponentCompiler._create_io_from_component_spec(
            ExtraSpec)  # type: ignore

        self.assertEqual(self.EXTRA_IO_ARGS, response)

    def test_create_component_spec_composes_correctly(self):
        """_create_component_spec should combine IO args, image and path."""
        image_uri = "my-image"
        image_tag = "my-tag"
        file_path = "fake-path"

        expected = ComponentSpec(
            name="Dummy component",
            description="Dummy description",
            inputs=self.DUMMY_IO_ARGS.inputs,
            outputs=self.DUMMY_IO_ARGS.outputs,
            implementation=ContainerImplementation(container=ContainerSpec(
                image="my-image:my-tag",
                command=["python3"],
                args=[
                    "fake-path",
                    "--input1",
                    InputValuePlaceholder(input_name="input1"),
                    "--input2",
                    InputValuePlaceholder(input_name="input2"),
                    "--output1_output_path",
                    OutputPathPlaceholder(output_name="output1"),
                    "--output2_output_path",
                    OutputPathPlaceholder(output_name="output2"),
                ],
            )),
        )

        # Stub the IO parsing so only the composition logic is under test.
        with patch(
            "common.component_compiler.SageMakerComponentCompiler._create_io_from_component_spec",
            MagicMock(return_value=self.DUMMY_IO_ARGS),
        ):
            response = SageMakerComponentCompiler._create_component_spec(
                DummyComponent, file_path, image_uri, image_tag)

        self.assertEqual(expected, response)

    def test_write_component(self):
        """_write_component should delegate to the component's save()."""
        DummyComponent.save = MagicMock()

        SageMakerComponentCompiler._write_component(DummyComponent, "/tmp/fake-path")

        DummyComponent.save.assert_called_once_with("/tmp/fake-path")
def test_extract_component_interface(self):
    """_extract_component_interface should map a Python function's
    signature and NamedTuple return annotation onto a ComponentSpec
    (inputs, outputs, and serialized dict form)."""
    from typing import NamedTuple

    # Function exercising every supported annotation/default combination.
    def my_func(  # noqa: F722
        required_param,
        int_param: int = 42,
        float_param: float = 3.14,
        str_param: str = 'string',
        bool_param: bool = True,
        none_param=None,
        custom_type_param: 'Custom type' = None,
        custom_struct_type_param: {
            'CustomType': {
                'param1': 'value1',
                'param2': 'value2'
            }
        } = None,
    ) -> NamedTuple(
        'DummyName',
        [
            #('required_param',), # All typing.NamedTuple fields must have types
            ('int_param', int),
            ('float_param', float),
            ('str_param', str),
            ('bool_param', bool),
            #('custom_type_param', 'Custom type'), #SyntaxError: Forward reference must be an expression -- got 'Custom type'
            ('custom_type_param', 'CustomType'),
            #('custom_struct_type_param', {'CustomType': {'param1': 'value1', 'param2': 'value2'}}), # TypeError: NamedTuple('Name', [(f0, t0), (f1, t1), ...]); each t must be a type Got {'CustomType': {'param1': 'value1', 'param2': 'value2'}}
        ]):
        '''Function docstring'''
        pass

    component_spec = comp._python_op._extract_component_interface(my_func)

    from kfp.components.structures import InputSpec, OutputSpec
    # Inputs: defaults are stringified; params with defaults become optional.
    self.assertEqual(
        component_spec.inputs,
        [
            InputSpec(name='required_param'),
            InputSpec(
                name='int_param', type='Integer', default='42', optional=True),
            InputSpec(
                name='float_param', type='Float', default='3.14',
                optional=True),
            InputSpec(
                name='str_param', type='String', default='string',
                optional=True),
            InputSpec(
                name='bool_param', type='Boolean', default='True',
                optional=True),
            InputSpec(name='none_param', optional=True),  # No default='None'
            InputSpec(
                name='custom_type_param', type='Custom type', optional=True),
            InputSpec(
                name='custom_struct_type_param',
                type={
                    'CustomType': {
                        'param1': 'value1',
                        'param2': 'value2'
                    }
                },
                optional=True),
        ])
    # Outputs come from the NamedTuple return annotation fields.
    self.assertEqual(
        component_spec.outputs,
        [
            OutputSpec(name='int_param', type='Integer'),
            OutputSpec(name='float_param', type='Float'),
            OutputSpec(name='str_param', type='String'),
            OutputSpec(name='bool_param', type='Boolean'),
            #OutputSpec(name='custom_type_param', type='Custom type', default='None'),
            OutputSpec(name='custom_type_param', type='CustomType'),
            #OutputSpec(name='custom_struct_type_param', type={'CustomType': {'param1': 'value1', 'param2': 'value2'}}, optional=True),
        ])

    self.maxDiff = None
    # Full serialized form: name derived from function name, description
    # from the function docstring.
    self.assertDictEqual(
        component_spec.to_dict(), {
            'name': 'My func',
            'description': 'Function docstring',
            'inputs': [
                {
                    'name': 'required_param'
                },
                {
                    'name': 'int_param',
                    'type': 'Integer',
                    'default': '42',
                    'optional': True
                },
                {
                    'name': 'float_param',
                    'type': 'Float',
                    'default': '3.14',
                    'optional': True
                },
                {
                    'name': 'str_param',
                    'type': 'String',
                    'default': 'string',
                    'optional': True
                },
                {
                    'name': 'bool_param',
                    'type': 'Boolean',
                    'default': 'True',
                    'optional': True
                },
                {
                    'name': 'none_param',
                    'optional': True
                },  # No default='None'
                {
                    'name': 'custom_type_param',
                    'type': 'Custom type',
                    'optional': True
                },
                {
                    'name': 'custom_struct_type_param',
                    'type': {
                        'CustomType': {
                            'param1': 'value1',
                            'param2': 'value2'
                        }
                    },
                    'optional': True
                },
            ],
            'outputs': [
                {
                    'name': 'int_param',
                    'type': 'Integer'
                },
                {
                    'name': 'float_param',
                    'type': 'Float'
                },
                {
                    'name': 'str_param',
                    'type': 'String'
                },
                {
                    'name': 'bool_param',
                    'type': 'Boolean'
                },
                {
                    'name': 'custom_type_param',
                    'type': 'CustomType'
                },
                #{'name': 'custom_struct_type_param', 'type': {'CustomType': {'param1': 'value1', 'param2': 'value2'}}, 'optional': True},
            ]
        })
def test_to_dict(self):
    """ComponentSpec.to_dict should serialize name, description, inputs and
    outputs (including structured types and defaults) faithfully."""
    # Shared type dictionaries used by both the spec and the golden dict.
    gcs_type = {'GCSPath': {'bucket_type': 'directory', 'file_type': 'csv'}}
    tf_model_type = {'TFModel': {'input_data': 'tensor', 'version': '1.8.0'}}
    schema_type = {'Schema': {'file_type': 'tsv'}}

    component_meta = ComponentSpec(
        name='foobar',
        description='foobar example',
        inputs=[
            InputSpec(
                name='input1',
                description='input1 desc',
                type=gcs_type,
                default='default1'),
            InputSpec(
                name='input2',
                description='input2 desc',
                type=tf_model_type,
                default='default2'),
            InputSpec(
                name='input3',
                description='input3 desc',
                type='Integer',
                default='default3'),
        ],
        outputs=[
            OutputSpec(
                name='output1',
                description='output1 desc',
                type=schema_type,
            )
        ])

    golden_meta = {
        'name': 'foobar',
        'description': 'foobar example',
        'inputs': [
            {
                'name': 'input1',
                'description': 'input1 desc',
                'type': gcs_type,
                'default': 'default1'
            },
            {
                'name': 'input2',
                'description': 'input2 desc',
                'type': tf_model_type,
                'default': 'default2'
            },
            {
                'name': 'input3',
                'description': 'input3 desc',
                'type': 'Integer',
                'default': 'default3'
            },
        ],
        'outputs': [
            {
                'name': 'output1',
                'description': 'output1 desc',
                'type': schema_type,
            },
        ],
    }

    self.assertEqual(component_meta.to_dict(), golden_meta)