def test_handle_parsing_predicates(self):
    """A graph whose task carries a nested `isEnabled` predicate must parse."""
    # Task 2 is gated by: not (out1 1 > 0 and out1 2 == 'head').
    graph_yaml = '''\
implementation:
  graph:
    tasks:
      task 1:
        componentRef: {name: Comp 1}
      task 2:
        componentRef: {name: Comp 2}
        arguments:
            in2 1: 21
            in2 2: {taskOutput: {taskId: task 1, outputName: out1 1}}
        isEnabled:
            not:
                and:
                    op1:
                        '>':
                            op1: {taskOutput: {taskId: task 1, outputName: out1 1}}
                            op2: 0
                    op2:
                        '==':
                            op1: {taskOutput: {taskId: task 1, outputName: out1 2}}
                            op2: 'head'
'''
    # The test passes as long as neither loading nor conversion raises.
    ComponentSpec.from_dict(load_yaml(graph_yaml))
def _compare_component(component_spec: ComponentSpec, compare_path: str):
    """Compare an existing specification file to a new specification.

    The new specification is serialized to a temporary file and diffed
    line-by-line against the file at `compare_path`. Lines whose first
    non-blank text is ``image:`` are excluded from both sides before diffing.

    Args:
        component_spec: A `component.yaml` specification object.
        compare_path: The path of the existing specification file.

    Returns:
        ``False`` when the two specifications match (ignoring image lines),
        otherwise the unified diff as a single string.
    """
    import os

    def _read_without_image_lines(path):
        # Drop `image:` lines so they never show up as a difference.
        with open(path, mode="r") as spec_file:
            return [
                line for line in spec_file
                if not line.lstrip().startswith("image:")
            ]

    # Reserve a unique path, then close the handle right away so that
    # `component_spec.save` can reopen the file by name.
    with NamedTemporaryFile(mode="w", delete=False) as temp_spec_file:
        temp_spec_path = temp_spec_file.name

    try:
        # Write new spec into the temporary file
        component_spec.save(temp_spec_path)
        temp_lines = _read_without_image_lines(temp_spec_path)
        existing_lines = _read_without_image_lines(compare_path)
    finally:
        # `delete=False` means nobody else removes the file; without this
        # every call would leave a stray temporary file behind.
        os.remove(temp_spec_path)

    # Cast to list to read through generator
    diff_results = list(
        difflib.unified_diff(
            temp_lines,
            existing_lines,
            fromfile=temp_spec_path,
            tofile=compare_path,
        ))

    if not diff_results:
        return False
    return "\n".join(diff_results)
def test_handle_parsing_graph_component(self):
    """A three-task graph with graph-level inputs and outputs must parse."""
    graph_yaml = '''\
inputs:
- {name: graph in 1}
- {name: graph in 2}
outputs:
- {name: graph out 1}
- {name: graph out 2}
implementation:
  graph:
    tasks:
      task 1:
        componentRef: {name: Comp 1}
        arguments:
            in1 1: 11
      task 2:
        componentRef: {name: Comp 2}
        arguments:
            in2 1: 21
            in2 2: {taskOutput: {taskId: task 1, outputName: out1 1}}
      task 3:
        componentRef: {name: Comp 3}
        arguments:
            in3 1: {taskOutput: {taskId: task 2, outputName: out2 1}}
            in3 2: {graphInput: {inputName: graph in 1}}
    outputValues:
      graph out 1: {taskOutput: {taskId: task 3, outputName: out3 1}}
      graph out 2: {taskOutput: {taskId: task 1, outputName: out1 2}}
'''
    # No assertion needed: the test fails if loading or conversion raises.
    ComponentSpec.from_dict(load_yaml(graph_yaml))
def _write_component(component_spec: ComponentSpec, output_path: str):
    """Persist a component specification to disk.

    Delegates entirely to the specification's own `save` method.

    Args:
        component_spec: The `component.yaml` specification object to store.
        output_path: Destination path handed to `component_spec.save`.
    """
    save_spec = component_spec.save
    save_spec(output_path)
def test_handle_parsing_task_volumes_and_mounts(self):
    """Volumes declared under a task's `podSpec` must survive parsing."""
    graph_yaml = '''\
implementation:
  graph:
    tasks:
      task 1:
        componentRef: {name: Comp 1}
        executionOptions:
          kubernetesOptions:
            mainContainer:
              volumeMounts:
              - name: workdir
                mountPath: /mnt/vol
            podSpec:
              volumes:
              - name: workdir
                emptyDir: {}
'''
    parsed_spec = ComponentSpec.from_dict(load_yaml(graph_yaml))

    # Walk down to the first (and only) pod-level volume of task 1.
    task = parsed_spec.implementation.graph.tasks['task 1']
    first_volume = task.execution_options.kubernetes_options.pod_spec.volumes[0]

    self.assertEqual(first_volume.name, 'workdir')
    self.assertIsNotNone(first_volume.empty_dir)
def _create_component_spec(
    component_def: Type[SageMakerComponent],
    component_file_path: str,
    component_image_uri: str,
    component_image_tag: str,
) -> ComponentSpec:
    """Build the `component.yaml` specification object for a component.

    The container runs ``python3 <component_file_path> <io args...>`` in the
    image ``<component_image_uri>:<component_image_tag>``.

    Args:
        component_def: The type of the SageMaker component.
        component_file_path: The path to the component definition file.
        component_image_uri: Compiled image URI.
        component_image_tag: Compiled image tag.

    Returns:
        ComponentSpec: A `component.yaml` specification object.
    """
    io_spec = SageMakerComponentCompiler._create_io_from_component_spec(
        component_def.COMPONENT_SPEC)

    # The component file comes first, followed by the generated I/O flags.
    container_args = [component_file_path] + io_spec.args  # type: ignore
    container = ContainerSpec(
        image=f"{component_image_uri}:{component_image_tag}",
        command=["python3"],
        args=container_args,
    )

    return ComponentSpec(
        name=component_def.COMPONENT_NAME,
        description=component_def.COMPONENT_DESCRIPTION,
        inputs=io_spec.inputs,
        outputs=io_spec.outputs,
        implementation=ContainerImplementation(container=container),
    )
def test_component_metadata_standard_type_annotation(self):
    """Check @component metadata for plain Python / typing annotations."""

    class MockContainerOp:
        # Stand-in op: just remembers the metadata it is handed.

        def _set_metadata(self, component_meta):
            self._metadata = component_meta

    @component
    def componentA(a: float, b: List[int], c: Optional[str] = None) -> None:
        return MockContainerOp()

    container_op = componentA('str_value', '[1,2,3]')

    # The expected type name for `List[int]` depends on the interpreter.
    if sys.version_info >= (3, 7):
        list_type_name = 'typing.List[int]'
    else:
        list_type_name = 'List'

    expected_meta = ComponentSpec(name='ComponentA', inputs=[], outputs=None)
    expected_meta.inputs.extend([
        InputSpec(name='a', type='Float'),
        InputSpec(name='b', type=list_type_name),
        InputSpec(name='c', type='String', default=None, optional=True),
    ])

    self.assertEqual(container_op._metadata, expected_meta)
def test_decorator_metadata(self):
    """Check the metadata extracted from a @pipeline-decorated function."""

    @pipeline(name='p1', description='description1')
    def my_pipeline1(a: {'Schema': {'file_type': 'csv'}} = 'good',
                     b: Integer() = 12):
        pass

    expected_inputs = [
        InputSpec(
            name='a',
            type={'Schema': {'file_type': 'csv'}},
            default='good',
            optional=True,
        ),
        # The integer default is expected back as the string "12".
        InputSpec(
            name='b',
            type={'Integer': {'openapi_schema_validator': {"type": "integer"}}},
            default="12",
            optional=True,
        ),
    ]
    expected_meta = ComponentSpec(
        name='p1', description='description1', inputs=[])
    expected_meta.inputs.extend(expected_inputs)

    actual_meta = _extract_pipeline_metadata(my_pipeline1)
    self.assertEqual(actual_meta, expected_meta)
def test_create_component_spec_composes_correctly(self):
    """`_create_component_spec` must assemble image, command and args."""
    image_uri, image_tag, file_path = "my-image", "my-tag", "fake-path"

    expected_args = [
        "fake-path",
        "--input1",
        InputValuePlaceholder(input_name="input1"),
        "--input2",
        InputValuePlaceholder(input_name="input2"),
        "--output1_output_path",
        OutputPathPlaceholder(output_name="output1"),
        "--output2_output_path",
        OutputPathPlaceholder(output_name="output2"),
    ]
    expected_container = ContainerSpec(
        image="my-image:my-tag",
        command=["python3"],
        args=expected_args,
    )
    expected = ComponentSpec(
        name="Dummy component",
        description="Dummy description",
        inputs=self.DUMMY_IO_ARGS.inputs,
        outputs=self.DUMMY_IO_ARGS.outputs,
        implementation=ContainerImplementation(container=expected_container),
    )

    # Stub the I/O extraction so only the composition step is exercised.
    io_factory_target = (
        "common.component_compiler.SageMakerComponentCompiler."
        "_create_io_from_component_spec")
    io_factory_stub = MagicMock(return_value=self.DUMMY_IO_ARGS)
    with patch(io_factory_target, io_factory_stub):
        response = SageMakerComponentCompiler._create_component_spec(
            DummyComponent, file_path, image_uri, image_tag)

    self.assertEqual(expected, response)
def test_component_metadata(self):
    """Check @component metadata for dict-style and `Integer()` annotations."""

    class MockContainerOp:
        # Stand-in op: just remembers the metadata it is handed.

        def _set_metadata(self, component_meta):
            self._metadata = component_meta

    @component
    def componentA(
        a: {'ArtifactA': {'file_type': 'csv'}},
        b: Integer() = 12,
        c: {'ArtifactB': {'path_type': 'file', 'file_type': 'tsv'}} = 'gs://hello/world'
    ) -> {'model': Integer()}:
        return MockContainerOp()

    container_op = componentA(1, 2, c=3)

    def integer_type():
        # Fresh dict per use so the expected specs never share state.
        return {'Integer': {'openapi_schema_validator': {"type": "integer"}}}

    expected_meta = ComponentSpec(name='ComponentA', inputs=[], outputs=[])
    expected_meta.inputs.extend([
        InputSpec(name='a', type={'ArtifactA': {'file_type': 'csv'}}),
        InputSpec(name='b', type=integer_type(), default="12", optional=True),
        InputSpec(
            name='c',
            type={'ArtifactB': {'path_type': 'file', 'file_type': 'tsv'}},
            default='gs://hello/world',
            optional=True,
        ),
    ])
    expected_meta.outputs.append(
        OutputSpec(name='model', type=integer_type()))

    self.assertEqual(container_op._metadata, expected_meta)
def test_fail_on_cyclic_references(self):
    """Two tasks consuming each other's outputs must be rejected."""
    cyclic_graph_yaml = '''\
implementation:
  graph:
    tasks:
      task 1:
        componentRef: {name: Comp 1}
        arguments:
            in1 1: {taskOutput: {taskId: task 2, outputName: out2 1}}
      task 2:
        componentRef: {name: Comp 2}
        arguments:
            in2 1: {taskOutput: {taskId: task 1, outputName: out1 1}}
'''
    parsed = load_yaml(cyclic_graph_yaml)
    # Building the spec from the cyclic graph is expected to raise TypeError.
    self.assertRaises(TypeError, ComponentSpec.from_dict, parsed)
def test_accessing_component_spec_from_task_factory(self):
    """A loaded task factory must expose the spec it was created from."""
    container_yaml = '''\
implementation:
  container:
    image: busybox
'''
    factory = comp.load_component_from_text(container_yaml)
    spec_dict_from_factory = factory.component_spec.to_dict()

    # Reference values obtained by parsing the same text directly.
    spec_dict_from_yaml = load_yaml(container_yaml)
    spec_from_yaml = ComponentSpec.from_dict(spec_dict_from_yaml)

    self.assertEqual(spec_dict_from_yaml, spec_dict_from_factory)
    self.assertEqual(spec_from_yaml, factory.component_spec)
def test_handle_parsing_task_execution_options_caching_strategy(self):
    """`maxCacheStaleness` on a task must be readable after parsing."""
    graph_yaml = '''\
implementation:
  graph:
    tasks:
      task 1:
        componentRef: {name: Comp 1}
        executionOptions:
          cachingStrategy:
            maxCacheStaleness: P30D
'''
    parsed_spec = ComponentSpec.from_dict(load_yaml(graph_yaml))

    task = parsed_spec.implementation.graph.tasks['task 1']
    caching_strategy = task.execution_options.caching_strategy
    self.assertEqual(caching_strategy.max_cache_staleness, 'P30D')
def test_handle_constructing_graph_component(self):
    """A graph component can be assembled directly from structure objects.

    There are no assertions; the test passes when construction does not raise.
    """
    first_task = TaskSpec(
        component_ref=ComponentReference(name='comp 1'),
        arguments={'in1 1': 11},
    )

    second_task_arguments = {
        'in2 1': 21,
        'in2 2': TaskOutputArgument.construct(
            task_id='task 1', output_name='out1 1'),
    }
    second_task = TaskSpec(
        component_ref=ComponentReference(name='comp 2'),
        arguments=second_task_arguments,
    )

    third_task_arguments = {
        'in3 1': TaskOutputArgument.construct(
            task_id='task 2', output_name='out2 1'),
        'in3 2': GraphInputReference(input_name='graph in 1').as_argument(),
    }
    third_task = TaskSpec(
        component_ref=ComponentReference(name='comp 3'),
        arguments=third_task_arguments,
    )

    graph = GraphSpec(
        tasks={
            'task 1': first_task,
            'task 2': second_task,
            'task 3': third_task,
        },
        output_values={
            'graph out 1': TaskOutputArgument.construct(
                task_id='task 3', output_name='out3 1'),
            'graph out 2': TaskOutputArgument.construct(
                task_id='task 1', output_name='out1 2'),
        },
    )

    graph_component = ComponentSpec(
        inputs=[
            InputSpec(name='graph in 1'),
            InputSpec(name='graph in 2'),
        ],
        outputs=[
            OutputSpec(name='graph out 1'),
            OutputSpec(name='graph out 2'),
        ],
        implementation=GraphImplementation(graph=graph),
    )
def test_handle_parsing_task_container_spec_options(self):
    """Resource requests on a task's main container must survive parsing."""
    graph_yaml = '''\
implementation:
  graph:
    tasks:
      task 1:
        componentRef: {name: Comp 1}
        executionOptions:
          kubernetesOptions:
            mainContainer:
              resources:
                requests:
                  memory: 1024Mi
                  cpu: 200m
'''
    parsed_spec = ComponentSpec.from_dict(load_yaml(graph_yaml))

    task = parsed_spec.implementation.graph.tasks['task 1']
    main_container = task.execution_options.kubernetes_options.main_container
    self.assertEqual(main_container.resources.requests['memory'], '1024Mi')
def from_v1_component_spec(
        cls,
        v1_component_spec: v1_structures.ComponentSpec) -> 'ComponentSpec':
    """Converts V1 ComponentSpec to V2 ComponentSpec.

    Only container implementations are supported. Command, argument and
    environment entries are translated from their V1 dictionary form into
    the corresponding placeholder objects.

    Args:
        v1_component_spec: The V1 ComponentSpec.

    Returns:
        Component spec in the form of V2 ComponentSpec.

    Raises:
        ValueError: If implementation is not found, or if a command/argument
            is neither a str nor a recognized placeholder dictionary.
        NotImplementedError: If the implementation is not a container.
    """
    component_dict = v1_component_spec.to_dict()
    if component_dict.get('implementation') is None:
        raise ValueError('Implementation field not found')
    if 'container' not in component_dict.get('implementation'):
        raise NotImplementedError

    def _transform_arg(
            arg: Union[str, Dict[str, str]]) -> ValidCommandArgs:
        # Translate one V1 command/argument entry into its V2 equivalent.
        if isinstance(arg, str):
            return arg
        if 'inputValue' in arg:
            return InputValuePlaceholder(
                input_name=utils.sanitize_input_name(arg['inputValue']))
        if 'inputPath' in arg:
            return InputPathPlaceholder(
                input_name=utils.sanitize_input_name(arg['inputPath']))
        if 'inputUri' in arg:
            return InputUriPlaceholder(
                input_name=utils.sanitize_input_name(arg['inputUri']))
        if 'outputPath' in arg:
            return OutputPathPlaceholder(
                output_name=utils.sanitize_input_name(arg['outputPath']))
        if 'outputUri' in arg:
            return OutputUriPlaceholder(
                output_name=utils.sanitize_input_name(arg['outputUri']))
        if 'if' in arg:
            if_placeholder_values = arg['if']
            if_placeholder_values_then = list(if_placeholder_values['then'])
            # The `else` branch is optional; treat a missing one as empty.
            if_placeholder_values_else = list(
                if_placeholder_values.get('else', []))

            IfPresentPlaceholderStructure.update_forward_refs()
            return IfPresentPlaceholder(
                if_structure=IfPresentPlaceholderStructure(
                    input_name=utils.sanitize_input_name(
                        if_placeholder_values['cond']['isPresent']),
                    then=[
                        _transform_arg(val)
                        for val in if_placeholder_values_then
                    ],
                    otherwise=[
                        _transform_arg(val)
                        for val in if_placeholder_values_else
                    ]))
        if 'concat' in arg:
            ConcatPlaceholder.update_forward_refs()
            return ConcatPlaceholder(
                concat=[_transform_arg(val) for val in arg['concat']])
        raise ValueError(
            f'Unexpected command/argument type: "{arg}" of type "{type(arg)}".'
        )

    # Note: the container dictionary is rewritten in place. The V1 keys
    # `command`/`args` are replaced by `commands`/`arguments`.
    implementation = component_dict['implementation']['container']
    implementation['commands'] = [
        _transform_arg(command)
        for command in implementation.pop('command', [])
    ]
    implementation['arguments'] = [
        _transform_arg(command)
        for command in implementation.pop('args', [])
    ]
    implementation['env'] = {
        key: _transform_arg(command)
        for key, command in implementation.pop('env', {}).items()
    }

    container_spec = ContainerSpec(image=implementation['image'])

    # Workaround for https://github.com/samuelcolvin/pydantic/issues/2079
    def _copy_model(obj):
        if isinstance(obj, BaseModel):
            return obj.copy(deep=True)
        return obj

    # Must assign these after the constructor call, otherwise it won't work.
    if implementation['commands']:
        container_spec.commands = [
            _copy_model(cmd) for cmd in implementation['commands']
        ]
    if implementation['arguments']:
        container_spec.arguments = [
            _copy_model(arg) for arg in implementation['arguments']
        ]
    if implementation['env']:
        # Iterate over items(): iterating the dict itself yields only keys,
        # which would be unpacked character-by-character into (k, v).
        container_spec.env = {
            k: _copy_model(v) for k, v in implementation['env'].items()
        }

    return ComponentSpec(
        name=component_dict.get('name', 'name'),
        description=component_dict.get('description'),
        implementation=Implementation(container=container_spec),
        inputs={
            utils.sanitize_input_name(spec['name']):
            InputSpec(type=spec.get('type', 'Artifact'),
                      default=spec.get('default', None))
            for spec in component_dict.get('inputs', [])
        },
        outputs={
            utils.sanitize_input_name(spec['name']):
            OutputSpec(type=spec.get('type', 'Artifact'))
            for spec in component_dict.get('outputs', [])
        })
def component_yaml_generator(**kwargs):
    """Build a component spec from keyword arguments and instantiate it.

    Each keyword is routed to either the constructor or the method signature.
    Pipeline parameters and values that have a serializer become component
    inputs; every other non-None value is stringified and embedded in the
    container arguments. The spec is written to a temporary file, loaded back
    and called with the collected inputs.

    NOTE(review): this function reads names that are not defined in it
    (`init_arg_names`, `init_signature`, `method_signature`, `cls_name`,
    `method_name`, `output_specs`, `output_args`, `should_serialize_init`,
    `component_param_name_to_mb_sdk_param_name`) - presumably bound by an
    enclosing factory; confirm against the surrounding code.

    Args:
        **kwargs: Argument values keyed by parameter name.

    Returns:
        The result of calling the loaded component with the input arguments.
    """
    input_specs = []
    input_args = []
    input_kwargs = {}

    serialized_args = {INIT_KEY: {}, METHOD_KEY: {}}

    init_kwargs = {}
    method_kwargs = {}

    for key, value in kwargs.items():
        if key in init_arg_names:
            prefix_key = INIT_KEY
            init_kwargs[key] = value
            signature = init_signature
        else:
            prefix_key = METHOD_KEY
            method_kwargs[key] = value
            signature = method_signature

        # no need to add this argument because it's optional
        # this param is validated against the signature because
        # of init_kwargs, method_kwargs
        if value is None:
            continue

        param_type = signature.parameters[key].annotation
        param_type = resolve_annotation(param_type)
        serializer = get_serializer(param_type)
        if serializer:
            param_type = str
            value = serializer(value)

        # TODO remove PipelineParam check when Metadata Importer component available
        # if we serialize we need to include the argument as input
        # perhaps, another option is to embed in yaml as json serialized list
        component_param_name = component_param_name_to_mb_sdk_param_name.get(
            key, key
        )
        if isinstance(value, kfp.dsl._pipeline_param.PipelineParam) or serializer:
            if is_mb_sdk_resource_noun_type(param_type):
                metadata_type = map_resource_to_metadata_type(param_type)[1]
                component_param_type = metadata_type
            else:
                component_param_type = 'String'

            input_specs.append(
                InputSpec(
                    name=key,
                    type=component_param_type,
                )
            )
            input_args.append(f'--{prefix_key}.{component_param_name}')
            if is_mb_sdk_resource_noun_type(param_type):
                input_args.append(InputUriPlaceholder(input_name=key))
            else:
                input_args.append(InputValuePlaceholder(input_name=key))

            input_kwargs[key] = value
        else:
            # Serialized arguments must always be strings
            value = str(value)
            serialized_args[prefix_key][component_param_name] = value

    # validate parameters
    if should_serialize_init:
        init_signature.bind(**init_kwargs)
    method_signature.bind(**method_kwargs)

    component_spec = ComponentSpec(
        name=f'{cls_name}-{method_name}',
        inputs=input_specs,
        outputs=output_specs,
        implementation=ContainerImplementation(
            container=ContainerSpec(
                image=DEFAULT_CONTAINER_IMAGE,
                command=[
                    'python3',
                    '-m',
                    'google_cloud_pipeline_components.aiplatform.remote_runner',
                    '--cls_name',
                    cls_name,
                    '--method_name',
                    method_name,
                ],
                args=make_args(serialized_args) + output_args + input_args,
            )
        )
    )

    # Create the file atomically instead of using the deprecated
    # `tempfile.mktemp()`, which only returns a name and leaves a window in
    # which another process could claim that path. The handle is closed
    # immediately so `save` can reopen the file by name.
    with tempfile.NamedTemporaryFile(delete=False) as spec_file:
        component_path = spec_file.name
    component_spec.save(component_path)

    return components.load_component_from_file(component_path)(
        **input_kwargs
    )
class ComponentCompilerTestCase(unittest.TestCase):
    """Tests for the static helpers of `SageMakerComponentCompiler`."""

    # These should always match the dummy spec
    DUMMY_IO_ARGS = IOArgs(
        inputs=[
            InputSpec(
                name="input1",
                description="The first input.",
                type="String",
                default="input1-default",
            ),
            InputSpec(name="input2",
                      description="The second input.",
                      type="Integer"),
        ],
        outputs=[
            OutputSpec(name="output1", description="The first output."),
            OutputSpec(name="output2", description="The second output."),
        ],
        args=[
            "--input1",
            InputValuePlaceholder(input_name="input1"),
            "--input2",
            InputValuePlaceholder(input_name="input2"),
            "--output1_output_path",
            OutputPathPlaceholder(output_name="output1"),
            "--output2_output_path",
            OutputPathPlaceholder(output_name="output2"),
        ],
    )

    # The spec expected when compiling the dummy component with image
    # "my-image", tag "my-tag" and component file "fake-path".
    DUMMY_COMPONENT_SPEC = ComponentSpec(
        name="Dummy component",
        description="Dummy description",
        inputs=DUMMY_IO_ARGS.inputs,
        outputs=DUMMY_IO_ARGS.outputs,
        implementation=ContainerImplementation(container=ContainerSpec(
            image="my-image:my-tag",
            command=["python3"],
            args=[
                "fake-path",
                "--input1",
                InputValuePlaceholder(input_name="input1"),
                "--input2",
                InputValuePlaceholder(input_name="input2"),
                "--output1_output_path",
                OutputPathPlaceholder(output_name="output1"),
                "--output2_output_path",
                OutputPathPlaceholder(output_name="output2"),
            ],
        )),
    )

    # Expected I/O arguments for the spec with one input per supported type.
    EXTRA_IO_ARGS = IOArgs(
        inputs=[
            InputSpec(name="inputStr", description="str", type="String"),
            InputSpec(name="inputInt", description="int", type="Integer"),
            InputSpec(name="inputBool", description="bool", type="Bool"),
            InputSpec(name="inputDict", description="dict", type="JsonObject"),
            InputSpec(name="inputList", description="list", type="JsonArray"),
            InputSpec(
                name="inputOptional",
                description="optional",
                type="String",
                default="default-string",
            ),
            InputSpec(
                name="inputOptionalNoDefault",
                description="optional",
                type="String",
                default="",
            ),
        ],
        outputs=[],
        args=[
            "--inputStr",
            InputValuePlaceholder(input_name="inputStr"),
            "--inputInt",
            InputValuePlaceholder(input_name="inputInt"),
            "--inputBool",
            InputValuePlaceholder(input_name="inputBool"),
            "--inputDict",
            InputValuePlaceholder(input_name="inputDict"),
            "--inputList",
            InputValuePlaceholder(input_name="inputList"),
            "--inputOptional",
            InputValuePlaceholder(input_name="inputOptional"),
            "--inputOptionalNoDefault",
            InputValuePlaceholder(input_name="inputOptionalNoDefault"),
        ],
    )

    @classmethod
    def setUpClass(cls):
        cls.compiler = SageMakerComponentCompiler()

    def test_create_io_from_component_spec(self):
        """The dummy spec must map onto `DUMMY_IO_ARGS`."""
        response = SageMakerComponentCompiler._create_io_from_component_spec(
            DummySpec)  # type: ignore

        self.assertEqual(self.DUMMY_IO_ARGS, response)

    def test_create_io_from_component_spec_extra_types(self):
        """The extra-types spec must map onto `EXTRA_IO_ARGS`."""
        response = SageMakerComponentCompiler._create_io_from_component_spec(
            ExtraSpec)  # type: ignore

        self.assertEqual(self.EXTRA_IO_ARGS, response)

    def test_create_component_spec_composes_correctly(self):
        """`_create_component_spec` must produce `DUMMY_COMPONENT_SPEC`."""
        image_uri = "my-image"
        image_tag = "my-tag"
        file_path = "fake-path"

        # Stub the I/O extraction so only the composition step is exercised.
        with patch(
                "common.component_compiler.SageMakerComponentCompiler._create_io_from_component_spec",
                MagicMock(return_value=self.DUMMY_IO_ARGS),
        ):
            response = SageMakerComponentCompiler._create_component_spec(
                DummyComponent, file_path, image_uri, image_tag)

        self.assertEqual(self.DUMMY_COMPONENT_SPEC, response)

    def test_write_component(self):
        """`_write_component` must call `save` with the output path."""
        # Patch `save` only for the duration of the call. Assigning the mock
        # directly onto DummyComponent would leak it into every other test
        # that runs afterwards. `create=True` keeps this working whether or
        # not DummyComponent already defines `save`.
        with patch.object(DummyComponent, "save", create=True) as save_mock:
            SageMakerComponentCompiler._write_component(DummyComponent,
                                                        "/tmp/fake-path")

        save_mock.assert_called_once_with("/tmp/fake-path")
def test_to_dict(self):
    """`ComponentSpec.to_dict` must mirror the spec's inputs and outputs."""
    input_specs = [
        InputSpec(
            name='input1',
            description='input1 desc',
            type={'GCSPath': {'bucket_type': 'directory', 'file_type': 'csv'}},
            default='default1',
        ),
        InputSpec(
            name='input2',
            description='input2 desc',
            type={'TFModel': {'input_data': 'tensor', 'version': '1.8.0'}},
            default='default2',
        ),
        InputSpec(
            name='input3',
            description='input3 desc',
            type='Integer',
            default='default3',
        ),
    ]
    output_specs = [
        OutputSpec(
            name='output1',
            description='output1 desc',
            type={'Schema': {'file_type': 'tsv'}},
        ),
    ]
    spec = ComponentSpec(
        name='foobar',
        description='foobar example',
        inputs=input_specs,
        outputs=output_specs,
    )

    # Expected plain-dict form, written out independently of the objects above.
    expected_inputs = [
        {
            'name': 'input1',
            'description': 'input1 desc',
            'type': {
                'GCSPath': {'bucket_type': 'directory', 'file_type': 'csv'}
            },
            'default': 'default1',
        },
        {
            'name': 'input2',
            'description': 'input2 desc',
            'type': {
                'TFModel': {'input_data': 'tensor', 'version': '1.8.0'}
            },
            'default': 'default2',
        },
        {
            'name': 'input3',
            'description': 'input3 desc',
            'type': 'Integer',
            'default': 'default3',
        },
    ]
    expected_outputs = [
        {
            'name': 'output1',
            'description': 'output1 desc',
            'type': {'Schema': {'file_type': 'tsv'}},
        },
    ]
    expected_dict = {
        'name': 'foobar',
        'description': 'foobar example',
        'inputs': expected_inputs,
        'outputs': expected_outputs,
    }

    self.assertEqual(spec.to_dict(), expected_dict)