def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
) -> pipeline_spec_pb2.ComponentSpec:
  """Builds an IR ComponentSpec instance from structures.ComponentSpec.

  Args:
    component_spec: The structure component spec.

  Returns:
    An instance of IR ComponentSpec.
  """
  result = pipeline_spec_pb2.ComponentSpec()
  result.executor_label = dsl_utils.sanitize_executor_label(component_spec.name)

  for input_spec in component_spec.inputs or []:
    if type_utils.is_parameter_type(input_spec.type):
      result.input_definitions.parameters[
          input_spec.name].type = type_utils.get_parameter_type(input_spec.type)
    else:
      result.input_definitions.artifacts[
          input_spec.name].artifact_type.instance_schema = (
              type_utils.get_artifact_type_schema(input_spec.type))

  for output_spec in component_spec.outputs or []:
    if type_utils.is_parameter_type(output_spec.type):
      result.output_definitions.parameters[
          output_spec.name].type = type_utils.get_parameter_type(
              output_spec.type)
    else:
      result.output_definitions.artifacts[
          output_spec.name].artifact_type.instance_schema = (
              type_utils.get_artifact_type_schema(output_spec.type))

  return result

def test_get_artifact_type_schema(self):
  self.assertEqual(
      'title: kfp.Model\ntype: object\nproperties:\n framework:\n type: string\n framework_version:\n type: string\n',
      type_utils.get_artifact_type_schema('Model'))
  self.assertEqual(
      'title: kfp.Dataset\ntype: object\nproperties:\n payload_format:\n type: string\n container_format:\n type: string',
      type_utils.get_artifact_type_schema('Dataset'))
  for type_name in _UNKNOWN_ARTIFACT_TYPES:
    self.assertEqual('title: kfp.Artifact\ntype: object\n',
                     type_utils.get_artifact_type_schema(type_name))

def build_component_inputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
    is_root_component: bool,
) -> None:
  """Builds component inputs spec from pipeline params.

  Args:
    component_spec: The component spec to fill in its inputs spec.
    pipeline_params: The list of pipeline params.
    is_root_component: Whether the component is the root.
  """
  for param in pipeline_params:
    param_name = param.full_name

    if _for_loop.LoopArguments.name_is_loop_argument(param_name):
      param.param_type = param.param_type or 'String'

    input_name = (
        param_name if is_root_component else
        additional_input_name_for_pipelineparam(param_name))

    if type_utils.is_parameter_type(param.param_type):
      component_spec.input_definitions.parameters[
          input_name].type = type_utils.get_parameter_type(param.param_type)
    elif input_name not in getattr(component_spec.input_definitions,
                                   'parameters', []):
      component_spec.input_definitions.artifacts[
          input_name].artifact_type.CopyFrom(
              type_utils.get_artifact_type_schema(param.param_type))

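# Usage sketch (illustrative, not from the source): assumes the kfp.dsl
# PipelineParam(name, op_name=..., param_type=...) constructor and the
# additional_input_name_for_pipelineparam helper referenced above; the
# parameter names are placeholders.
component_spec = pipeline_spec_pb2.ComponentSpec()
params = [
    _pipeline_param.PipelineParam(name='learning_rate', param_type='Float'),
    _pipeline_param.PipelineParam(
        name='examples', op_name='preprocess', param_type='Dataset'),
]
# For the root component the original param names are kept as input names; for
# a nested component each name would first be rewritten by
# additional_input_name_for_pipelineparam.
build_component_inputs_spec(component_spec, params, is_root_component=True)
# 'learning_rate' lands in input_definitions.parameters and
# 'preprocess-examples' in input_definitions.artifacts.
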
def test_get_artifact_type_schema(self):
  self.assertTrue(
      'title: kfp.Model' in type_utils.get_artifact_type_schema('Model'))
  self.assertTrue(
      'title: kfp.Dataset' in type_utils.get_artifact_type_schema('Dataset'))
  self.assertTrue(
      'title: kfp.Metrics' in type_utils.get_artifact_type_schema('Metrics'))
  self.assertTrue(
      'title: kfp.ClassificationMetrics' in
      type_utils.get_artifact_type_schema('ClassificationMetrics'))
  self.assertTrue(
      'title: kfp.SlicedClassificationMetrics' in
      type_utils.get_artifact_type_schema('SlicedClassificationMetrics'))
  for type_name in _UNKNOWN_ARTIFACT_TYPES:
    self.assertEqual('title: kfp.Artifact\ntype: object\n',
                     type_utils.get_artifact_type_schema(type_name))

def build_component_spec_from_structure(
    component_spec: structures.ComponentSpec,
    executor_label: str,
    actual_inputs: List[str],
) -> pipeline_spec_pb2.ComponentSpec:
  """Builds an IR ComponentSpec instance from structures.ComponentSpec.

  Args:
    component_spec: The structure component spec.
    executor_label: The executor label.
    actual_inputs: The actual arguments passed to the task. This is used as a
      short-term workaround to support optional inputs in component spec IR.

  Returns:
    An instance of IR ComponentSpec.
  """
  result = pipeline_spec_pb2.ComponentSpec()
  result.executor_label = executor_label

  for input_spec in component_spec.inputs or []:
    # Skip inputs that were not actually passed to the task.
    if input_spec.name not in actual_inputs:
      continue
    if type_utils.is_parameter_type(input_spec.type):
      result.input_definitions.parameters[
          input_spec.name].type = type_utils.get_parameter_type(
              input_spec.type)
    else:
      result.input_definitions.artifacts[
          input_spec.name].artifact_type.CopyFrom(
              type_utils.get_artifact_type_schema(input_spec.type))

  for output_spec in component_spec.outputs or []:
    if type_utils.is_parameter_type(output_spec.type):
      result.output_definitions.parameters[
          output_spec.name].type = type_utils.get_parameter_type(
              output_spec.type)
    else:
      result.output_definitions.artifacts[
          output_spec.name].artifact_type.CopyFrom(
              type_utils.get_artifact_type_schema(output_spec.type))

  return result

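# Usage sketch (illustrative only): assumes the kfp.components.structures
# ComponentSpec/InputSpec/OutputSpec classes accept the name/type keywords
# shown here; the component, input, and output names are placeholders.
spec = structures.ComponentSpec(
    name='train',
    inputs=[
        structures.InputSpec(name='epochs', type='Integer'),
        structures.InputSpec(name='examples', type='Dataset'),
        structures.InputSpec(name='debug', type='Boolean'),
    ],
    outputs=[structures.OutputSpec(name='model', type='Model')],
)
ir_spec = build_component_spec_from_structure(
    component_spec=spec,
    executor_label='exec-train',
    actual_inputs=['epochs', 'examples'],  # 'debug' was not passed, so it is skipped
)
# ir_spec.input_definitions now holds the 'epochs' parameter and the
# 'examples' artifact; the unused 'debug' input is omitted from the IR.
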
def importer(artifact_uri: Union[_pipeline_param.PipelineParam, str],
             artifact_class: Type[io_types.Artifact],
             reimport: bool = False) -> _container_op.ContainerOp:
  """dsl.importer for importing an existing artifact. Only for v2 pipeline.

  Args:
    artifact_uri: The artifact uri to import from.
    artifact_class: The artifact class of the artifact to be imported.
    reimport: Whether to reimport the artifact. Defaults to False.

  Returns:
    A ContainerOp instance.

  Raises:
    ValueError if the passed in artifact_uri is neither a PipelineParam nor a
      constant string value.
  """
  if isinstance(artifact_uri, _pipeline_param.PipelineParam):
    input_param = artifact_uri
  elif isinstance(artifact_uri, str):
    input_param = _pipeline_param.PipelineParam(
        name='uri', value=artifact_uri, param_type='String')
  else:
    raise ValueError(
        'Importer got unexpected artifact_uri: {} of type: {}.'.format(
            artifact_uri, type(artifact_uri)))

  old_warn_value = _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = True
  task = _container_op.ContainerOp(
      name='importer',
      image='importer_image',  # TODO: need a v1 implementation of importer.
      file_outputs={
          OUTPUT_KEY: "{{{{$.outputs.artifacts['{}'].uri}}}}".format(OUTPUT_KEY)
      },
  )
  _container_op.ContainerOp._DISABLE_REUSABLE_COMPONENT_WARNING = old_warn_value

  artifact_type_schema = type_utils.get_artifact_type_schema(artifact_class)
  task.importer_spec = _build_importer_spec(
      artifact_uri=artifact_uri, artifact_type_schema=artifact_type_schema)
  task.task_spec = _build_importer_task_spec(
      importer_base_name=task.name, artifact_uri=artifact_uri)
  task.component_spec = _build_importer_component_spec(
      importer_base_name=task.name, artifact_type_schema=artifact_type_schema)
  task.inputs = [input_param]

  return task

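# Usage sketch (illustrative only): the URI is a placeholder and io_types is
# assumed to expose a Dataset artifact class as in the kfp v2 DSL.
import_op = importer(
    artifact_uri='gs://my-bucket/datasets/train.csv',  # hypothetical URI
    artifact_class=io_types.Dataset,
    reimport=False,
)
# import_op.outputs[OUTPUT_KEY] can then be wired into a downstream task that
# consumes a Dataset artifact.
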
def build_component_outputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
) -> None:
  """Builds component outputs spec from pipeline params.

  Args:
    component_spec: The component spec to fill in its outputs spec.
    pipeline_params: The list of pipeline params.
  """
  for param in pipeline_params or []:
    output_name = param.full_name
    if type_utils.is_parameter_type(param.param_type):
      component_spec.output_definitions.parameters[
          output_name].type = type_utils.get_parameter_type(param.param_type)
    elif output_name not in getattr(component_spec.output_definitions,
                                    'parameters', []):
      component_spec.output_definitions.artifacts[
          output_name].artifact_type.CopyFrom(
              type_utils.get_artifact_type_schema(param.param_type))

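# Usage sketch (illustrative only): assumes the kfp.dsl
# PipelineParam(name, op_name=..., param_type=...) constructor; the op and
# output names are placeholders.
component_spec = pipeline_spec_pb2.ComponentSpec()
build_component_outputs_spec(component_spec, [
    _pipeline_param.PipelineParam(
        name='accuracy', op_name='eval', param_type='Float'),
    _pipeline_param.PipelineParam(
        name='model', op_name='train', param_type='Model'),
])
# 'eval-accuracy' is added to output_definitions.parameters, while
# 'train-model' is added to output_definitions.artifacts with the Model schema.
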
def build_component_inputs_spec(
    component_spec: pipeline_spec_pb2.ComponentSpec,
    pipeline_params: List[_pipeline_param.PipelineParam],
) -> None:
  """Builds component inputs spec from pipeline params.

  Args:
    component_spec: The component spec to fill in its inputs spec.
    pipeline_params: The list of pipeline params.
  """
  for param in pipeline_params:
    input_name = param.full_name
    if type_utils.is_parameter_type(param.param_type):
      component_spec.input_definitions.parameters[
          input_name].type = type_utils.get_parameter_type(param.param_type)
    else:
      component_spec.input_definitions.artifacts[
          input_name].artifact_type.instance_schema = (
              type_utils.get_artifact_type_schema(param.param_type))

def test_get_artifact_type_schema(self, artifact_class_or_type_name,
                                  expected_result):
  self.assertEqual(
      expected_result,
      type_utils.get_artifact_type_schema(artifact_class_or_type_name))

def _get_custom_job_op(
    task_name: str,
    job_spec: Dict[str, Any],
    input_artifacts: Optional[Dict[str, dsl.PipelineParam]] = None,
    input_parameters: Optional[Dict[str, _ValueOrPipelineParam]] = None,
    output_artifacts: Optional[Dict[str, Type[io_types.Artifact]]] = None,
    output_parameters: Optional[Dict[str, Any]] = None,
) -> AiPlatformCustomJobOp:
  """Gets an AiPlatformCustomJobOp from job spec and I/O definition."""
  pipeline_task_spec = pipeline_spec_pb2.PipelineTaskSpec()
  pipeline_component_spec = pipeline_spec_pb2.ComponentSpec()

  pipeline_task_spec.task_info.CopyFrom(
      pipeline_spec_pb2.PipelineTaskInfo(
          name=dsl_utils.sanitize_task_name(task_name)))

  # Iterate through the inputs/outputs declaration to get pipeline component
  # spec.
  for input_name, param in input_parameters.items():
    if isinstance(param, dsl.PipelineParam):
      pipeline_component_spec.input_definitions.parameters[
          input_name].type = type_utils.get_parameter_type(param.param_type)
    else:
      pipeline_component_spec.input_definitions.parameters[
          input_name].type = type_utils.get_parameter_type(type(param))

  for input_name, art in input_artifacts.items():
    if not isinstance(art, dsl.PipelineParam):
      raise RuntimeError(
          'Get unresolved input artifact for input %s. Input '
          'artifacts must be connected to a producer task.' % input_name)
    pipeline_component_spec.input_definitions.artifacts[
        input_name].artifact_type.CopyFrom(
            type_utils.get_artifact_type_schema(art.param_type))

  for output_name, param_type in output_parameters.items():
    pipeline_component_spec.output_definitions.parameters[
        output_name].type = type_utils.get_parameter_type(param_type)

  for output_name, artifact_type in output_artifacts.items():
    pipeline_component_spec.output_definitions.artifacts[
        output_name].artifact_type.CopyFrom(
            type_utils.get_artifact_type_schema(artifact_type))

  pipeline_component_spec.executor_label = dsl_utils.sanitize_executor_label(
      task_name)

  # Iterate through the inputs/outputs specs to get pipeline task spec.
  for input_name, param in input_parameters.items():
    if isinstance(param, dsl.PipelineParam) and param.op_name:
      # If the param has a valid op_name, this should be a pipeline parameter
      # produced by an upstream task.
      pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
              task_output_parameter=pipeline_spec_pb2.TaskInputsSpec
              .InputParameterSpec.TaskOutputParameterSpec(
                  producer_task=dsl_utils.sanitize_task_name(param.op_name),
                  output_parameter_key=param.name)))
    elif isinstance(param, dsl.PipelineParam) and not param.op_name:
      # If a valid op_name is missing, this should be a pipeline parameter.
      pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
              component_input_parameter=param.name))
    else:
      # If this is not a pipeline param, then it should be a value.
      pipeline_task_spec.inputs.parameters[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputParameterSpec(
              runtime_value=pipeline_spec_pb2.ValueOrRuntimeParameter(
                  constant_value=dsl_utils.get_value(param))))

  for input_name, art in input_artifacts.items():
    if art.op_name:
      # If the param has a valid op_name, this should be an artifact produced
      # by an upstream task.
      pipeline_task_spec.inputs.artifacts[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec(
              task_output_artifact=pipeline_spec_pb2.TaskInputsSpec
              .InputArtifactSpec.TaskOutputArtifactSpec(
                  producer_task=dsl_utils.sanitize_task_name(art.op_name),
                  output_artifact_key=art.name)))
    else:
      # Otherwise, this should be from the input of the subdag.
      pipeline_task_spec.inputs.artifacts[input_name].CopyFrom(
          pipeline_spec_pb2.TaskInputsSpec.InputArtifactSpec(
              component_input_artifact=art.name))

  # TODO: Add task dependencies/trigger policies/caching/iterator
  pipeline_task_spec.component_ref.name = dsl_utils.sanitize_component_name(
      task_name)

  # Construct dummy I/O declaration for the op.
  # TODO: resolve name conflict instead of raising errors.
  dummy_outputs = collections.OrderedDict()
  for output_name, _ in output_artifacts.items():
    dummy_outputs[output_name] = _DUMMY_PATH

  for output_name, _ in output_parameters.items():
    if output_name in dummy_outputs:
      raise KeyError(
          'Got name collision for output key %s. Consider renaming '
          'either output parameters or output artifacts.' % output_name)
    dummy_outputs[output_name] = _DUMMY_PATH

  dummy_inputs = collections.OrderedDict()
  for input_name, art in input_artifacts.items():
    dummy_inputs[input_name] = _DUMMY_PATH
  for input_name, param in input_parameters.items():
    if input_name in dummy_inputs:
      raise KeyError(
          'Got name collision for input key %s. Consider renaming '
          'either input parameters or input artifacts.' % input_name)
    dummy_inputs[input_name] = _DUMMY_PATH

  # Construct the AIP (Unified) custom job op.
  return AiPlatformCustomJobOp(
      name=task_name,
      custom_job_spec=job_spec,
      component_spec=pipeline_component_spec,
      task_spec=pipeline_task_spec,
      task_inputs=[
          dsl.InputArgumentPath(
              argument=dummy_inputs[input_name],
              input=input_name,
              path=path,
          ) for input_name, path in dummy_inputs.items()
      ],
      task_outputs=dummy_outputs)

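# Usage sketch (illustrative only): the job spec, task name, and I/O below are
# placeholders; 'preprocess_task' stands for an upstream op whose output is a
# dsl.PipelineParam, and io_types.Model is assumed to be available as an
# artifact class.
custom_job_op = _get_custom_job_op(
    task_name='train-on-vertex',
    job_spec={'displayName': 'train-on-vertex'},  # hypothetical CustomJob spec
    input_artifacts={'examples': preprocess_task.outputs['examples']},
    input_parameters={'learning_rate': 0.01},
    output_artifacts={'model': io_types.Model},
    output_parameters={'accuracy': float},
)
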