  def _group_to_dag_spec(
      self,
      group: dsl.OpsGroup,
      inputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      outputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      dependencies: Dict[str, List[_GroupOrOp]],
      pipeline_spec: pipeline_spec_pb2.PipelineSpec,
      deployment_config: pipeline_spec_pb2.PipelineDeploymentConfig,
      rootgroup_name: str,
  ) -> None:
    """Generate IR spec given an OpsGroup.

    Args:
      group: The OpsGroup to generate spec for.
      inputs: The inputs dictionary. The keys are group/op names and values are
        lists of tuples (param, producing_op_name).
      outputs: The outputs dictionary. The keys are group/op names and values
        are lists of tuples (param, producing_op_name).
      dependencies: The group dependencies dictionary. The keys are group/op
        names, and the values are lists of dependent groups/ops.
      pipeline_spec: The pipeline_spec to update in-place.
      deployment_config: The deployment_config to hold all executors.
      rootgroup_name: The name of the group root. Used to determine whether the
        component spec for the current group should be the root dag.
    """
    group_component_name = dsl_utils.sanitize_component_name(group.name)

    if group.name == rootgroup_name:
      group_component_spec = pipeline_spec.root
    else:
      group_component_spec = pipeline_spec.components[group_component_name]

    # Generate task specs and component specs for the dag.
    subgroups = group.groups + group.ops
    for subgroup in subgroups:
      subgroup_task_spec = getattr(subgroup, 'task_spec',
                                   pipeline_spec_pb2.PipelineTaskSpec())
      subgroup_component_spec = getattr(subgroup, 'component_spec',
                                        pipeline_spec_pb2.ComponentSpec())

      is_loop_subgroup = isinstance(group, dsl.ParallelFor)
      is_recursive_subgroup = (
          isinstance(subgroup, dsl.OpsGroup) and subgroup.recursive_ref)

      # Special handling for recursive subgroup: use the existing opsgroup
      # name.
      if is_recursive_subgroup:
        subgroup_key = subgroup.recursive_ref.name
      else:
        subgroup_key = subgroup.name

      subgroup_task_spec.task_info.name = (
          subgroup_task_spec.task_info.name or
          dsl_utils.sanitize_task_name(subgroup_key))
      # human_name exists for ops only, and is used to de-dupe component spec.
      subgroup_component_name = (
          subgroup_task_spec.component_ref.name or
          dsl_utils.sanitize_component_name(
              getattr(subgroup, 'human_name', subgroup_key)))
      subgroup_task_spec.component_ref.name = subgroup_component_name

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'graph':
        raise NotImplementedError(
            'dsl.graph_component is not yet supported in KFP v2 compiler.')

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'exit_handler':
        raise NotImplementedError(
            'dsl.ExitHandler is not yet supported in KFP v2 compiler.')

      importer_tasks = []
      # Add importer node when applicable.
      for input_name in subgroup_task_spec.inputs.artifacts:
        if not subgroup_task_spec.inputs.artifacts[
            input_name].task_output_artifact.producer_task:
          type_schema = type_utils.get_input_artifact_type_schema(
              input_name, subgroup._metadata.inputs)

          importer_name = importer_node.generate_importer_base_name(
              dependent_task_name=subgroup_task_spec.task_info.name,
              input_name=input_name)
          importer_task_spec = importer_node.build_importer_task_spec(
              importer_name)
          importer_comp_spec = importer_node.build_importer_component_spec(
              importer_base_name=importer_name,
              input_name=input_name,
              input_type_schema=type_schema)
          importer_task_name = importer_task_spec.task_info.name
          importer_comp_name = importer_task_spec.component_ref.name
          importer_exec_label = importer_comp_spec.executor_label
          group_component_spec.dag.tasks[importer_task_name].CopyFrom(
              importer_task_spec)
          pipeline_spec.components[importer_comp_name].CopyFrom(
              importer_comp_spec)

          subgroup_task_spec.inputs.artifacts[
              input_name].task_output_artifact.producer_task = (
                  importer_task_name)
          subgroup_task_spec.inputs.artifacts[
              input_name].task_output_artifact.output_artifact_key = (
                  importer_node.OUTPUT_KEY)

          # Retrieve the pre-built importer spec.
          importer_spec = subgroup.importer_specs[input_name]
          deployment_config.executors[importer_exec_label].importer.CopyFrom(
              importer_spec)

          importer_tasks.append(importer_task_name)

      group_inputs = inputs.get(group.name, [])
      subgroup_inputs = inputs.get(subgroup.name, [])
      subgroup_params = [param for param, _ in subgroup_inputs]

      tasks_in_current_dag = [
          dsl_utils.sanitize_task_name(subgroup.name) for subgroup in subgroups
      ] + importer_tasks

      is_parent_component_root = group_component_spec == pipeline_spec.root

      # Additional spec modifications for dsl.ParallelFor's subgroups.
      if is_loop_subgroup:
        self._update_loop_specs(group, subgroup, group_component_spec,
                                subgroup_component_spec, subgroup_task_spec)
      elif isinstance(subgroup, dsl.ContainerOp):
        dsl_component_spec.update_task_inputs_spec(
            subgroup_task_spec,
            group_component_spec.input_definitions,
            subgroup_params,
            tasks_in_current_dag,
        )

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'condition':
        # "Punch the hole", adding inputs needed by its subgroup or tasks.
        dsl_component_spec.build_component_inputs_spec(
            component_spec=subgroup_component_spec,
            pipeline_params=subgroup_params,
            is_root_component=False,
        )
        dsl_component_spec.build_task_inputs_spec(
            subgroup_task_spec,
            subgroup_params,
            tasks_in_current_dag,
            is_parent_component_root,
        )

        condition = subgroup.condition
        operand_values = []
        for operand in [condition.operand1, condition.operand2]:
          operand_values.append(self._resolve_value_or_reference(operand))

        condition_string = '{} {} {}'.format(operand_values[0],
                                             condition.operator,
                                             operand_values[1])

        subgroup_task_spec.trigger_policy.CopyFrom(
            pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy(
                condition=condition_string))

      # Generate dependencies section for this task.
      if dependencies.get(subgroup.name, None):
        group_dependencies = list(dependencies[subgroup.name])
        group_dependencies.sort()
        subgroup_task_spec.dependent_tasks.extend(
            [dsl_utils.sanitize_task_name(dep) for dep in group_dependencies])

      if isinstance(subgroup, dsl.ParallelFor):
        if subgroup.parallelism is not None:
          warnings.warn(
              'Setting parallelism in ParallelFor is not supported yet. '
              'The setting is ignored.')

        # Remove loop-arguments-related inputs from the parent group's
        # component spec.
        input_names = [param.full_name for param, _ in inputs[subgroup.name]]
        for input_name in input_names:
          if _for_loop.LoopArguments.name_is_loop_argument(input_name):
            dsl_component_spec.pop_input_from_component_spec(
                group_component_spec, input_name)

        if subgroup.items_is_pipeline_param:
          # These loop args are a 'withParam' rather than a 'withItems',
          # i.e., rather than a static list, they are either the output of
          # another task or a global pipeline parameter.
          pipeline_param = subgroup.loop_args.items_or_pipeline_param
          input_parameter_name = pipeline_param.full_name

          if pipeline_param.op_name:
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].task_output_parameter.producer_task = (
                    dsl_utils.sanitize_task_name(pipeline_param.op_name))
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].task_output_parameter.output_parameter_key = (
                    pipeline_param.name)
          else:
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].component_input_parameter = (
                    input_parameter_name)

          if pipeline_param.op_name is None:
            # The input parameter comes from the pipeline function rather than
            # a component output, so correct the loop argument's input type in
            # the parent component spec. The loop argument was categorized as
            # an artifact due to its missing or non-primitive type annotation,
            # but it should always be String typed, as its value is a
            # serialized JSON string.
            dsl_component_spec.pop_input_from_component_spec(
                group_component_spec, input_parameter_name)
            group_component_spec.input_definitions.parameters[
                input_parameter_name].type = (
                    pipeline_spec_pb2.PrimitiveType.STRING)

      # Add the component spec if it does not exist yet.
      if subgroup_component_name not in pipeline_spec.components:
        pipeline_spec.components[subgroup_component_name].CopyFrom(
            subgroup_component_spec)

      # Add the task spec.
      group_component_spec.dag.tasks[
          subgroup_task_spec.task_info.name].CopyFrom(subgroup_task_spec)

      # Add the executor spec, if applicable.
      container_spec = getattr(subgroup, 'container_spec', None)
      if container_spec:
        if compiler_utils.is_v2_component(subgroup):
          compiler_utils.refactor_v2_container_spec(container_spec)
        executor_label = subgroup_component_spec.executor_label

        if executor_label not in deployment_config.executors:
          deployment_config.executors[executor_label].container.CopyFrom(
              container_spec)

      # Add the AIPlatformCustomJobSpec, if applicable.
      custom_job_spec = getattr(subgroup, 'custom_job_spec', None)
      if custom_job_spec:
        executor_label = subgroup_component_spec.executor_label
        if executor_label not in deployment_config.executors:
          deployment_config.executors[
              executor_label].custom_job.custom_job.update(custom_job_spec)

    pipeline_spec.deployment_spec.update(
        json_format.MessageToDict(deployment_config))
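  # A hedged, illustrative sketch of the dictionary shapes the method above
  # consumes. The op/group names ('flip-coin', 'condition-1') are
  # hypothetical, and the exact condition string depends on what
  # _resolve_value_or_reference returns for each operand:
  #
  #   inputs = {
  #       'condition-1': [
  #           (dsl.PipelineParam(name='output', op_name='flip-coin'),
  #            'flip-coin'),
  #       ],
  #   }
  #   dependencies = {'condition-1': ['flip-coin']}
  #
  # For a dsl.Condition(flip.output == 'heads') subgroup, the emitted trigger
  # policy condition might then look roughly like:
  #
  #   inputs.parameters['pipelineparam--flip-coin-output'].string_value == 'heads'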
  def _group_to_dag_spec(
      self,
      group: dsl.OpsGroup,
      inputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      outputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      dependencies: Dict[str, List[_GroupOrOp]],
      pipeline_spec: pipeline_spec_pb2.PipelineSpec,
      deployment_config: pipeline_spec_pb2.PipelineDeploymentConfig,
      rootgroup_name: str,
      op_to_parent_groups: Dict[str, List[str]],
  ) -> None:
    """Generate IR spec given an OpsGroup.

    Args:
      group: The OpsGroup to generate spec for.
      inputs: The inputs dictionary. The keys are group/op names and values are
        lists of tuples (param, producing_op_name).
      outputs: The outputs dictionary. The keys are group/op names and values
        are lists of tuples (param, producing_op_name).
      dependencies: The group dependencies dictionary. The keys are group/op
        names, and the values are lists of dependent groups/ops.
      pipeline_spec: The pipeline_spec to update in-place.
      deployment_config: The deployment_config to hold all executors.
      rootgroup_name: The name of the root group. Used to determine whether the
        component spec for the current group should be the root dag.
      op_to_parent_groups: The dict of op name to parent groups. The key is the
        op's name. The value is a list of ancestor groups including the op
        itself, sorted so that the outermost group comes first and the op
        itself comes last.
    """
    group_component_name = dsl_utils.sanitize_component_name(group.name)

    if group.name == rootgroup_name:
      group_component_spec = pipeline_spec.root
    else:
      group_component_spec = pipeline_spec.components[group_component_name]

    # Generate task specs and component specs for the dag.
    subgroups = group.groups + group.ops
    for subgroup in subgroups:
      subgroup_task_spec = getattr(subgroup, 'task_spec',
                                   pipeline_spec_pb2.PipelineTaskSpec())
      subgroup_component_spec = getattr(subgroup, 'component_spec',
                                        pipeline_spec_pb2.ComponentSpec())

      is_recursive_subgroup = (
          isinstance(subgroup, dsl.OpsGroup) and subgroup.recursive_ref)

      # Special handling for recursive subgroup: use the existing opsgroup
      # name.
      if is_recursive_subgroup:
        subgroup_key = subgroup.recursive_ref.name
      else:
        subgroup_key = subgroup.name

      subgroup_task_spec.task_info.name = (
          subgroup_task_spec.task_info.name or
          dsl_utils.sanitize_task_name(subgroup_key))
      # human_name exists for ops only, and is used to de-dupe component spec.
      subgroup_component_name = (
          subgroup_task_spec.component_ref.name or
          dsl_utils.sanitize_component_name(
              getattr(subgroup, 'human_name', subgroup_key)))
      subgroup_task_spec.component_ref.name = subgroup_component_name

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'graph':
        raise NotImplementedError(
            'dsl.graph_component is not yet supported in KFP v2 compiler.')

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'exit_handler':
        raise NotImplementedError(
            'dsl.ExitHandler is not yet supported in KFP v2 compiler.')

      if isinstance(subgroup, dsl.ContainerOp):
        if hasattr(subgroup, 'importer_spec'):
          importer_task_name = subgroup.task_spec.task_info.name
          importer_comp_name = subgroup.task_spec.component_ref.name
          importer_exec_label = subgroup.component_spec.executor_label
          group_component_spec.dag.tasks[importer_task_name].CopyFrom(
              subgroup.task_spec)
          pipeline_spec.components[importer_comp_name].CopyFrom(
              subgroup.component_spec)
          deployment_config.executors[importer_exec_label].importer.CopyFrom(
              subgroup.importer_spec)

      subgroup_inputs = inputs.get(subgroup.name, [])
      subgroup_params = [param for param, _ in subgroup_inputs]

      tasks_in_current_dag = [
          dsl_utils.sanitize_task_name(subgroup.name) for subgroup in subgroups
      ]

      input_parameters_in_current_dag = [
          input_name
          for input_name in group_component_spec.input_definitions.parameters
      ]
      input_artifacts_in_current_dag = [
          input_name
          for input_name in group_component_spec.input_definitions.artifacts
      ]

      is_parent_component_root = group_component_spec == pipeline_spec.root

      if isinstance(subgroup, dsl.ContainerOp):
        dsl_component_spec.update_task_inputs_spec(
            subgroup_task_spec,
            group_component_spec.input_definitions,
            subgroup_params,
            tasks_in_current_dag,
            input_parameters_in_current_dag,
            input_artifacts_in_current_dag,
        )

      if isinstance(subgroup, dsl.ParallelFor):
        if subgroup.parallelism is not None:
          warnings.warn(
              'Setting parallelism in ParallelFor is not supported yet. '
              'The setting is ignored.')

        # "Punch the hole", adding additional inputs (other than the loop
        # arguments, which are handled separately) needed by its subgroups or
        # tasks.
        loop_subgroup_params = []
        for param in subgroup_params:
          if isinstance(
              param, (_for_loop.LoopArguments, _for_loop.LoopArgumentVariable)):
            continue
          loop_subgroup_params.append(param)

        if subgroup.items_is_pipeline_param:
          # This loop_args is a 'withParam' rather than a 'withItems',
          # i.e., rather than a static list, it is either the output of
          # another task or a global pipeline parameter.
          loop_subgroup_params.append(
              subgroup.loop_args.items_or_pipeline_param)

        dsl_component_spec.build_component_inputs_spec(
            component_spec=subgroup_component_spec,
            pipeline_params=loop_subgroup_params,
            is_root_component=False,
        )
        dsl_component_spec.build_task_inputs_spec(
            subgroup_task_spec,
            loop_subgroup_params,
            tasks_in_current_dag,
            is_parent_component_root,
        )

        if subgroup.items_is_pipeline_param:
          input_parameter_name = (
              dsl_component_spec.additional_input_name_for_pipelineparam(
                  subgroup.loop_args.items_or_pipeline_param))
          loop_arguments_item = '{}-{}'.format(
              input_parameter_name, _for_loop.LoopArguments.LOOP_ITEM_NAME_BASE)

          subgroup_component_spec.input_definitions.parameters[
              loop_arguments_item].type = (
                  pipeline_spec_pb2.PrimitiveType.STRING)
          subgroup_task_spec.parameter_iterator.items.input_parameter = (
              input_parameter_name)
          subgroup_task_spec.parameter_iterator.item_input = (
              loop_arguments_item)

          # If the loop argument itself is a loop arguments variable, handle
          # the subvar name.
          loop_args_name, subvar_name = (
              dsl_component_spec._exclude_loop_arguments_variables(
                  subgroup.loop_args.items_or_pipeline_param))
          if subvar_name:
            subgroup_task_spec.inputs.parameters[
                input_parameter_name].parameter_expression_selector = (
                    'parseJson(string_value)["{}"]'.format(subvar_name))

          subgroup_task_spec.inputs.parameters[
              input_parameter_name].component_input_parameter = (
                  dsl_component_spec.additional_input_name_for_pipelineparam(
                      loop_args_name))
        else:
          input_parameter_name = (
              dsl_component_spec.additional_input_name_for_pipelineparam(
                  subgroup.loop_args.full_name))
          raw_values = subgroup.loop_args.to_list_for_task_yaml()

          subgroup_component_spec.input_definitions.parameters[
              input_parameter_name].type = (
                  pipeline_spec_pb2.PrimitiveType.STRING)
          subgroup_task_spec.parameter_iterator.items.raw = json.dumps(
              raw_values, sort_keys=True)
          subgroup_task_spec.parameter_iterator.item_input = (
              input_parameter_name)

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'condition':
        # "Punch the hole", adding inputs needed by its subgroup or tasks.
        dsl_component_spec.build_component_inputs_spec(
            component_spec=subgroup_component_spec,
            pipeline_params=subgroup_params,
            is_root_component=False,
        )
        dsl_component_spec.build_task_inputs_spec(
            subgroup_task_spec,
            subgroup_params,
            tasks_in_current_dag,
            is_parent_component_root,
        )

        condition = subgroup.condition
        operand1_value, operand2_value = self._resolve_condition_operands(
            condition.operand1, condition.operand2)

        condition_string = '{} {} {}'.format(operand1_value,
                                             condition.operator,
                                             operand2_value)

        subgroup_task_spec.trigger_policy.CopyFrom(
            pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy(
                condition=condition_string))

      # Generate dependencies section for this task.
      if dependencies.get(subgroup.name, None):
        group_dependencies = list(dependencies[subgroup.name])
        group_dependencies.sort()
        subgroup_task_spec.dependent_tasks.extend(
            [dsl_utils.sanitize_task_name(dep) for dep in group_dependencies])

      # Add the component spec if it does not exist yet.
      if subgroup_component_name not in pipeline_spec.components:
        pipeline_spec.components[subgroup_component_name].CopyFrom(
            subgroup_component_spec)

      # Add the task spec.
      group_component_spec.dag.tasks[
          subgroup_task_spec.task_info.name].CopyFrom(subgroup_task_spec)

      # Add the AIPlatformCustomJobSpec, if applicable.
      custom_job_spec = getattr(subgroup, 'custom_job_spec', None)
      if custom_job_spec:
        executor_label = subgroup_component_spec.executor_label
        if executor_label not in deployment_config.executors:
          deployment_config.executors[
              executor_label].custom_job.custom_job.update(custom_job_spec)

      # Add the executor spec, if applicable.
      container_spec = getattr(subgroup, 'container_spec', None)
      # Ignore container_spec if custom_job_spec exists.
      if container_spec and not custom_job_spec:
        if compiler_utils.is_v2_component(subgroup):
          compiler_utils.refactor_v2_container_spec(container_spec)
        executor_label = subgroup_component_spec.executor_label

        if executor_label not in deployment_config.executors:
          deployment_config.executors[executor_label].container.CopyFrom(
              container_spec)

    pipeline_spec.deployment_spec.update(
        json_format.MessageToDict(deployment_config))

    # Surface metrics outputs to the top.
    self._populate_metrics_in_dag_outputs(
        group.ops,
        op_to_parent_groups,
        pipeline_spec,
    )
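  # A hedged sketch of the two parameter_iterator shapes produced by the
  # ParallelFor branch above, in proto text format. The field paths follow
  # pipeline_spec_pb2.PipelineTaskSpec; the input parameter names are
  # illustrative, since the real ones come from
  # additional_input_name_for_pipelineparam.
  #
  # 'withItems' (static list), e.g. dsl.ParallelFor([1, 2, 3]):
  #
  #   parameter_iterator {
  #     items { raw: "[1, 2, 3]" }
  #     item_input: "pipelineparam--loop-item-param-1"
  #   }
  #
  # 'withParam' (items produced by another task or passed in as a pipeline
  # parameter): items.raw is replaced by items.input_parameter referencing the
  # punched-through input, and the loop item gets its own STRING input:
  #
  #   parameter_iterator {
  #     items { input_parameter: "pipelineparam--list-producer-output" }
  #     item_input: "pipelineparam--list-producer-output-loop-item"
  #   }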
  def _create_pipeline_spec(
      self,
      args: List[dsl.PipelineParam],
      pipeline: dsl.Pipeline,
  ) -> pipeline_spec_pb2.PipelineSpec:
    """Creates the pipeline spec object.

    Args:
      args: The list of pipeline arguments.
      pipeline: The instantiated pipeline object.

    Returns:
      A PipelineSpec proto representing the compiled pipeline.

    Raises:
      NotImplementedError if the argument is of unsupported types.
    """
    compiler_utils.validate_pipeline_name(pipeline.name)

    pipeline_spec = pipeline_spec_pb2.PipelineSpec()

    pipeline_spec.pipeline_info.name = pipeline.name
    pipeline_spec.sdk_version = 'kfp-{}'.format(kfp.__version__)
    # Schema version 2.0.0 is required for kfp-pipeline-spec>0.1.3.1
    pipeline_spec.schema_version = '2.0.0'

    pipeline_spec.root.CopyFrom(
        dsl_component_spec.build_root_spec_from_pipeline_params(args))

    deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()

    for op in pipeline.ops.values():
      task_name = op.task_spec.task_info.name
      component_name = op.task_spec.component_ref.name
      executor_label = op.component_spec.executor_label

      pipeline_spec.root.dag.tasks[task_name].CopyFrom(op.task_spec)
      pipeline_spec.components[component_name].CopyFrom(op.component_spec)

      if compiler_utils.is_v2_component(op):
        compiler_utils.refactor_v2_container_spec(op.container_spec)
      deployment_config.executors[executor_label].container.CopyFrom(
          op.container_spec)

      task = pipeline_spec.root.dag.tasks[task_name]
      # A task may have an explicit dependency on other tasks even though they
      # have no input/output dependency, e.g.: op2.after(op1)
      if op.dependent_names:
        op.dependent_names = [
            dsl_utils.sanitize_task_name(name) for name in op.dependent_names
        ]
        task.dependent_tasks.extend(op.dependent_names)

      # Check whether an importer node needs to be inserted.
      for input_name in task.inputs.artifacts:
        if not task.inputs.artifacts[
            input_name].task_output_artifact.producer_task:
          type_schema = type_utils.get_input_artifact_type_schema(
              input_name, op._metadata.inputs)

          importer_name = importer_node.generate_importer_base_name(
              dependent_task_name=task_name, input_name=input_name)
          importer_task_spec = importer_node.build_importer_task_spec(
              importer_name)
          importer_comp_spec = importer_node.build_importer_component_spec(
              importer_base_name=importer_name,
              input_name=input_name,
              input_type_schema=type_schema)
          importer_task_name = importer_task_spec.task_info.name
          importer_comp_name = importer_task_spec.component_ref.name
          importer_exec_label = importer_comp_spec.executor_label
          pipeline_spec.root.dag.tasks[importer_task_name].CopyFrom(
              importer_task_spec)
          pipeline_spec.components[importer_comp_name].CopyFrom(
              importer_comp_spec)

          task.inputs.artifacts[
              input_name].task_output_artifact.producer_task = (
                  importer_task_name)
          task.inputs.artifacts[
              input_name].task_output_artifact.output_artifact_key = (
                  importer_node.OUTPUT_KEY)

          # Retrieve the pre-built importer spec.
          importer_spec = op.importer_specs[input_name]
          deployment_config.executors[importer_exec_label].importer.CopyFrom(
              importer_spec)

    pipeline_spec.deployment_spec.update(
        json_format.MessageToDict(deployment_config))

    return pipeline_spec
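  # A hedged usage sketch for _create_pipeline_spec, assuming it is invoked
  # from within the compiler once the pipeline function has been traced into
  # a dsl.Pipeline object (the pipeline name below is hypothetical):
  #
  #   with dsl.Pipeline('my-pipeline') as dsl_pipeline:
  #     ...  # instantiate ops; each op carries its task_spec/component_spec
  #
  #   spec = self._create_pipeline_spec(args=[], pipeline=dsl_pipeline)
  #   assert spec.pipeline_info.name == 'my-pipeline'
  #   assert spec.schema_version == '2.0.0'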
  def _group_to_dag_spec(
      self,
      group: dsl.OpsGroup,
      inputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      outputs: Dict[str, List[Tuple[dsl.PipelineParam, str]]],
      dependencies: Dict[str, List[_GroupOrOp]],
      pipeline_spec: pipeline_spec_pb2.PipelineSpec,
      rootgroup_name: str,
  ) -> None:
    """Generate IR spec given an OpsGroup.

    Args:
      group: The OpsGroup to generate spec for.
      inputs: The inputs dictionary. The keys are group/op names and values are
        lists of tuples (param, producing_op_name).
      outputs: The outputs dictionary. The keys are group/op names and values
        are lists of tuples (param, producing_op_name).
      dependencies: The group dependencies dictionary. The keys are group/op
        names, and the values are lists of dependent groups/ops.
      pipeline_spec: The pipeline_spec to update in-place.
      rootgroup_name: The name of the group root. Used to determine whether the
        component spec for the current group should be the root dag.
    """
    group_component_name = dsl_utils.sanitize_component_name(group.name)

    if group.name == rootgroup_name:
      group_component_spec = pipeline_spec.root
    else:
      group_component_spec = pipeline_spec.components[group_component_name]

    deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()

    # Generate component inputs spec.
    if inputs.get(group.name, None):
      dsl_component_spec.build_component_inputs_spec(
          group_component_spec, [param for param, _ in inputs[group.name]])

    # Generate component outputs spec.
    if outputs.get(group.name, None):
      group_component_spec.output_definitions.CopyFrom(
          dsl_component_spec.build_component_outputs_spec(
              [param for param, _ in outputs[group.name]]))

    # Generate task specs and component specs for the dag.
    subgroups = group.groups + group.ops
    for subgroup in subgroups:
      subgroup_task_spec = getattr(subgroup, 'task_spec',
                                   pipeline_spec_pb2.PipelineTaskSpec())
      subgroup_component_spec = getattr(subgroup, 'component_spec',
                                        pipeline_spec_pb2.ComponentSpec())

      is_recursive_subgroup = (
          isinstance(subgroup, dsl.OpsGroup) and subgroup.recursive_ref)

      # Special handling for recursive subgroup: use the existing opsgroup
      # name.
      if is_recursive_subgroup:
        subgroup_key = subgroup.recursive_ref.name
      else:
        subgroup_key = subgroup.name

      subgroup_task_spec.task_info.name = dsl_utils.sanitize_task_name(
          subgroup_key)
      # human_name exists for ops only, and is used to de-dupe component spec.
      subgroup_component_name = dsl_utils.sanitize_component_name(
          getattr(subgroup, 'human_name', subgroup_key))
      subgroup_task_spec.component_ref.name = subgroup_component_name

      if isinstance(subgroup, dsl.OpsGroup) and subgroup.type == 'condition':
        condition = subgroup.condition
        operand_values = []

        subgroup_inputs = inputs.get(subgroup.name, [])
        subgroup_params = [param for param, _ in subgroup_inputs]

        tasks_in_current_dag = [subgroup.name for subgroup in subgroups]

        dsl_component_spec.build_component_inputs_spec(
            subgroup_component_spec,
            subgroup_params,
        )
        dsl_component_spec.build_task_inputs_spec(
            subgroup_task_spec,
            subgroup_params,
            tasks_in_current_dag,
        )

        for operand in [condition.operand1, condition.operand2]:
          operand_values.append(self._resolve_value_or_reference(operand))

        condition_string = '{} {} {}'.format(operand_values[0],
                                             condition.operator,
                                             operand_values[1])

        subgroup_task_spec.trigger_policy.CopyFrom(
            pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy(
                condition=condition_string))

      # Generate dependencies section for this task.
      if dependencies.get(subgroup.name, None):
        group_dependencies = list(dependencies[subgroup.name])
        group_dependencies.sort()
        subgroup_task_spec.dependent_tasks.extend(
            [dsl_utils.sanitize_task_name(dep) for dep in group_dependencies])

      # Add importer node when applicable.
      for input_name in subgroup_task_spec.inputs.artifacts:
        if not subgroup_task_spec.inputs.artifacts[
            input_name].task_output_artifact.producer_task:
          type_schema = type_utils.get_input_artifact_type_schema(
              input_name, subgroup._metadata.inputs)

          importer_name = importer_node.generate_importer_base_name(
              dependent_task_name=subgroup_task_spec.task_info.name,
              input_name=input_name)
          importer_task_spec = importer_node.build_importer_task_spec(
              importer_name)
          importer_comp_spec = importer_node.build_importer_component_spec(
              importer_base_name=importer_name,
              input_name=input_name,
              input_type_schema=type_schema)
          importer_task_name = importer_task_spec.task_info.name
          importer_comp_name = importer_task_spec.component_ref.name
          importer_exec_label = importer_comp_spec.executor_label
          group_component_spec.dag.tasks[importer_task_name].CopyFrom(
              importer_task_spec)
          pipeline_spec.components[importer_comp_name].CopyFrom(
              importer_comp_spec)

          subgroup_task_spec.inputs.artifacts[
              input_name].task_output_artifact.producer_task = (
                  importer_task_name)
          subgroup_task_spec.inputs.artifacts[
              input_name].task_output_artifact.output_artifact_key = (
                  importer_node.OUTPUT_KEY)

          # Retrieve the pre-built importer spec.
          importer_spec = subgroup.importer_specs[input_name]
          deployment_config.executors[importer_exec_label].importer.CopyFrom(
              importer_spec)

      # Add the component spec if it does not exist yet.
      if subgroup_component_name not in pipeline_spec.components:
        pipeline_spec.components[subgroup_component_name].CopyFrom(
            subgroup_component_spec)

      # Add the task spec.
      group_component_spec.dag.tasks[
          subgroup_task_spec.task_info.name].CopyFrom(subgroup_task_spec)

      # Add the executor spec.
      container_spec = getattr(subgroup, 'container_spec', None)
      if container_spec:
        if compiler_utils.is_v2_component(subgroup):
          compiler_utils.refactor_v2_container_spec(container_spec)
        executor_label = subgroup_component_spec.executor_label

        if executor_label not in deployment_config.executors:
          deployment_config.executors[executor_label].container.CopyFrom(
              container_spec)

    pipeline_spec.deployment_spec.update(
        json_format.MessageToDict(deployment_config))
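  # A note on the deployment_spec merge that ends each variant above:
  # PipelineSpec.deployment_spec is a google.protobuf.Struct rather than a
  # typed message, so the PipelineDeploymentConfig is first converted to a
  # plain dict via json_format.MessageToDict and then merged with
  # Struct.update(). A hedged sketch of the resulting structure (the executor
  # label and image are illustrative):
  #
  #   deployment_spec:
  #     executors:
  #       exec-my-op:
  #         container:
  #           image: gcr.io/my-project/my-image:latest
  #           command: [...]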