def test_validate_pipeline_name(self):
    compiler_utils.validate_pipeline_name('my-pipeline')
    compiler_utils.validate_pipeline_name('p' * 128)

    with self.assertRaisesRegex(ValueError, 'Invalid pipeline name: '):
        compiler_utils.validate_pipeline_name('my_pipeline')

    with self.assertRaisesRegex(ValueError, 'Invalid pipeline name: '):
        compiler_utils.validate_pipeline_name('My pipeline')

    with self.assertRaisesRegex(ValueError, 'Invalid pipeline name: '):
        compiler_utils.validate_pipeline_name('-my-pipeline')

    with self.assertRaisesRegex(ValueError, 'Invalid pipeline name: '):
        compiler_utils.validate_pipeline_name('p' * 129)
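The validator these tests exercise is not shown here; a minimal sketch consistent with the cases above (the exact pattern and error message in compiler_utils may differ) would be:

import re


def validate_pipeline_name(name: str) -> None:
    # Sketch only: accepts lowercase alphanumerics and hyphens, rejects a
    # leading hyphen, and caps the length at 128 characters, matching the
    # test cases above.
    if not re.fullmatch(r'[a-z0-9][a-z0-9-]{0,127}', name):
        raise ValueError('Invalid pipeline name: %s' % name)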
def _create_pipeline_spec(
    self,
    args: List[dsl.PipelineParam],
    pipeline: dsl.Pipeline,
) -> pipeline_spec_pb2.PipelineSpec:
    """Creates the pipeline spec object.

    Args:
        args: The list of pipeline arguments.
        pipeline: The instantiated pipeline object.

    Returns:
        A PipelineSpec proto representing the compiled pipeline.

    Raises:
        NotImplementedError: If an argument is of an unsupported type.
    """
    compiler_utils.validate_pipeline_name(pipeline.name)

    deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()
    pipeline_spec = pipeline_spec_pb2.PipelineSpec()

    pipeline_spec.pipeline_info.name = pipeline.name
    pipeline_spec.sdk_version = 'kfp-{}'.format(kfp.__version__)
    # Schema version 2.0.0 is required for kfp-pipeline-spec>0.1.3.1
    pipeline_spec.schema_version = '2.0.0'

    dsl_component_spec.build_component_inputs_spec(
        component_spec=pipeline_spec.root,
        pipeline_params=args,
        is_root_component=True)

    root_group = pipeline.groups[0]

    opsgroups = self._get_groups(root_group)
    op_name_to_parent_groups = self._get_groups_for_ops(root_group)
    opgroup_name_to_parent_groups = self._get_groups_for_opsgroups(root_group)

    condition_params = self._get_condition_params_for_ops(root_group)
    op_name_to_for_loop_op = self._get_for_loop_ops(root_group)

    inputs, outputs = self._get_inputs_outputs(
        pipeline,
        args,
        root_group,
        op_name_to_parent_groups,
        opgroup_name_to_parent_groups,
        condition_params,
        op_name_to_for_loop_op,
    )
    dependencies = self._get_dependencies(
        pipeline,
        root_group,
        op_name_to_parent_groups,
        opgroup_name_to_parent_groups,
        opsgroups,
        condition_params,
    )

    for opsgroup_name in opsgroups.keys():
        self._group_to_dag_spec(
            opsgroups[opsgroup_name],
            inputs,
            outputs,
            dependencies,
            pipeline_spec,
            deployment_config,
            root_group.name,
        )

    return pipeline_spec
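For context, the group machinery above (conditions, for-loops) corresponds to DSL control-flow constructs. A hypothetical pipeline that would exercise those code paths (check_op and process_op are illustrative component factories, not part of the source):

from kfp import dsl


@dsl.pipeline(name='control-flow-example')
def control_flow_pipeline():
    check = check_op()  # hypothetical component
    with dsl.Condition(check.output == 'true'):
        with dsl.ParallelFor([1, 2, 3]) as item:
            process_op(value=item)  # hypothetical component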
def _create_pipeline_spec(
    self,
    args: List[dsl.PipelineParam],
    pipeline: dsl.Pipeline,
) -> pipeline_spec_pb2.PipelineSpec:
    """Creates the pipeline spec object.

    Args:
        args: The list of pipeline arguments.
        pipeline: The instantiated pipeline object.

    Returns:
        A PipelineSpec proto representing the compiled pipeline.

    Raises:
        NotImplementedError: If an argument is of an unsupported type.
    """
    compiler_utils.validate_pipeline_name(pipeline.name)

    pipeline_spec = pipeline_spec_pb2.PipelineSpec(
        runtime_parameters=compiler_utils.build_runtime_parameter_spec(args))

    pipeline_spec.pipeline_info.name = pipeline.name
    pipeline_spec.sdk_version = 'kfp-{}'.format(kfp.__version__)
    pipeline_spec.schema_version = 'v2alpha1'

    deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()
    importer_tasks = []

    for op in pipeline.ops.values():
        component_spec = op._metadata
        task = pipeline_spec.tasks.add()
        task.CopyFrom(op.task_spec)
        deployment_config.executors[task.executor_label].container.CopyFrom(
            op.container_spec)

        # A task may have an explicit dependency on other tasks even though
        # they have no input/output dependency, e.g.: op2.after(op1)
        if op.dependent_names:
            task.dependent_tasks.extend(op.dependent_names)

        # Check whether an importer node needs to be inserted.
        for input_name in task.inputs.artifacts:
            if not task.inputs.artifacts[input_name].producer_task:
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, component_spec.inputs)

                importer_task = importer_node.build_importer_task_spec(
                    dependent_task=task,
                    input_name=input_name,
                    input_type_schema=type_schema)
                importer_tasks.append(importer_task)

                task.inputs.artifacts[
                    input_name].producer_task = importer_task.task_info.name
                task.inputs.artifacts[
                    input_name].output_artifact_key = importer_node.OUTPUT_KEY

                # Retrieve the pre-built importer spec
                importer_spec = op.importer_spec[input_name]
                deployment_config.executors[
                    importer_task.executor_label].importer.CopyFrom(
                        importer_spec)

    pipeline_spec.deployment_config.Pack(deployment_config)
    pipeline_spec.tasks.extend(importer_tasks)

    return pipeline_spec
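Note the serialization style here: deployment_config is embedded with Any.Pack, whereas the later revisions below flatten it into the deployment_spec Struct field via json_format.MessageToDict. A minimal sketch of the two protobuf patterns (the helper names are illustrative):

from google.protobuf import any_pb2, json_format, struct_pb2


def pack_into_any(msg) -> any_pb2.Any:
    # Older style: keeps the message typed, recording its type URL.
    any_field = any_pb2.Any()
    any_field.Pack(msg)
    return any_field


def pack_into_struct(msg) -> struct_pb2.Struct:
    # Newer style: stores the message as loosely typed JSON-like data.
    struct_field = struct_pb2.Struct()
    struct_field.update(json_format.MessageToDict(msg))
    return struct_field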
def _create_pipeline_spec(
    self,
    args: List[dsl.PipelineParam],
    pipeline: dsl.Pipeline,
) -> pipeline_spec_pb2.PipelineSpec:
    """Creates the pipeline spec object.

    Args:
        args: The list of pipeline arguments.
        pipeline: The instantiated pipeline object.

    Returns:
        A PipelineSpec proto representing the compiled pipeline.

    Raises:
        NotImplementedError: If an argument is of an unsupported type.
    """
    compiler_utils.validate_pipeline_name(pipeline.name)

    pipeline_spec = pipeline_spec_pb2.PipelineSpec()

    pipeline_spec.pipeline_info.name = pipeline.name
    pipeline_spec.sdk_version = 'kfp-{}'.format(kfp.__version__)
    # Schema version 2.0.0 is required for kfp-pipeline-spec>0.1.3.1
    pipeline_spec.schema_version = '2.0.0'

    pipeline_spec.root.CopyFrom(
        dsl_component_spec.build_root_spec_from_pipeline_params(args))

    deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()

    for op in pipeline.ops.values():
        task_name = op.task_spec.task_info.name
        component_name = op.task_spec.component_ref.name
        executor_label = op.component_spec.executor_label

        pipeline_spec.root.dag.tasks[task_name].CopyFrom(op.task_spec)
        pipeline_spec.components[component_name].CopyFrom(op.component_spec)
        deployment_config.executors[executor_label].container.CopyFrom(
            op.container_spec)

        task = pipeline_spec.root.dag.tasks[task_name]
        # A task may have an explicit dependency on other tasks even though
        # they have no input/output dependency, e.g.: op2.after(op1)
        if op.dependent_names:
            op.dependent_names = [
                dsl_utils.sanitize_task_name(name)
                for name in op.dependent_names
            ]
            task.dependent_tasks.extend(op.dependent_names)

        # Check whether an importer node needs to be inserted.
        for input_name in task.inputs.artifacts:
            if not task.inputs.artifacts[
                    input_name].task_output_artifact.producer_task:
                type_schema = type_utils.get_input_artifact_type_schema(
                    input_name, op._metadata.inputs)

                importer_name = importer_node.generate_importer_base_name(
                    dependent_task_name=task_name, input_name=input_name)
                importer_task_spec = importer_node.build_importer_task_spec(
                    importer_name)
                importer_comp_spec = importer_node.build_importer_component_spec(
                    importer_base_name=importer_name,
                    input_name=input_name,
                    input_type_schema=type_schema)
                importer_task_name = importer_task_spec.task_info.name
                importer_comp_name = importer_task_spec.component_ref.name
                importer_exec_label = importer_comp_spec.executor_label
                pipeline_spec.root.dag.tasks[importer_task_name].CopyFrom(
                    importer_task_spec)
                pipeline_spec.components[importer_comp_name].CopyFrom(
                    importer_comp_spec)

                task.inputs.artifacts[
                    input_name].task_output_artifact.producer_task = (
                        importer_task_name)
                task.inputs.artifacts[
                    input_name].task_output_artifact.output_artifact_key = (
                        importer_node.OUTPUT_KEY)

                # Retrieve the pre-built importer spec
                importer_spec = op.importer_specs[input_name]
                deployment_config.executors[
                    importer_exec_label].importer.CopyFrom(importer_spec)

    pipeline_spec.deployment_spec.update(
        json_format.MessageToDict(deployment_config))

    return pipeline_spec
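dsl_utils.sanitize_task_name is applied above so that explicit dependencies reference the same sanitized names used as DAG task keys. A sketch of the general shape of that normalization (the exact rules live in dsl_utils and may differ):

import re


def sanitize_task_name(name: str) -> str:
    # Sketch only: lowercase, map illegal characters to hyphens, collapse
    # runs of hyphens, and strip them from both ends.
    sanitized = re.sub('[^-0-9a-z]+', '-', name.lower())
    return re.sub('-+', '-', sanitized).strip('-')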
def _create_pipeline_spec(
    self,
    args: List[dsl.PipelineParam],
    pipeline: dsl.Pipeline,
) -> pipeline_spec_pb2.PipelineSpec:
    """Creates the pipeline spec object.

    Args:
        args: The list of pipeline arguments.
        pipeline: The instantiated pipeline object.

    Returns:
        A PipelineSpec proto representing the compiled pipeline.

    Raises:
        NotImplementedError: If an argument is of an unsupported type.
    """
    compiler_utils.validate_pipeline_name(pipeline.name)

    deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()
    pipeline_spec = pipeline_spec_pb2.PipelineSpec()

    pipeline_spec.pipeline_info.name = pipeline.name
    pipeline_spec.sdk_version = 'kfp-{}'.format(kfp.__version__)
    # Schema version 2.0.0 is required for kfp-pipeline-spec>0.1.3.1
    pipeline_spec.schema_version = '2.0.0'

    dsl_component_spec.build_component_inputs_spec(
        component_spec=pipeline_spec.root,
        pipeline_params=args,
        is_root_component=True)

    root_group = pipeline.groups[0]

    opsgroups = self._get_groups(root_group)
    op_name_to_parent_groups = self._get_groups_for_ops(root_group)
    opgroup_name_to_parent_groups = self._get_groups_for_opsgroups(root_group)

    condition_params = self._get_condition_params_for_ops(root_group)
    op_name_to_for_loop_op = self._get_for_loop_ops(root_group)

    inputs, outputs = self._get_inputs_outputs(
        pipeline,
        args,
        root_group,
        op_name_to_parent_groups,
        opgroup_name_to_parent_groups,
        condition_params,
        op_name_to_for_loop_op,
    )
    dependencies = self._get_dependencies(
        pipeline,
        root_group,
        op_name_to_parent_groups,
        opgroup_name_to_parent_groups,
        opsgroups,
        condition_params,
    )

    for opsgroup_name in opsgroups.keys():
        self._group_to_dag_spec(
            opsgroups[opsgroup_name],
            inputs,
            outputs,
            dependencies,
            pipeline_spec,
            deployment_config,
            root_group.name,
            op_name_to_parent_groups,
        )

    # Exit Handler
    if pipeline.groups[0].groups:
        first_group = pipeline.groups[0].groups[0]
        if first_group.type == 'exit_handler':
            exit_handler_op = first_group.exit_op

            # Add exit op task spec
            task_name = exit_handler_op.task_spec.task_info.name
            exit_handler_op.task_spec.dependent_tasks.extend(
                pipeline_spec.root.dag.tasks.keys())
            exit_handler_op.task_spec.trigger_policy.strategy = (
                pipeline_spec_pb2.PipelineTaskSpec.TriggerPolicy
                .TriggerStrategy.ALL_UPSTREAM_TASKS_COMPLETED)
            pipeline_spec.root.dag.tasks[task_name].CopyFrom(
                exit_handler_op.task_spec)

            # Add exit op component spec if it does not exist.
            component_name = exit_handler_op.task_spec.component_ref.name
            if component_name not in pipeline_spec.components:
                pipeline_spec.components[component_name].CopyFrom(
                    exit_handler_op.component_spec)

            # Add exit op executor spec if it does not exist.
            executor_label = exit_handler_op.component_spec.executor_label
            if executor_label not in deployment_config.executors:
                deployment_config.executors[executor_label].container.CopyFrom(
                    exit_handler_op.container_spec)

    pipeline_spec.deployment_spec.update(
        json_format.MessageToDict(deployment_config))

    return pipeline_spec
def _create_pipeline_spec(
    self,
    pipeline_args: List[dsl.PipelineChannel],
    pipeline: pipeline_context.Pipeline,
) -> pipeline_spec_pb2.PipelineSpec:
    """Creates a pipeline spec object.

    Args:
        pipeline_args: The list of pipeline input parameters.
        pipeline: The instantiated pipeline object.

    Returns:
        A PipelineSpec proto representing the compiled pipeline.

    Raises:
        ValueError: If an argument is of an unsupported type.
    """
    compiler_utils.validate_pipeline_name(pipeline.name)

    deployment_config = pipeline_spec_pb2.PipelineDeploymentConfig()
    pipeline_spec = pipeline_spec_pb2.PipelineSpec()

    pipeline_spec.pipeline_info.name = pipeline.name
    pipeline_spec.sdk_version = 'kfp-{}'.format(kfp.__version__)
    # Schema version 2.1.0 is required for kfp-pipeline-spec>0.1.13
    pipeline_spec.schema_version = '2.1.0'

    pipeline_spec.root.CopyFrom(
        builder.build_component_spec_for_group(
            pipeline_channels=pipeline_args,
            is_root_group=True,
        ))

    root_group = pipeline.groups[0]

    all_groups = self._get_all_groups(root_group)
    group_name_to_group = {group.name: group for group in all_groups}
    task_name_to_parent_groups, group_name_to_parent_groups = (
        self._get_parent_groups(root_group))
    condition_channels = self._get_condition_channels_for_tasks(root_group)
    name_to_for_loop_group = {
        group_name: group
        for group_name, group in group_name_to_group.items()
        if isinstance(group, dsl.ParallelFor)
    }

    inputs = self._get_inputs_for_all_groups(
        pipeline=pipeline,
        pipeline_args=pipeline_args,
        root_group=root_group,
        task_name_to_parent_groups=task_name_to_parent_groups,
        group_name_to_parent_groups=group_name_to_parent_groups,
        condition_channels=condition_channels,
        name_to_for_loop_group=name_to_for_loop_group,
    )
    dependencies = self._get_dependencies(
        pipeline=pipeline,
        root_group=root_group,
        task_name_to_parent_groups=task_name_to_parent_groups,
        group_name_to_parent_groups=group_name_to_parent_groups,
        group_name_to_group=group_name_to_group,
        condition_channels=condition_channels,
    )

    for group in all_groups:
        self._build_spec_by_group(
            pipeline_spec=pipeline_spec,
            deployment_config=deployment_config,
            group=group,
            inputs=inputs,
            dependencies=dependencies,
            rootgroup_name=root_group.name,
            task_name_to_parent_groups=task_name_to_parent_groups,
            group_name_to_parent_groups=group_name_to_parent_groups,
            name_to_for_loop_group=name_to_for_loop_group,
        )

    # TODO: refactor to support multiple exit handlers per pipeline.
    if pipeline.groups[0].groups:
        first_group = pipeline.groups[0].groups[0]
        if isinstance(first_group, dsl.ExitHandler):
            exit_task = first_group.exit_task
            exit_task_name = component_utils.sanitize_task_name(
                exit_task.name)
            exit_handler_group_task_name = component_utils.sanitize_task_name(
                first_group.name)
            input_parameters_in_current_dag = [
                input_name for input_name in
                pipeline_spec.root.input_definitions.parameters
            ]
            exit_task_task_spec = builder.build_task_spec_for_exit_task(
                task=exit_task,
                dependent_task=exit_handler_group_task_name,
                pipeline_inputs=pipeline_spec.root.input_definitions,
            )

            exit_task_component_spec = builder.build_component_spec_for_task(
                task=exit_task)

            exit_task_container_spec = builder.build_container_spec_for_task(
                task=exit_task)

            # Add exit task task spec
            pipeline_spec.root.dag.tasks[exit_task_name].CopyFrom(
                exit_task_task_spec)

            # Add exit task component spec if it does not exist.
            component_name = exit_task_task_spec.component_ref.name
            if component_name not in pipeline_spec.components:
                pipeline_spec.components[component_name].CopyFrom(
                    exit_task_component_spec)

            # Add exit task container spec if it does not exist.
            executor_label = exit_task_component_spec.executor_label
            if executor_label not in deployment_config.executors:
                deployment_config.executors[executor_label].container.CopyFrom(
                    exit_task_container_spec)
                pipeline_spec.deployment_spec.update(
                    json_format.MessageToDict(deployment_config))

    return pipeline_spec
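A hypothetical pipeline that would exercise this exit-handler path; the compiler registers the exit task as a root DAG task that depends on the exit handler group (cleanup_op and train_op are illustrative component factories, not part of the source):

from kfp import dsl


@dsl.pipeline(name='exit-handler-example')
def exit_handler_pipeline():
    cleanup = cleanup_op()  # hypothetical exit task
    with dsl.ExitHandler(cleanup):
        train_op()  # hypothetical main task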