def testEnsureTopologicalOrder(self):
    """Checks ensure_topological_order against every ordering of a small DAG.

    The graph has one upstream node "a" with two downstream nodes "b" and
    "c", so only orderings that place "a" first are expected to be accepted.
    """
    node_a = EmptyComponent(name="a")
    node_b = EmptyComponent(name="b")
    node_c = EmptyComponent(name="c")
    node_a.add_downstream_node(node_b)
    node_a.add_downstream_node(node_c)

    # Orderings are identified by concatenating the node ids in sequence.
    valid_orders = {"abc", "acb"}

    for candidate in itertools.permutations([node_a, node_b, node_c]):
        order_key = "".join(node.id for node in candidate)
        is_sorted = compiler_utils.ensure_topological_order(candidate)
        if order_key in valid_orders:
            self.assertTrue(is_sorted)
        else:
            self.assertFalse(is_sorted)
def compile(self, tfx_pipeline: pipeline.Pipeline) -> pipeline_pb2.Pipeline:
    """Compiles a tfx pipeline into uDSL proto.

    Args:
      tfx_pipeline: A TFX pipeline.

    Returns:
      A Pipeline proto that encodes all necessary information of the pipeline.

    Raises:
      AssertionError: If the components of `tfx_pipeline` are not
        topologically sorted.
    """
    context = _CompilerContext(tfx_pipeline.pipeline_info)
    pipeline_pb = pipeline_pb2.Pipeline()
    pipeline_pb.pipeline_info.id = context.pipeline_info.pipeline_name

    # Pipeline root and run id are both exposed as string runtime parameters;
    # only the pipeline root is given a value taken from the pipeline info.
    compiler_utils.set_runtime_parameter_pb(
        pipeline_pb.runtime_spec.pipeline_root.runtime_parameter,
        constants.PIPELINE_ROOT_PARAMETER_NAME, str,
        context.pipeline_info.pipeline_root)
    compiler_utils.set_runtime_parameter_pb(
        pipeline_pb.runtime_spec.pipeline_run_id.runtime_parameter,
        constants.PIPELINE_RUN_ID_PARAMETER_NAME, str)

    # Raised explicitly rather than via an `assert` statement so the check is
    # not stripped when Python runs with -O. The exception type and message
    # are kept the same as the assertion this replaces.
    if not compiler_utils.ensure_topological_order(tfx_pipeline.components):
        raise AssertionError(
            "Pipeline components are not topologically sorted.")

    for node in tfx_pipeline.components:
        component_pb = self._compile_node(node, context)
        pipeline_or_node = pipeline_pb.PipelineOrNode()
        pipeline_or_node.pipeline_node.CopyFrom(component_pb)
        # TODO(b/158713812): Support sub-pipeline.
        pipeline_pb.nodes.append(pipeline_or_node)
        # Record the compiled node on the context after it has been appended,
        # keyed by node id.
        context.component_pbs[node.id] = component_pb

    # Currently only synchronous mode is supported
    pipeline_pb.execution_mode = pipeline_pb2.Pipeline.ExecutionMode.SYNC
    return pipeline_pb
def _validate_pipeline(tfx_pipeline: pipeline.Pipeline):
    """Checks that a pipeline may be compiled, raising if it may not.

    Args:
      tfx_pipeline: The pipeline to validate.

    Raises:
      ValueError: If the pipeline is in ASYNC execution mode and has a task
        dependency, or if its components are not topologically sorted.
    """
    is_async = tfx_pipeline.execution_mode == pipeline.ExecutionMode.ASYNC
    # The task-dependency check is only evaluated for ASYNC pipelines.
    if is_async and compiler_utils.has_task_dependency(tfx_pipeline):
        raise ValueError("Task dependency is not supported in ASYNC mode.")

    components_sorted = compiler_utils.ensure_topological_order(
        tfx_pipeline.components)
    if not components_sorted:
        raise ValueError("Pipeline components are not topologically sorted.")
def compile(self, tfx_pipeline: pipeline.Pipeline) -> pipeline_pb2.Pipeline:
    """Compiles a tfx pipeline into uDSL proto.

    Args:
      tfx_pipeline: A TFX pipeline.

    Returns:
      A Pipeline proto that encodes all necessary information of the pipeline.

    Raises:
      AssertionError: If the components of `tfx_pipeline` are not
        topologically sorted.
    """
    context = _CompilerContext.from_tfx_pipeline(tfx_pipeline)
    pipeline_pb = pipeline_pb2.Pipeline()
    pipeline_pb.pipeline_info.id = context.pipeline_info.pipeline_name
    pipeline_pb.execution_mode = context.execution_mode

    compiler_utils.set_runtime_parameter_pb(
        pipeline_pb.runtime_spec.pipeline_root.runtime_parameter,
        constants.PIPELINE_ROOT_PARAMETER_NAME, str,
        context.pipeline_info.pipeline_root)
    # The run id runtime parameter is only declared for SYNC pipelines.
    if pipeline_pb.execution_mode == pipeline_pb2.Pipeline.ExecutionMode.SYNC:
        compiler_utils.set_runtime_parameter_pb(
            pipeline_pb.runtime_spec.pipeline_run_id.runtime_parameter,
            constants.PIPELINE_RUN_ID_PARAMETER_NAME, str)

    # Raised explicitly rather than via an `assert` statement so the check is
    # not stripped when Python runs with -O. The exception type and message
    # are kept the same as the assertion this replaces.
    if not compiler_utils.ensure_topological_order(tfx_pipeline.components):
        raise AssertionError(
            "Pipeline components are not topologically sorted.")

    deployment_config = pipeline_pb2.IntermediateDeploymentConfig()
    if tfx_pipeline.metadata_connection_config:
        deployment_config.metadata_connection_config.Pack(
            tfx_pipeline.metadata_connection_config)

    for node in tfx_pipeline.components:
        # In ASYNC mode ResolverNode is merged into the downstream node as a
        # ResolverConfig
        if compiler_utils.is_resolver(node) and context.is_async_mode:
            continue
        node_pb = self._compile_node(node, context, deployment_config,
                                     tfx_pipeline.enable_cache)
        pipeline_or_node = pipeline_pb.PipelineOrNode()
        pipeline_or_node.pipeline_node.CopyFrom(node_pb)
        # TODO(b/158713812): Support sub-pipeline.
        pipeline_pb.nodes.append(pipeline_or_node)
        # Record the compiled node on the context after it has been appended,
        # keyed by node id.
        context.node_pbs[node.id] = node_pb

    if tfx_pipeline.platform_config:
        deployment_config.pipeline_level_platform_config.Pack(
            tfx_pipeline.platform_config)
    # The deployment config is packed only after the node loop, which is
    # handed the same object through _compile_node.
    pipeline_pb.deployment_config.Pack(deployment_config)
    return pipeline_pb