Beispiel #1
0
 def testEnsureTopologicalOrder(self):
   """Checks ensure_topological_order on every permutation of three nodes.

   Node "a" gets "b" and "c" as downstream nodes; the test expects only the
   permutations whose concatenated ids are "abc" or "acb" to be accepted.
   """
   root, left, right = (EmptyComponent(name=label) for label in "abc")
   for child in (left, right):
     root.add_downstream_node(child)
   accepted = {"abc", "acb"}
   for candidate in itertools.permutations([root, left, right]):
     joined_ids = "".join(node.id for node in candidate)
     is_sorted = compiler_utils.ensure_topological_order(candidate)
     if joined_ids in accepted:
       self.assertTrue(is_sorted)
     else:
       self.assertFalse(is_sorted)
Beispiel #2
0
    def compile(self,
                tfx_pipeline: pipeline.Pipeline) -> pipeline_pb2.Pipeline:
        """Compiles a tfx pipeline into uDSL proto.

        Args:
          tfx_pipeline: A TFX pipeline.

        Returns:
          A Pipeline proto that encodes all necessary information of the
          pipeline.

        Raises:
          AssertionError: If `tfx_pipeline.components` is not topologically
            sorted.
        """
        context = _CompilerContext(tfx_pipeline.pipeline_info)
        pipeline_pb = pipeline_pb2.Pipeline()
        pipeline_pb.pipeline_info.id = context.pipeline_info.pipeline_name
        # Populate the pipeline-root and pipeline-run-id runtime parameter
        # protos of the runtime spec.
        compiler_utils.set_runtime_parameter_pb(
            pipeline_pb.runtime_spec.pipeline_root.runtime_parameter,
            constants.PIPELINE_ROOT_PARAMETER_NAME, str,
            context.pipeline_info.pipeline_root)
        compiler_utils.set_runtime_parameter_pb(
            pipeline_pb.runtime_spec.pipeline_run_id.runtime_parameter,
            constants.PIPELINE_RUN_ID_PARAMETER_NAME, str)

        # Raised explicitly instead of via an `assert` statement so the check
        # still runs when Python is started with -O (which strips asserts).
        # The exception type and message are the same as before.
        is_sorted = compiler_utils.ensure_topological_order(
            tfx_pipeline.components)
        if not is_sorted:
            raise AssertionError(
                "Pipeline components are not topologically sorted.")

        for node in tfx_pipeline.components:
            component_pb = self._compile_node(node, context)
            pipeline_or_node = pipeline_pb.PipelineOrNode()
            pipeline_or_node.pipeline_node.CopyFrom(component_pb)
            # TODO(b/158713812): Support sub-pipeline.
            pipeline_pb.nodes.append(pipeline_or_node)
            # Record the compiled node in the context, keyed by node id.
            context.component_pbs[node.id] = component_pb

        # Currently only synchronous mode is supported
        pipeline_pb.execution_mode = pipeline_pb2.Pipeline.ExecutionMode.SYNC
        return pipeline_pb
Beispiel #3
0
def _validate_pipeline(tfx_pipeline: pipeline.Pipeline):
    """Runs the checks a pipeline must pass before it is compiled.

    Args:
      tfx_pipeline: The TFX pipeline to validate.

    Raises:
      ValueError: If the pipeline is in ASYNC mode and has a task dependency,
        or if its components are not topologically sorted.
    """
    is_async = tfx_pipeline.execution_mode == pipeline.ExecutionMode.ASYNC
    # The task-dependency check is only evaluated for ASYNC pipelines.
    if is_async and compiler_utils.has_task_dependency(tfx_pipeline):
        raise ValueError("Task dependency is not supported in ASYNC mode.")

    components = tfx_pipeline.components
    if compiler_utils.ensure_topological_order(components):
        return
    raise ValueError("Pipeline components are not topologically sorted.")
Beispiel #4
0
    def compile(self,
                tfx_pipeline: pipeline.Pipeline) -> pipeline_pb2.Pipeline:
        """Compiles a tfx pipeline into uDSL proto.

        Args:
          tfx_pipeline: A TFX pipeline.

        Returns:
          A Pipeline proto that encodes all necessary information of the
          pipeline.

        Raises:
          AssertionError: If `tfx_pipeline.components` is not topologically
            sorted.
        """
        context = _CompilerContext.from_tfx_pipeline(tfx_pipeline)
        pipeline_pb = pipeline_pb2.Pipeline()
        pipeline_pb.pipeline_info.id = context.pipeline_info.pipeline_name
        pipeline_pb.execution_mode = context.execution_mode
        compiler_utils.set_runtime_parameter_pb(
            pipeline_pb.runtime_spec.pipeline_root.runtime_parameter,
            constants.PIPELINE_ROOT_PARAMETER_NAME, str,
            context.pipeline_info.pipeline_root)
        # The pipeline-run-id runtime parameter is only set in SYNC mode.
        if pipeline_pb.execution_mode == pipeline_pb2.Pipeline.ExecutionMode.SYNC:
            compiler_utils.set_runtime_parameter_pb(
                pipeline_pb.runtime_spec.pipeline_run_id.runtime_parameter,
                constants.PIPELINE_RUN_ID_PARAMETER_NAME, str)

        # Raised explicitly instead of via an `assert` statement so the check
        # still runs when Python is started with -O (which strips asserts).
        # The exception type and message are the same as before.
        is_sorted = compiler_utils.ensure_topological_order(
            tfx_pipeline.components)
        if not is_sorted:
            raise AssertionError(
                "Pipeline components are not topologically sorted.")

        deployment_config = pipeline_pb2.IntermediateDeploymentConfig()
        if tfx_pipeline.metadata_connection_config:
            deployment_config.metadata_connection_config.Pack(
                tfx_pipeline.metadata_connection_config)
        for node in tfx_pipeline.components:
            # In ASYNC mode ResolverNode is merged into the downstream node as a
            # ResolverConfig
            if compiler_utils.is_resolver(node) and context.is_async_mode:
                continue
            node_pb = self._compile_node(node, context, deployment_config,
                                         tfx_pipeline.enable_cache)
            pipeline_or_node = pipeline_pb.PipelineOrNode()
            pipeline_or_node.pipeline_node.CopyFrom(node_pb)
            # TODO(b/158713812): Support sub-pipeline.
            pipeline_pb.nodes.append(pipeline_or_node)
            # Record the compiled node in the context, keyed by node id.
            context.node_pbs[node.id] = node_pb

        if tfx_pipeline.platform_config:
            deployment_config.pipeline_level_platform_config.Pack(
                tfx_pipeline.platform_config)
        # The deployment config is packed only after the node loop, which
        # receives it as an argument to _compile_node.
        pipeline_pb.deployment_config.Pack(deployment_config)
        return pipeline_pb