Beispiel #1
0
    def run(
        self,
        component: base_node.BaseNode,
        enable_cache: bool = True,
        beam_pipeline_args: Optional[List[Text]] = None
    ) -> execution_result.ExecutionResult:
        """Execute a single TFX component inside the interactive context.

        A run id is derived from the current local timestamp. Every output
        artifact of the component is stamped with this context's pipeline
        name, the component id and its output key before the component is
        launched through the in-process launcher.

        Args:
          component: Component instance to be run.
          enable_cache: Whether caching logic should be enabled in the driver.
          beam_pipeline_args: Optional Beam pipeline args for Beam jobs within
            the executor. The executor uses the Beam DirectRunner by default.
            When a non-empty value is given it takes precedence over the
            beam_pipeline_args stored on this context; otherwise the stored
            ones are used.

        Returns:
          An execution_result.ExecutionResult holding the component and the
          execution id reported by the launcher.
        """
        run_id = datetime.datetime.now().isoformat()
        info = data_types.PipelineInfo(pipeline_name=self.pipeline_name,
                                       pipeline_root=self.pipeline_root,
                                       run_id=run_id)
        driver_options = data_types.DriverArgs(
            enable_cache=enable_cache, interactive_resolution=True)
        mlmd_connection = metadata.Metadata(self.metadata_connection_config)
        # Per-call args win; fall back to the ones configured on the context.
        effective_beam_args = beam_pipeline_args or self.beam_pipeline_args
        extra_pipeline_args = {}

        # Stamp each output artifact with where it comes from.
        for output_key, channel in component.outputs.items():
            for produced in channel.get():
                produced.pipeline_name = self.pipeline_name
                produced.producer_component = component.id
                produced.name = output_key

        # Special treatment for pip dependencies.
        # TODO(b/187122662): Pass through pip dependencies as a first-class
        # component flag.
        if isinstance(component, base_component.BaseComponent):
            component._resolve_pip_dependencies(self.pipeline_root)  # pylint: disable=protected-access

        # TODO(hongyes): figure out how to resolve launcher class in the interactive
        # context.
        launcher = in_process_component_launcher.InProcessComponentLauncher.create(
            component, info, driver_options, mlmd_connection,
            effective_beam_args, extra_pipeline_args)

        # The telemetry label depends on whether the `colab` module is
        # importable in the current environment.
        try:
            import colab  # pytype: disable=import-error # pylint: disable=g-import-not-at-top, unused-import, unused-variable
        except ImportError:
            runner_label = 'interactivecontext'
        else:
            runner_label = 'interactivecontext-colab'

        labels = {telemetry_utils.LABEL_TFX_RUNNER: runner_label}
        with telemetry_utils.scoped_labels(labels):
            execution_id = launcher.launch().execution_id

        return execution_result.ExecutionResult(component=component,
                                                execution_id=execution_id)
Beispiel #2
0
  def run(self,
          component: base_component.BaseComponent,
          enable_cache: bool = True) -> execution_result.ExecutionResult:
    """Execute a single TFX component inside the interactive context.

    A run id is derived from the current local timestamp. Every output
    artifact of the component is stamped with this context's pipeline name,
    the component id, the run id and its output key before the component is
    launched through the in-process launcher. Beam is asked to use one direct
    worker per CPU reported by `multiprocessing.cpu_count()`, or a single
    worker when that count is unavailable.

    Args:
      component: Component instance to be run.
      enable_cache: Whether caching logic should be enabled in the driver.

    Returns:
      An execution_result.ExecutionResult holding the component and the value
      returned by the launcher's `launch()` call as its execution id.
    """
    run_id = datetime.datetime.now().isoformat()
    info = data_types.PipelineInfo(pipeline_name=self.pipeline_name,
                                   pipeline_root=self.pipeline_root,
                                   run_id=run_id)
    driver_options = data_types.DriverArgs(enable_cache=enable_cache,
                                           interactive_resolution=True)

    # cpu_count() may be unsupported on some platforms; degrade to one worker.
    try:
      worker_count = multiprocessing.cpu_count()
    except NotImplementedError:
      worker_count = 1
      absl.logging.info('Using a single process for Beam pipeline execution.')
    beam_args = ['--direct_num_workers=%d' % worker_count]
    extra_pipeline_args = {}

    # Stamp each output artifact with where it comes from.
    for output_key, channel in component.outputs.get_all().items():
      for produced in channel.get():
        produced.pipeline_name = self.pipeline_name
        produced.producer_component = component.id
        produced.run_id = run_id
        produced.name = output_key

    # TODO(hongyes): figure out how to resolve launcher class in the interactive
    # context.
    launcher = in_process_component_launcher.InProcessComponentLauncher.create(
        component, info, driver_options, self.metadata_connection_config,
        beam_args, extra_pipeline_args)
    launched_id = launcher.launch()

    return execution_result.ExecutionResult(component=component,
                                            execution_id=launched_id)
    def run(
        self,
        component: base_component.BaseComponent,
        enable_cache: bool = True,
        beam_pipeline_args: Optional[List[Text]] = None
    ) -> execution_result.ExecutionResult:
        """Execute a single TFX component inside the interactive context.

        A run id is derived from the current local timestamp. Every output
        artifact of the component is stamped with this context's pipeline
        name, the component id, the run id and its output key before the
        component is launched through the in-process launcher.

        Args:
          component: Component instance to be run.
          enable_cache: Whether caching logic should be enabled in the driver.
          beam_pipeline_args: Optional Beam pipeline args for Beam jobs within
            the executor. The executor uses the Beam DirectRunner by default.
            A missing or empty value results in an empty argument list.

        Returns:
          An execution_result.ExecutionResult holding the component and the
          execution id reported by the launcher.
        """
        run_id = datetime.datetime.now().isoformat()
        info = data_types.PipelineInfo(pipeline_name=self.pipeline_name,
                                       pipeline_root=self.pipeline_root,
                                       run_id=run_id)
        driver_options = data_types.DriverArgs(
            enable_cache=enable_cache, interactive_resolution=True)
        mlmd_connection = metadata.Metadata(self.metadata_connection_config)
        # Normalise a missing/empty argument to a fresh empty list.
        effective_beam_args = beam_pipeline_args or []
        extra_pipeline_args = {}

        # Stamp each output artifact with where it comes from.
        for output_key, channel in component.outputs.get_all().items():
            for produced in channel.get():
                produced.pipeline_name = self.pipeline_name
                produced.producer_component = component.id
                produced.run_id = run_id
                produced.name = output_key

        # TODO(hongyes): figure out how to resolve launcher class in the interactive
        # context.
        launcher = in_process_component_launcher.InProcessComponentLauncher.create(
            component, info, driver_options, mlmd_connection,
            effective_beam_args, extra_pipeline_args)
        launch_outcome = launcher.launch()
        execution_id = launch_outcome.execution_id

        return execution_result.ExecutionResult(component=component,
                                                execution_id=execution_id)