def run(
    self,
    component: base_node.BaseNode,
    enable_cache: bool = True,
    beam_pipeline_args: Optional[List[Text]] = None
) -> execution_result.ExecutionResult:
  """Executes one TFX component inside this interactive context.

  A new run id (the current local time in ISO format) is generated for
  every call. Before launching, each output artifact of the component is
  stamped with this context's pipeline name, the component id and the
  output key it belongs to.

  Args:
    component: The component instance to execute.
    enable_cache: Forwarded to the driver args to switch caching on or off.
    beam_pipeline_args: Optional Beam pipeline args for this run. When None
      or empty, this context's `beam_pipeline_args` attribute is used
      instead.

  Returns:
    An execution_result.ExecutionResult holding the component and the
    execution id reported by the launcher.
  """
  now_iso = datetime.datetime.now().isoformat()
  info = data_types.PipelineInfo(
      pipeline_name=self.pipeline_name,
      pipeline_root=self.pipeline_root,
      run_id=now_iso)
  args_for_driver = data_types.DriverArgs(
      enable_cache=enable_cache, interactive_resolution=True)
  mlmd_connection = metadata.Metadata(self.metadata_connection_config)

  # Per-call args win; otherwise fall back to the context-level args.
  if not beam_pipeline_args:
    beam_pipeline_args = self.beam_pipeline_args
  extra_pipeline_args = {}

  # Tag every output artifact with where it comes from.
  for output_key, channel in component.outputs.items():
    for produced in channel.get():
      produced.pipeline_name = self.pipeline_name
      produced.producer_component = component.id
      produced.name = output_key

  # Special treatment for pip dependencies.
  # TODO(b/187122662): Pass through pip dependencies as a first-class
  # component flag.
  if isinstance(component, base_component.BaseComponent):
    component._resolve_pip_dependencies(self.pipeline_root)  # pylint: disable=protected-access

  # TODO(hongyes): figure out how to resolve launcher class in the interactive
  # context.
  launcher_cls = in_process_component_launcher.InProcessComponentLauncher
  launcher = launcher_cls.create(
      component,
      info,
      args_for_driver,
      mlmd_connection,
      beam_pipeline_args,
      extra_pipeline_args)

  # The telemetry label depends on whether the `colab` module is importable.
  try:
    import colab  # pytype: disable=import-error  # pylint: disable=g-import-not-at-top, unused-import, unused-variable
  except ImportError:
    runner_label = 'interactivecontext'
  else:
    runner_label = 'interactivecontext-colab'

  telemetry_labels = {telemetry_utils.LABEL_TFX_RUNNER: runner_label}
  with telemetry_utils.scoped_labels(telemetry_labels):
    launch_outcome = launcher.launch()
    execution_id = launch_outcome.execution_id

  return execution_result.ExecutionResult(
      component=component, execution_id=execution_id)
def run(
    self,
    component: base_component.BaseComponent,
    enable_cache: bool = True
) -> execution_result.ExecutionResult:
  """Executes one TFX component inside this interactive context.

  A new run id (the current local time in ISO format) is generated for
  every call. Beam is asked for one direct worker per CPU reported by
  `multiprocessing.cpu_count()`, or a single worker when that count is
  unavailable. Each output artifact of the component is stamped with this
  context's pipeline name, the component id, the run id and its output key
  before the component is launched.

  Args:
    component: The component instance to execute.
    enable_cache: Forwarded to the driver args to switch caching on or off.

  Returns:
    An execution_result.ExecutionResult holding the component and the value
    returned by the launcher's `launch()` as the execution id.
  """
  now_iso = datetime.datetime.now().isoformat()
  info = data_types.PipelineInfo(
      pipeline_name=self.pipeline_name,
      pipeline_root=self.pipeline_root,
      run_id=now_iso)
  args_for_driver = data_types.DriverArgs(
      enable_cache=enable_cache, interactive_resolution=True)

  # cpu_count() raises NotImplementedError when the count cannot be
  # determined; in that case run Beam with one worker.
  try:
    worker_count = multiprocessing.cpu_count()
  except NotImplementedError:
    absl.logging.info('Using a single process for Beam pipeline execution.')
    worker_count = 1
  direct_worker_flag = '--direct_num_workers=%d' % worker_count
  beam_args = [direct_worker_flag]
  extra_pipeline_args = {}

  # Tag every output artifact with where it comes from.
  all_outputs = component.outputs.get_all()
  for output_key, channel in all_outputs.items():
    for produced in channel.get():
      produced.pipeline_name = self.pipeline_name
      produced.producer_component = component.id
      produced.run_id = now_iso
      produced.name = output_key

  # TODO(hongyes): figure out how to resolve launcher class in the interactive
  # context.
  launcher_cls = in_process_component_launcher.InProcessComponentLauncher
  launcher = launcher_cls.create(
      component,
      info,
      args_for_driver,
      self.metadata_connection_config,
      beam_args,
      extra_pipeline_args)
  launched_id = launcher.launch()

  return execution_result.ExecutionResult(
      component=component, execution_id=launched_id)
def run(
    self,
    component: base_component.BaseComponent,
    enable_cache: bool = True,
    beam_pipeline_args: Optional[List[Text]] = None
) -> execution_result.ExecutionResult:
  """Executes one TFX component inside this interactive context.

  A new run id (the current local time in ISO format) is generated for
  every call. Each output artifact of the component is stamped with this
  context's pipeline name, the component id, the run id and its output key
  before the component is launched.

  Args:
    component: The component instance to execute.
    enable_cache: Forwarded to the driver args to switch caching on or off.
    beam_pipeline_args: Optional Beam pipeline args handed to the launcher.
      When None or empty, an empty list is passed instead.

  Returns:
    An execution_result.ExecutionResult holding the component and the
    execution id reported by the launcher.
  """
  now_iso = datetime.datetime.now().isoformat()
  info = data_types.PipelineInfo(
      pipeline_name=self.pipeline_name,
      pipeline_root=self.pipeline_root,
      run_id=now_iso)
  args_for_driver = data_types.DriverArgs(
      enable_cache=enable_cache, interactive_resolution=True)
  mlmd_connection = metadata.Metadata(self.metadata_connection_config)

  # Any falsy value (None or an empty list) becomes a fresh empty list.
  if not beam_pipeline_args:
    beam_pipeline_args = []
  extra_pipeline_args = {}

  # Tag every output artifact with where it comes from.
  all_outputs = component.outputs.get_all()
  for output_key, channel in all_outputs.items():
    for produced in channel.get():
      produced.pipeline_name = self.pipeline_name
      produced.producer_component = component.id
      produced.run_id = now_iso
      produced.name = output_key

  # TODO(hongyes): figure out how to resolve launcher class in the interactive
  # context.
  launcher_cls = in_process_component_launcher.InProcessComponentLauncher
  launcher = launcher_cls.create(
      component,
      info,
      args_for_driver,
      mlmd_connection,
      beam_pipeline_args,
      extra_pipeline_args)
  launch_outcome = launcher.launch()

  return execution_result.ExecutionResult(
      component=component, execution_id=launch_outcome.execution_id)