def testLauncher_with_CustomDriver_ExistingSpan(self):
  LauncherTest.fakeExampleGenOutput(self._mlmd_connection, self._example_gen,
                                    2, 1)

  test_launcher = launcher.Launcher(
      pipeline_node=self._example_gen,
      mlmd_connection=self._mlmd_connection,
      pipeline_info=self._pipeline_info,
      pipeline_runtime_spec=self._pipeline_runtime_spec,
      executor_spec=self._trainer_executor_spec,
      custom_driver_spec=self._custom_driver_spec,
      custom_executor_operators=self._test_executor_operators)
  _ = test_launcher.launch()

  with self._mlmd_connection as m:
    artifact = m.store.get_artifacts_by_type('Examples')[1]
    self.assertProtoPartiallyEquals(
        """
        id: 2
        type_id: 4
        custom_properties {
          key: "name"
          value {
            string_value: ":test_run_0:my_example_gen:output_examples:0"
          }
        }
        custom_properties {
          key: "span"
          value {
            int_value: 2
          }
        }
        custom_properties {
          key: "version"
          value {
            int_value: 2
          }
        }
        state: LIVE""",
        artifact,
        ignored_fields=[
            'uri', 'create_time_since_epoch', 'last_update_time_since_epoch'
        ])
def testLauncher_EmptyOptionalInputTriggersExecution(self):
  # In this test case, both inputs of the trainer are marked as optional. So
  # even when there is no input from them, the trainer can still be triggered.
  self._trainer.inputs.inputs['examples'].min_count = 0
  self._trainer.inputs.inputs['transform_graph'].min_count = 0
  test_launcher = launcher.Launcher(
      pipeline_node=self._trainer,
      mlmd_connection=self._mlmd_connection,
      pipeline_info=self._pipeline_info,
      pipeline_runtime_spec=self._pipeline_runtime_spec,
      executor_spec=self._trainer_executor_spec,
      custom_executor_operators=self._test_executor_operators)
  execution_metadata = test_launcher.launch()

  with self._mlmd_connection as m:
    [artifact] = m.store.get_artifacts_by_type('Model')
    self.assertProtoPartiallyEquals(
        """
        id: 1
        type_id: 6
        custom_properties {
          key: "name"
          value {
            string_value: ":test_run_0:my_trainer:model:0"
          }
        }
        state: LIVE""",
        artifact,
        ignored_fields=[
            'uri', 'create_time_since_epoch', 'last_update_time_since_epoch'
        ])
    [execution] = m.store.get_executions_by_id([execution_metadata.id])
    self.assertProtoPartiallyEquals(
        """
        id: 1
        type_id: 4
        last_known_state: COMPLETE
        """,
        execution,
        ignored_fields=[
            'create_time_since_epoch', 'last_update_time_since_epoch'
        ])
def testLauncher_CacheIsSupportedForNodeWithNoOutput(self):
  # Even though a node has no output at all, the launcher should treat the
  # second execution as CACHED as long as the cache context is the same.
  LauncherTest.fakeUpstreamOutputs(self._mlmd_connection, self._example_gen,
                                   self._transform)
  self._trainer.ClearField('outputs')
  test_launcher = launcher.Launcher(
      pipeline_node=self._trainer,
      mlmd_connection=self._mlmd_connection,
      pipeline_info=self._pipeline_info,
      pipeline_runtime_spec=self._pipeline_runtime_spec,
      executor_spec=self._trainer_executor_spec,
      custom_executor_operators=self._test_executor_operators)
  execution_info = test_launcher.launch()

  with self._mlmd_connection as m:
    [execution] = m.store.get_executions_by_id([execution_info.execution_id])
    self.assertProtoPartiallyEquals(
        """
        id: 3
        last_known_state: COMPLETE
        """,
        execution,
        ignored_fields=[
            'type_id', 'create_time_since_epoch',
            'last_update_time_since_epoch'
        ])

  execution_info = test_launcher.launch()
  with self._mlmd_connection as m:
    [execution] = m.store.get_executions_by_id([execution_info.execution_id])
    self.assertProtoPartiallyEquals(
        """
        id: 4
        last_known_state: CACHED
        """,
        execution,
        ignored_fields=[
            'type_id', 'create_time_since_epoch',
            'last_update_time_since_epoch'
        ])
def testLauncher_ExecutionFailedViaReturnCode(self):
  # In the case that the executor fails and raises an exception, an Execution
  # will still be published.
  self.reloadPipelineWithNewRunId()
  LauncherTest.fakeUpstreamOutputs(self._mlmd_connection, self._example_gen,
                                   self._transform)
  executor_operators = {
      _PYTHON_CLASS_EXECUTABLE_SPEC: _FakeErrorExecutorOperator
  }
  test_launcher = launcher.Launcher(
      pipeline_node=self._trainer,
      mlmd_connection=self._mlmd_connection,
      pipeline_info=self._pipeline_info,
      pipeline_runtime_spec=self._pipeline_runtime_spec,
      executor_spec=self._trainer_executor_spec,
      custom_executor_operators=executor_operators)

  with self.assertRaisesRegex(
      Exception,
      'Execution .* failed with error code .* and error message .*'):
    _ = test_launcher.launch()

  with self._mlmd_connection as m:
    artifacts = m.store.get_artifacts_by_type('Model')
    self.assertEmpty(artifacts)
    executions = m.store.get_executions()
    self.assertProtoPartiallyEquals(
        """
        id: 3
        last_known_state: FAILED
        custom_properties {
          key: '__execution_result__'
          value {
            string_value: '{\\n "resultMessage": "execution canceled.",\\n "code": 1\\n}'
          }
        }
        """,
        executions[-1],
        ignored_fields=[
            'type_id', 'create_time_since_epoch',
            'last_update_time_since_epoch'
        ])
def testLauncher_InputPartiallyReady(self):
  # No new execution should be triggered or registered when not all inputs
  # are ready.
  LauncherTest.fakeUpstreamOutputs(self._mlmd_connection, self._example_gen,
                                   None)
  test_launcher = launcher.Launcher(
      pipeline_node=self._trainer,
      mlmd_connection=self._mlmd_connection,
      pipeline_info=self._pipeline_info,
      pipeline_runtime_spec=self._pipeline_runtime_spec,
      executor_spec=self._trainer_executor_spec,
      custom_executor_operators=self._test_executor_operators)

  with self._mlmd_connection as m:
    existing_executions = m.store.get_executions()

  execution_metadata = test_launcher.launch()
  self.assertIsNone(execution_metadata)

  with self._mlmd_connection as m:
    # No new execution is registered in MLMD.
    self.assertCountEqual(existing_executions, m.store.get_executions())
def __init__(self, pipeline_node: pipeline_pb2.PipelineNode,
             mlmd_connection: metadata.Metadata,
             pipeline_info: pipeline_pb2.PipelineInfo,
             pipeline_runtime_spec: pipeline_pb2.PipelineRuntimeSpec):
  """Initializes the _PipelineNodeAsDoFn.

  Args:
    pipeline_node: The specification of the node that this launcher launches.
    mlmd_connection: ML metadata connection. The connection is expected to
      not be opened before launcher is initiated.
    pipeline_info: The information of the pipeline that this node runs in.
    pipeline_runtime_spec: The runtime information of the pipeline that this
      node runs in.
  """
  self._launcher = launcher.Launcher(
      pipeline_node=pipeline_node,
      mlmd_connection=mlmd_connection,
      pipeline_info=pipeline_info,
      pipeline_runtime_spec=pipeline_runtime_spec)
  self._component_id = pipeline_node.node_info.id
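# Hypothetical sketch, not code from this file: it illustrates how a Beam DoFn
# built around the __init__ above would typically drive the node from its
# process() hook. The signature and the use of absl logging are assumptions;
# upstream "signal" inputs only enforce ordering, while the launcher resolves
# the node's real inputs from MLMD when launch() is called.
def process(self, element, *signals) -> None:
  del element, signals  # Used only to sequence this node after its upstream nodes.
  logging.info('Component %s is running.', self._component_id)
  self._launcher.launch()
  logging.info('Component %s is finished.', self._component_id)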
def testLauncher_resolver_node(self):
  mock_resolver_node_handler_class = mock.create_autospec(
      system_node_handler.SystemNodeHandler)
  mock_resolver_node_handler = mock.create_autospec(
      system_node_handler.SystemNodeHandler, instance=True)
  mock_resolver_node_handler_class.return_value = mock_resolver_node_handler
  expected_execution_info = data_types.ExecutionInfo()
  expected_execution_info.execution_id = 123
  mock_resolver_node_handler.run.return_value = expected_execution_info
  launcher._SYSTEM_NODE_HANDLERS[
      'tfx.dsl.components.common.resolver.Resolver'] = (
          mock_resolver_node_handler_class)
  test_launcher = launcher.Launcher(
      pipeline_node=self._resolver,
      mlmd_connection=self._mlmd_connection,
      pipeline_info=self._pipeline_info,
      pipeline_runtime_spec=self._pipeline_runtime_spec)
  execution_info = test_launcher.launch()
  mock_resolver_node_handler.run.assert_called_once_with(
      self._mlmd_connection, self._resolver, self._pipeline_info,
      self._pipeline_runtime_spec)
  self.assertEqual(execution_info, expected_execution_info)
def testLauncher_importer_node(self):
  mock_import_node_handler_class = mock.create_autospec(
      system_node_handler.SystemNodeHandler)
  mock_import_node_handler = mock.create_autospec(
      system_node_handler.SystemNodeHandler, instance=True)
  mock_import_node_handler_class.return_value = mock_import_node_handler
  expected_execution = metadata_store_pb2.Execution()
  expected_execution.id = 123
  mock_import_node_handler.run.return_value = expected_execution
  launcher._SYSTEM_NODE_HANDLERS[
      'tfx.components.common_nodes.importer_node.ImporterNode'] = (
          mock_import_node_handler_class)
  test_launcher = launcher.Launcher(
      pipeline_node=self._importer,
      mlmd_connection=self._mlmd_connection,
      pipeline_info=self._pipeline_info,
      pipeline_runtime_spec=self._pipeline_runtime_spec)
  execution_metadata = test_launcher.launch()
  mock_import_node_handler.run.assert_called_once_with(
      self._mlmd_connection, self._importer, self._pipeline_info,
      self._pipeline_runtime_spec)
  self.assertEqual(execution_metadata, expected_execution)
def testLauncher_CacheDisabled(self):
  # In this test case, there are two executions:
  # In the first one, the trainer reads the fake upstream outputs and
  # publishes a new output.
  # In the second one, because enable_cache is false, the launcher publishes
  # a new COMPLETE execution even though the inputs don't change.
  self._trainer.execution_options.caching_options.enable_cache = False
  LauncherTest.fakeUpstreamOutputs(self._mlmd_connection, self._example_gen,
                                   self._transform)
  test_launcher = launcher.Launcher(
      pipeline_node=self._trainer,
      mlmd_connection=self._mlmd_connection,
      pipeline_info=self._pipeline_info,
      pipeline_runtime_spec=self._pipeline_runtime_spec,
      executor_spec=self._trainer_executor_spec,
      custom_executor_operators=self._test_executor_operators)
  execution_metadata = test_launcher.launch()

  with self._mlmd_connection as m:
    [artifact] = m.store.get_artifacts_by_type('Model')
    self.assertProtoPartiallyEquals(
        """
        id: 3
        type_id: 10
        custom_properties {
          key: "name"
          value {
            string_value: ":test_run_0:my_trainer:model:0"
          }
        }
        state: LIVE""",
        artifact,
        ignored_fields=[
            'uri', 'create_time_since_epoch', 'last_update_time_since_epoch'
        ])
    [execution] = m.store.get_executions_by_id([execution_metadata.id])
    self.assertProtoPartiallyEquals(
        """
        id: 3
        type_id: 8
        last_known_state: COMPLETE
        """,
        execution,
        ignored_fields=[
            'create_time_since_epoch', 'last_update_time_since_epoch'
        ])

  execution_metadata = test_launcher.launch()
  with self._mlmd_connection as m:
    artifacts = m.store.get_artifacts_by_type('Model')
    self.assertLen(artifacts, 2)
    self.assertProtoPartiallyEquals(
        """
        id: 4
        type_id: 10
        custom_properties {
          key: "name"
          value {
            string_value: ":test_run_0:my_trainer:model:0"
          }
        }
        state: LIVE""",
        artifacts[1],
        ignored_fields=[
            'uri', 'create_time_since_epoch', 'last_update_time_since_epoch'
        ])
    [execution] = m.store.get_executions_by_id([execution_metadata.id])
    self.assertProtoPartiallyEquals(
        """
        id: 4
        type_id: 8
        last_known_state: COMPLETE
        """,
        execution,
        ignored_fields=[
            'create_time_since_epoch', 'last_update_time_since_epoch'
        ])
def testLauncher_PublishingNewArtifactsAndUseCache(self):
  # In this test case, there are two executions:
  # In the first one, the trainer reads the fake upstream outputs and
  # publishes a new output.
  # In the second one, because enable_cache is true and the inputs don't
  # change, the launcher publishes a CACHED execution.
  self.reloadPipelineWithNewRunId()
  LauncherTest.fakeUpstreamOutputs(self._mlmd_connection, self._example_gen,
                                   self._transform)
  execution_info = launcher.Launcher(
      pipeline_node=self._trainer,
      mlmd_connection=self._mlmd_connection,
      pipeline_info=self._pipeline_info,
      pipeline_runtime_spec=self._pipeline_runtime_spec,
      executor_spec=self._trainer_executor_spec,
      custom_executor_operators=self._test_executor_operators).launch()

  with self._mlmd_connection as m:
    [artifact] = m.store.get_artifacts_by_type('Model')
    self.assertProtoPartiallyEquals(
        """
        id: 3
        custom_properties {
          key: "name"
          value {
            string_value: ":test_run_%d:my_trainer:model:0"
          }
        }
        custom_properties {
          key: "tfx_version"
          value {
            string_value: "0.123.4.dev"
          }
        }
        state: LIVE""" % self._pipeline_run_id_counter,
        artifact,
        ignored_fields=[
            'type_id', 'uri', 'create_time_since_epoch',
            'last_update_time_since_epoch'
        ])
    [execution] = m.store.get_executions_by_id([execution_info.execution_id])
    self.assertProtoPartiallyEquals(
        """
        id: 3
        last_known_state: COMPLETE
        """,
        execution,
        ignored_fields=[
            'type_id', 'create_time_since_epoch',
            'last_update_time_since_epoch'
        ])

  self.reloadPipelineWithNewRunId()
  execution_info = launcher.Launcher(
      pipeline_node=self._trainer,
      mlmd_connection=self._mlmd_connection,
      pipeline_info=self._pipeline_info,
      pipeline_runtime_spec=self._pipeline_runtime_spec,
      executor_spec=self._trainer_executor_spec,
      custom_executor_operators=self._test_executor_operators).launch()

  with self._mlmd_connection as m:
    [execution] = m.store.get_executions_by_id([execution_info.execution_id])
    self.assertProtoPartiallyEquals(
        """
        id: 4
        last_known_state: CACHED
        """,
        execution,
        ignored_fields=[
            'type_id', 'create_time_since_epoch',
            'last_update_time_since_epoch'
        ])
def main(argv):
  # Log to the container's stdout so Kubeflow Pipelines UI can display logs to
  # the user.
  logging.basicConfig(stream=sys.stdout, level=logging.INFO)
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser()
  parser.add_argument('--pipeline_root', type=str, required=True)
  parser.add_argument(
      '--metadata_ui_path',
      type=str,
      required=False,
      default='/mlpipeline-ui-metadata.json')
  parser.add_argument('--kubeflow_metadata_config', type=str, required=True)
  parser.add_argument('--tfx_ir', type=str, required=True)
  parser.add_argument('--node_id', type=str, required=True)
  # There might be multiple runtime parameters.
  # `args.runtime_parameter` should become List[str] by using "append".
  parser.add_argument('--runtime_parameter', type=str, action='append')

  # TODO(b/196892362): Replace hooking with a more straightforward mechanism.
  launcher._register_execution = _register_execution  # pylint: disable=protected-access

  args = parser.parse_args(argv)

  tfx_ir = pipeline_pb2.Pipeline()
  json_format.Parse(args.tfx_ir, tfx_ir)

  _resolve_runtime_parameters(tfx_ir, args.runtime_parameter)

  deployment_config = runner_utils.extract_local_deployment_config(tfx_ir)

  kubeflow_metadata_config = kubeflow_pb2.KubeflowMetadataConfig()
  json_format.Parse(args.kubeflow_metadata_config, kubeflow_metadata_config)
  metadata_connection = metadata.Metadata(
      _get_metadata_connection_config(kubeflow_metadata_config))

  node_id = args.node_id
  # Attach necessary labels to distinguish between different runners and DSLs.
  # TODO(zhitaoli): Pass this from KFP runner side when the same container
  # entrypoint can be used by a different runner.
  with telemetry_utils.scoped_labels({
      telemetry_utils.LABEL_TFX_RUNNER: 'kfp',
  }):
    custom_executor_operators = {
        executable_spec_pb2.ContainerExecutableSpec:
            kubernetes_executor_operator.KubernetesExecutorOperator
    }

    executor_spec = runner_utils.extract_executor_spec(
        deployment_config, node_id)
    custom_driver_spec = runner_utils.extract_custom_driver_spec(
        deployment_config, node_id)

    pipeline_node = _get_pipeline_node(tfx_ir, node_id)
    component_launcher = launcher.Launcher(
        pipeline_node=pipeline_node,
        mlmd_connection=metadata_connection,
        pipeline_info=tfx_ir.pipeline_info,
        pipeline_runtime_spec=tfx_ir.runtime_spec,
        executor_spec=executor_spec,
        custom_driver_spec=custom_driver_spec,
        custom_executor_operators=custom_executor_operators)
    logging.info('Component %s is running.', node_id)
    execution_info = component_launcher.launch()
    logging.info('Component %s is finished.', node_id)

  # Dump the UI metadata.
  _dump_ui_metadata(pipeline_node, execution_info, args.metadata_ui_path)
def run(
    self, pipeline: tfx_pipeline.Pipeline, run_name: Optional[str] = None
) -> None:
    """Runs the given logical pipeline locally.

    Args:
        pipeline: Logical pipeline containing pipeline args and components.
        run_name: Optional name for the run.
    """
    for component in pipeline.components:
        if isinstance(component, base_component.BaseComponent):
            component._resolve_pip_dependencies(
                pipeline.pipeline_info.pipeline_root
            )

    c = compiler.Compiler()
    pipeline = c.compile(pipeline)

    run_name = run_name or datetime.now().strftime("%d_%h_%y-%H_%M_%S_%f")
    # Substitute the runtime parameter to be a concrete run_id
    runtime_parameter_utils.substitute_runtime_parameter(
        pipeline,
        {
            PIPELINE_RUN_ID_PARAMETER_NAME: run_name,
        },
    )

    deployment_config = runner_utils.extract_local_deployment_config(
        pipeline
    )
    connection_config = deployment_config.metadata_connection_config  # type: ignore[attr-defined] # noqa

    logger.debug(f"Using deployment config:\n {deployment_config}")
    logger.debug(f"Using connection config:\n {connection_config}")

    # Run each node. Note that the compiled pipeline.nodes list is in
    # topological order.
    for node in pipeline.nodes:
        pipeline_node = node.pipeline_node
        node_id = pipeline_node.node_info.id

        executor_spec = runner_utils.extract_executor_spec(
            deployment_config, node_id
        )
        custom_driver_spec = runner_utils.extract_custom_driver_spec(
            deployment_config, node_id
        )

        component_launcher = launcher.Launcher(
            pipeline_node=pipeline_node,
            mlmd_connection=metadata.Metadata(connection_config),
            pipeline_info=pipeline.pipeline_info,
            pipeline_runtime_spec=pipeline.runtime_spec,
            executor_spec=executor_spec,
            custom_driver_spec=custom_driver_spec,
        )
        start = time.time()
        logger.info(f"Step `{node_id}` has started.")
        component_launcher.launch()
        end = time.time()
        logger.info(
            f"Step `{node_id}` has finished"
            f" in {format_timedelta_pretty(end - start)}."
        )
def main():
  # Log to the container's stdout so Kubeflow Pipelines UI can display logs to
  # the user.
  logging.basicConfig(stream=sys.stdout, level=logging.INFO)
  logging.getLogger().setLevel(logging.INFO)

  parser = argparse.ArgumentParser()
  parser.add_argument('--pipeline_root', type=str, required=True)
  parser.add_argument('--kubeflow_metadata_config', type=str, required=True)
  parser.add_argument('--serialized_component', type=str, required=True)
  parser.add_argument('--tfx_ir', type=str, required=True)
  parser.add_argument('--node_id', type=str, required=True)
  launcher._register_execution = _register_execution  # pylint: disable=protected-access

  args = parser.parse_args()

  tfx_ir = pipeline_pb2.Pipeline()
  json_format.Parse(args.tfx_ir, tfx_ir)
  # Substitute the runtime parameter to be a concrete run_id
  runtime_parameter_utils.substitute_runtime_parameter(
      tfx_ir, {
          constants.PIPELINE_RUN_ID_PARAMETER_NAME: os.environ['WORKFLOW_ID'],
      })

  deployment_config = runner_utils.extract_local_deployment_config(tfx_ir)

  kubeflow_metadata_config = kubeflow_pb2.KubeflowMetadataConfig()
  json_format.Parse(args.kubeflow_metadata_config, kubeflow_metadata_config)
  metadata_connection = kubeflow_metadata_adapter.KubeflowMetadataAdapter(
      _get_metadata_connection_config(kubeflow_metadata_config))

  node_id = args.node_id
  # Attach necessary labels to distinguish between different runners and DSLs.
  # TODO(zhitaoli): Pass this from KFP runner side when the same container
  # entrypoint can be used by a different runner.
  with telemetry_utils.scoped_labels({
      telemetry_utils.LABEL_TFX_RUNNER: 'kfp',
  }):
    custom_executor_operators = {
        executable_spec_pb2.ContainerExecutableSpec:
            kubernetes_executor_operator.KubernetesExecutorOperator
    }

    executor_spec = runner_utils.extract_executor_spec(
        deployment_config, node_id)
    custom_driver_spec = runner_utils.extract_custom_driver_spec(
        deployment_config, node_id)

    pipeline_node = _get_pipeline_node(tfx_ir, node_id)
    component_launcher = launcher.Launcher(
        pipeline_node=pipeline_node,
        mlmd_connection=metadata_connection,
        pipeline_info=tfx_ir.pipeline_info,
        pipeline_runtime_spec=tfx_ir.runtime_spec,
        executor_spec=executor_spec,
        custom_driver_spec=custom_driver_spec,
        custom_executor_operators=custom_executor_operators)
    logging.info('Component %s is running.', node_id)
    execution_info = component_launcher.launch()
    logging.info('Component %s is finished.', node_id)

  # Dump the UI metadata.
  _dump_ui_metadata(pipeline_node, execution_info)
def run_with_ir(
    self,
    pipeline: pipeline_pb2.Pipeline,
    run_options: Optional[pipeline_pb2.RunOptions] = None,
) -> None:
  """Runs the given pipeline locally.

  Args:
    pipeline: Pipeline IR containing pipeline args and components.
    run_options: Optional args for the run.

  Raises:
    ValueError: If run_options is provided, and both
      run_options.partial_run.from_nodes and run_options.partial_run.to_nodes
      are empty.
  """
  # Substitute the runtime parameter to be a concrete run_id
  runtime_parameter_utils.substitute_runtime_parameter(
      pipeline, {
          constants.PIPELINE_RUN_ID_PARAMETER_NAME:
              datetime.datetime.now().isoformat(),
      })

  deployment_config = runner_utils.extract_local_deployment_config(pipeline)
  connection_config = getattr(
      deployment_config.metadata_connection_config,
      deployment_config.metadata_connection_config.WhichOneof(
          'connection_config'))

  logging.info('Using deployment config:\n %s', deployment_config)
  logging.info('Using connection config:\n %s', connection_config)

  if run_options:
    logging.info('Using run_options:\n %s', run_options)
    pr_opts = run_options.partial_run
    partial_run_utils.mark_pipeline(
        pipeline,
        from_nodes=pr_opts.from_nodes or None,
        to_nodes=pr_opts.to_nodes or None,
        snapshot_settings=pr_opts.snapshot_settings)

  with telemetry_utils.scoped_labels(
      {telemetry_utils.LABEL_TFX_RUNNER: 'local'}):
    # Run each node. Note that the pipeline.nodes list is in topological
    # order.
    #
    # TODO(b/171319478): After IR-based execution is used, use multi-threaded
    # execution so that independent components can be run in parallel.
    for node in pipeline.nodes:
      pipeline_node = node.pipeline_node
      node_id = pipeline_node.node_info.id
      if pipeline_node.execution_options.HasField('skip'):
        logging.info('Skipping component %s.', node_id)
        continue

      executor_spec = runner_utils.extract_executor_spec(
          deployment_config, node_id)
      custom_driver_spec = runner_utils.extract_custom_driver_spec(
          deployment_config, node_id)

      component_launcher = launcher.Launcher(
          pipeline_node=pipeline_node,
          mlmd_connection=metadata.Metadata(connection_config),
          pipeline_info=pipeline.pipeline_info,
          pipeline_runtime_spec=pipeline.runtime_spec,
          executor_spec=executor_spec,
          custom_driver_spec=custom_driver_spec)
      logging.info('Component %s is running.', node_id)
      if pipeline_node.execution_options.run.perform_snapshot:
        with metadata.Metadata(connection_config) as mlmd_handle:
          partial_run_utils.snapshot(mlmd_handle, pipeline)
      component_launcher.launch()
      logging.info('Component %s is finished.', node_id)
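# Hypothetical usage sketch, not code from this file: driving a partial run
# through run_with_ir(). The runner instantiation, node ids, and `pipeline_ir`
# are illustrative assumptions; only the RunOptions fields exercised above
# (partial_run.from_nodes / to_nodes) are relied on.
runner = LocalDagRunner()  # assumed concrete runner exposing run_with_ir()
run_options = pipeline_pb2.RunOptions()
run_options.partial_run.from_nodes.append('my_transform')  # hypothetical node id
run_options.partial_run.to_nodes.append('my_trainer')  # hypothetical node id
runner.run_with_ir(pipeline_ir, run_options=run_options)  # pipeline_ir: a pipeline_pb2.Pipeline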