def testLegacyBeamDagRunnerConstruction(self):
  self.assertIsInstance(beam_dag_runner.BeamDagRunner(),
                        beam_dag_runner.BeamDagRunner)

  # Test that the legacy Beam DAG runner is used when a PipelineConfig is
  # specified.
  config = pipeline_config.PipelineConfig()
  runner = beam_dag_runner.BeamDagRunner(config=config)
  self.assertIs(runner.__class__, legacy_beam_dag_runner.BeamDagRunner)
  self.assertIs(runner._config, config)

  # Test that the legacy Beam DAG runner is used when beam_orchestrator_args
  # is specified.
  beam_orchestrator_args = ['--my-beam-option']
  runner = beam_dag_runner.BeamDagRunner(
      beam_orchestrator_args=beam_orchestrator_args)
  self.assertIs(runner.__class__, legacy_beam_dag_runner.BeamDagRunner)
  self.assertIs(runner._beam_orchestrator_args, beam_orchestrator_args)

  # Test that the legacy Beam DAG runner is used when both a PipelineConfig
  # and beam_orchestrator_args are specified.
  config = pipeline_config.PipelineConfig()
  beam_orchestrator_args = ['--my-beam-option']
  runner = beam_dag_runner.BeamDagRunner(
      config=config, beam_orchestrator_args=beam_orchestrator_args)
  self.assertIs(runner.__class__, legacy_beam_dag_runner.BeamDagRunner)
  self.assertIs(runner._config, config)
  self.assertIs(runner._beam_orchestrator_args, beam_orchestrator_args)
def testInitSucceed(self):
  # Init with default parameters.
  pipeline_config.PipelineConfig()

  # Init with custom parameters. Note that component_config_overrides is a
  # dict keyed by component id, not a set.
  pipeline_config.PipelineConfig(
      supported_launcher_classes=[
          in_process_component_launcher.InProcessComponentLauncher
      ],
      default_component_configs=[
          docker_component_config.DockerComponentConfig()
      ],
      component_config_overrides={
          'comp-1': docker_component_config.DockerComponentConfig()
      })
def testFindComponentLaunchInfoReturnConfigOverride(self):
  input_artifact = test_utils._InputArtifact()
  component = test_utils._FakeComponent(
      name='FakeComponent',
      input_channel=channel_utils.as_channel([input_artifact]),
      custom_executor_spec=executor_spec.ExecutorContainerSpec(
          image='gcr://test', args=['{{input_dict["input"][0].uri}}']))
  default_config = docker_component_config.DockerComponentConfig()
  override_config = docker_component_config.DockerComponentConfig(name='test')
  p_config = pipeline_config.PipelineConfig(
      supported_launcher_classes=[
          docker_component_launcher.DockerComponentLauncher
      ],
      default_component_configs=[default_config],
      component_config_overrides={
          '_FakeComponent.FakeComponent': override_config
      })

  (launcher_class,
   c_config) = config_utils.find_component_launch_info(p_config, component)

  self.assertEqual(docker_component_launcher.DockerComponentLauncher,
                   launcher_class)
  self.assertEqual(override_config, c_config)
def testNoSupportedLaunchers(self):
  config = pipeline_config.PipelineConfig(
      supported_launcher_classes=[
          docker_component_launcher.DockerComponentLauncher
      ])
  runner = local_dag_runner.LocalDagRunner(config=config)
  with self.assertRaisesRegex(RuntimeError, 'No launcher info can be found'):
    runner.run(self._getTestPipeline())
def __init__(self, config: Optional[pipeline_config.PipelineConfig] = None): """Initializes a TfxRunner instance. Args: config: Optional pipeline config for customizing the launching of each component. """ self._config = config or pipeline_config.PipelineConfig()
def testFindComponentLaunchInfoFailWithNoLauncherClassFound(self):
  input_artifact = test_utils._InputArtifact()
  component = test_utils._FakeComponent(
      name='FakeComponent',
      input_channel=channel_utils.as_channel([input_artifact]))
  p_config = pipeline_config.PipelineConfig(supported_launcher_classes=[
      docker_component_launcher.DockerComponentLauncher
  ])

  with self.assertRaises(RuntimeError):
    # DockerComponentLauncher cannot launch a class-based executor.
    config_utils.find_component_launch_info(p_config, component)
def testFindComponentLaunchInfoReturnDefaultLaunchInfo(self):
  input_artifact = types.Artifact(type_name='InputPath')
  component = test_utils._FakeComponent(
      name='FakeComponent',
      input_channel=channel_utils.as_channel([input_artifact]))
  p_config = pipeline_config.PipelineConfig()

  (launcher_class,
   c_config) = config_utils.find_component_launch_info(p_config, component)

  self.assertEqual(in_process_component_launcher.InProcessComponentLauncher,
                   launcher_class)
  self.assertIsNone(c_config)
def __init__(self, config: Optional[pipeline_config.PipelineConfig] = None): """Initializes local TFX orchestrator. Args: config: Optional pipeline config for customizing the launching of each component. Defaults to pipeline config that supports InProcessComponentLauncher and DockerComponentLauncher. """ if config is None: config = pipeline_config.PipelineConfig( supported_launcher_classes=[ in_process_component_launcher.InProcessComponentLauncher, docker_component_launcher.DockerComponentLauncher, ], ) super(LocalDagRunner, self).__init__(config)
def testDockerComponentLauncherInBeam(self):
  beam_dag_runner.BeamDagRunner(
      config=pipeline_config.PipelineConfig(
          supported_launcher_classes=[
              docker_component_launcher.DockerComponentLauncher
          ],
          default_component_configs=[
              docker_component_config.DockerComponentConfig()
          ])).run(
              _create_pipeline(
                  pipeline_name=self._pipeline_name,
                  pipeline_root=self._pipeline_root,
                  metadata_path=self._metadata_path,
                  name='docker_e2e_test_in_beam'))

  metadata_config = metadata.sqlite_metadata_connection_config(
      self._metadata_path)
  with metadata.Metadata(metadata_config) as m:
    self.assertEqual(1, len(m.store.get_executions()))
def __init__(self,
             beam_orchestrator_args: Optional[List[Text]] = None,
             config: Optional[pipeline_config.PipelineConfig] = None):
  """Initializes BeamDagRunner as a TFX orchestrator.

  Args:
    beam_orchestrator_args: Beam args for the Beam orchestrator. Note that
      this is different from the beam_pipeline_args within
      additional_pipeline_args, which is for Beam pipelines in components.
    config: Optional pipeline config for customizing the launching of each
      component. Defaults to a pipeline config that supports
      InProcessComponentLauncher and DockerComponentLauncher.
  """
  if config is None:
    config = pipeline_config.PipelineConfig(
        supported_launcher_classes=[
            in_process_component_launcher.InProcessComponentLauncher,
            docker_component_launcher.DockerComponentLauncher,
        ],
    )
  super(BeamDagRunner, self).__init__(config)
  self._beam_orchestrator_args = beam_orchestrator_args
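# --- Illustrative usage sketch, not part of the original source.
# --- '--my-beam-option' is the placeholder flag used by
# --- testLegacyBeamDagRunnerConstruction above, not a real Beam flag.
from tfx.orchestration.config import pipeline_config

# beam_orchestrator_args configure the orchestrating Beam pipeline itself;
# Beam jobs inside components are configured separately through
# beam_pipeline_args in additional_pipeline_args.
BeamDagRunner(beam_orchestrator_args=['--my-beam-option'])

# Per testLegacyBeamDagRunnerConstruction, constructing the public
# beam_dag_runner.BeamDagRunner with a config and/or beam_orchestrator_args
# dispatches to this legacy implementation.
BeamDagRunner(
    config=pipeline_config.PipelineConfig(),
    beam_orchestrator_args=['--my-beam-option'])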
def testStubbedTaxiPipelineBeam(self):
  # Run pipeline with stub executors.
  stub_component_launcher.StubComponentLauncher.initialize(
      test_data_dir=self._recorded_output_dir, test_component_ids=[])
  stub_pipeline_config = pipeline_config.PipelineConfig(
      supported_launcher_classes=[
          stub_component_launcher.StubComponentLauncher,
      ])
  local_dag_runner.LocalDagRunner(config=stub_pipeline_config).run(
      self.taxi_pipeline)

  self.assertTrue(fileio.exists(self._metadata_path))
  metadata_config = metadata.sqlite_metadata_connection_config(
      self._metadata_path)

  # Verify that recorded files are successfully copied to the output uris.
  with metadata.Metadata(metadata_config) as m:
    artifacts = m.store.get_artifacts()
    artifact_count = len(artifacts)
    executions = m.store.get_executions()
    execution_count = len(executions)
    # Artifact count is greater by 3 due to extra artifacts produced by
    # Evaluator (blessing and evaluation), Trainer (model and model_run) and
    # Transform (example, graph, cache), minus the Resolver, which doesn't
    # generate a new artifact.
    self.assertEqual(artifact_count, execution_count + 3)
    self.assertLen(self.taxi_pipeline.components, execution_count)

    for execution in executions:
      component_id = execution.properties[
          metadata._EXECUTION_TYPE_KEY_COMPONENT_ID].string_value  # pylint: disable=protected-access
      if component_id.startswith('ResolverNode'):
        continue
      eid = [execution.id]
      events = m.store.get_events_by_execution_ids(eid)
      output_events = [
          x for x in events if x.type == metadata_store_pb2.Event.OUTPUT
      ]
      for event in output_events:
        steps = event.path.steps
        self.assertTrue(steps[0].HasField('key'))
        name = steps[0].key
        artifacts = m.store.get_artifacts_by_id([event.artifact_id])
        for idx, artifact in enumerate(artifacts):
          self.assertDirectoryEqual(
              artifact.uri,
              os.path.join(self._recorded_output_dir, component_id, name,
                           str(idx)))

  # Calls verifier for pipeline output artifacts, excluding the resolver node.
  local_dag_runner.LocalDagRunner().run(self.taxi_pipeline)
  pipeline_outputs = executor_verifier_utils.get_pipeline_outputs(
      self.taxi_pipeline.metadata_connection_config,
      self.taxi_pipeline.pipeline_info)

  verifier_map = {
      'model': self._verify_model,
      'model_run': self._verify_model,
      'examples': self._verify_examples,
      'schema': self._verify_schema,
      'anomalies': self._verify_anomalies,
      'evaluation': self._verify_evaluation
  }

  # List of components to verify. ResolverNode is ignored because it
  # doesn't have an executor.
  verify_component_ids = [
      component.id
      for component in self.taxi_pipeline.components
      if not component.id.startswith('ResolverNode')
  ]

  for component_id in verify_component_ids:
    logging.info('Verifying %s', component_id)
    for key, artifact_dict in pipeline_outputs[component_id].items():
      for idx, artifact in artifact_dict.items():
        recorded_uri = os.path.join(self._recorded_output_dir, component_id,
                                    key, str(idx))
        verifier_map.get(key, self._verify_file_path)(artifact.uri,
                                                      recorded_uri)
def testStubbedImdbPipelineBeam(self):
  # Run pipeline with stub executors that replay the recorded outputs in
  # self._recorded_output_dir.
  stub_component_launcher.StubComponentLauncher.initialize(
      test_data_dir=self._recorded_output_dir, test_component_ids=[])
  stub_pipeline_config = pipeline_config.PipelineConfig(
      supported_launcher_classes=[
          stub_component_launcher.StubComponentLauncher,
      ])
  BeamDagRunner(config=stub_pipeline_config).run(self.imdb_pipeline)

  self.assertTrue(fileio.exists(self._metadata_path))
  metadata_config = metadata.sqlite_metadata_connection_config(
      self._metadata_path)

  # Verify that recorded files are successfully copied to the output uris.
  with metadata.Metadata(metadata_config) as m:
    for execution in m.store.get_executions():
      component_id = execution.properties[
          metadata._EXECUTION_TYPE_KEY_COMPONENT_ID].string_value  # pylint: disable=protected-access
      if component_id.startswith('ResolverNode'):
        continue
      eid = [execution.id]
      events = m.store.get_events_by_execution_ids(eid)
      output_events = [
          x for x in events if x.type == metadata_store_pb2.Event.OUTPUT
      ]
      for event in output_events:
        steps = event.path.steps
        self.assertTrue(steps[0].HasField('key'))
        name = steps[0].key
        artifacts = m.store.get_artifacts_by_id([event.artifact_id])
        for idx, artifact in enumerate(artifacts):
          self.assertDirectoryEqual(
              artifact.uri,
              os.path.join(self._recorded_output_dir, component_id, name,
                           str(idx)))

  # Calls verifier for pipeline output artifacts, excluding the resolver node.
  BeamDagRunner().run(self.imdb_pipeline)
  pipeline_outputs = executor_verifier_utils.get_pipeline_outputs(
      self.imdb_pipeline.metadata_connection_config,
      self.imdb_pipeline.pipeline_info)

  verifier_map = {
      'model': self._verify_model,
      'model_run': self._verify_model,
      'examples': self._verify_examples,
      'schema': self._verify_schema,
      'anomalies': self._verify_anomalies,
      'evaluation': self._verify_evaluation
  }

  # List of components to verify. ResolverNode is ignored because it
  # doesn't have an executor.
  verify_component_ids = [
      component.id
      for component in self.imdb_pipeline.components
      if not component.id.startswith('ResolverNode')
  ]

  for component_id in verify_component_ids:
    logging.info('Verifying %s', component_id)
    for key, artifact_dict in pipeline_outputs[component_id].items():
      for idx, artifact in artifact_dict.items():
        recorded_uri = os.path.join(self._recorded_output_dir, component_id,
                                    key, str(idx))
        verifier_map.get(key, self._verify_file_path)(artifact.uri,
                                                      recorded_uri)
def testTaxiPipelineBeam(self):
  # Run the pipeline and record its outputs to self._recorded_output_dir.
  record_taxi_pipeline = taxi_pipeline_beam._create_pipeline(  # pylint:disable=protected-access
      pipeline_name=self._pipeline_name,
      data_root=self._data_root,
      module_file=self._module_file,
      serving_model_dir=self._serving_model_dir,
      pipeline_root=self._pipeline_root,
      metadata_path=self._recorded_mlmd_path,
      beam_pipeline_args=[])
  BeamDagRunner().run(record_taxi_pipeline)

  pipeline_recorder_utils.record_pipeline(
      output_dir=self._recorded_output_dir,
      metadata_db_uri=self._recorded_mlmd_path,
      host=None,
      port=None,
      pipeline_name=self._pipeline_name,
      run_id=None)

  # Run pipeline with stub executors.
  taxi_pipeline = taxi_pipeline_beam._create_pipeline(  # pylint:disable=protected-access
      pipeline_name=self._pipeline_name,
      data_root=self._data_root,
      module_file=self._module_file,
      serving_model_dir=self._serving_model_dir,
      pipeline_root=self._pipeline_root,
      metadata_path=self._metadata_path,
      beam_pipeline_args=[])

  model_resolver_id = 'ResolverNode.latest_blessed_model_resolver'
  stubbed_component_ids = [
      component.id
      for component in taxi_pipeline.components
      if component.id != model_resolver_id
  ]
  stub_launcher = stub_component_launcher.get_stub_launcher_class(
      test_data_dir=self._recorded_output_dir,
      stubbed_component_ids=stubbed_component_ids,
      stubbed_component_map={})
  stub_pipeline_config = pipeline_config.PipelineConfig(
      supported_launcher_classes=[
          stub_launcher,
      ])
  BeamDagRunner(config=stub_pipeline_config).run(taxi_pipeline)

  self.assertTrue(tf.io.gfile.exists(self._metadata_path))
  metadata_config = metadata.sqlite_metadata_connection_config(
      self._metadata_path)

  # Verify that recorded files are successfully copied to the output uris.
  with metadata.Metadata(metadata_config) as m:
    artifacts = m.store.get_artifacts()
    artifact_count = len(artifacts)
    executions = m.store.get_executions()
    execution_count = len(executions)
    # Artifact count is greater by 3 due to extra artifacts produced by
    # Evaluator (blessing and evaluation), Trainer (model and model_run) and
    # Transform (example, graph, cache), minus the Resolver, which doesn't
    # generate a new artifact.
    self.assertEqual(artifact_count, execution_count + 3)
    self.assertLen(taxi_pipeline.components, execution_count)

    for execution in executions:
      component_id = execution.properties[
          metadata._EXECUTION_TYPE_KEY_COMPONENT_ID].string_value  # pylint: disable=protected-access
      if component_id == 'ResolverNode.latest_blessed_model_resolver':
        continue
      eid = [execution.id]
      events = m.store.get_events_by_execution_ids(eid)
      output_events = [
          x for x in events if x.type == metadata_store_pb2.Event.OUTPUT
      ]
      for event in output_events:
        steps = event.path.steps
        self.assertTrue(steps[0].HasField('key'))
        name = steps[0].key
        artifacts = m.store.get_artifacts_by_id([event.artifact_id])
        for idx, artifact in enumerate(artifacts):
          self.assertDirectoryEqual(
              artifact.uri,
              os.path.join(self._recorded_output_dir, component_id, name,
                           str(idx)))
def testInitFailWithDupDefaultComponentConfigClasses(self):
  with self.assertRaises(ValueError):
    pipeline_config.PipelineConfig(default_component_configs=[
        docker_component_config.DockerComponentConfig(),
        docker_component_config.DockerComponentConfig(),
    ])
def testInitFailWithDupLauncherClasses(self):
  with self.assertRaises(ValueError):
    pipeline_config.PipelineConfig(supported_launcher_classes=[
        in_process_component_launcher.InProcessComponentLauncher,
        in_process_component_launcher.InProcessComponentLauncher,
    ])